Skip to content

Commit baa9c99

Browse files
authored Jul 1, 2022
Merge pull request #89 from ONSdigital/spp_5971_death_count_validation
Spp 5971 death count validation
2 parents 08eea28 + c182024 commit baa9c99

File tree

4 files changed

+68
-1
lines changed

4 files changed

+68
-1
lines changed
 

‎pyproject.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "statistical_methods_library"
3-
version = "4.4.0"
3+
version = "4.5.0"
44
description = ""
55
authors = ["Your Name <you@example.com>"]
66
license = "MIT"

‎statistical_methods_library/estimation.py

+16
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,22 @@ def estimate(
215215
):
216216
raise ValidationError("The h value must be the same per period and stratum.")
217217

218+
# death (death_marker=1) count must not exceed sample (sample_marker=1) count per period and stratum
219+
if (
220+
death_marker_col is not None
221+
and (
222+
input_df.groupBy([period_col, strata_col])
223+
.agg(
224+
sum(col(death_marker_col)).alias("sum_death_marker"),
225+
sum(col(sample_marker_col)).alias("sum_sample_marker"),
226+
)
227+
.filter(col("sum_death_marker") > col("sum_sample_marker"))
228+
.count()
229+
)
230+
> 0
231+
):
232+
raise ValidationError("The death count must be less than sample count.")
233+
218234
# --- prepare our working data frame ---
219235
col_list = [
220236
col(period_col).alias("period"),
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
reference,period,strata,sample_inclusion_marker,death_marker,H,auxiliary,calibration_group
2+
1,202009,10,1,0,True,10,10
3+
2,202009,10,1,0,True,10,10
4+
3,202009,10,1,1,True,10,10
5+
4,202009,10,0,1,True,10,10
6+
5,202009,10,0,1,True,10,10
7+
6,202009,10,0,1,True,10,10
8+
7,202009,11,1,0,True,15,11
9+
8,202009,11,1,0,True,15,11
10+
9,202009,11,1,1,True,15,11
11+
10,202009,11,0,0,True,15,11
12+
11,202009,11,0,0,True,15,11
13+
12,202009,11,0,1,True,15,11
14+
13,202009,12,1,0,False,20,12
15+
14,202009,12,1,0,False,20,12
16+
15,202009,12,1,1,False,20,12
17+
16,202009,12,0,0,False,20,12
18+
17,202009,12,0,0,False,20,12
19+
18,202009,12,0,1,False,20,12
20+
1,202010,10,0,0,True,10,10
21+
2,202010,10,0,0,True,10,10
22+
3,202010,10,0,1,True,10,10
23+
4,202010,10,0,0,True,10,10
24+
5,202010,10,0,0,True,10,10
25+
6,202010,10,0,1,True,10,10
26+
7,202010,11,1,1,True,15,11
27+
8,202010,11,1,1,True,15,11
28+
9,202010,11,1,1,True,15,11
29+
10,202010,11,0,1,True,15,11
30+
11,202010,11,0,1,True,15,11
31+
12,202010,11,0,1,True,15,11
32+
13,202010,12,1,0,False,20,12
33+
14,202010,12,1,0,False,20,12
34+
15,202010,12,1,1,False,20,12
35+
16,202010,12,0,0,False,20,12
36+
17,202010,12,0,0,False,20,12
37+
18,202010,12,0,1,False,20,12

‎tests/test_estimation.py

+14
Original file line number | Diff line number | Diff line change
@@ -174,6 +174,20 @@ def test_dataframe_non_boolean_markers(fxt_load_test_csv):
174174
estimation.estimate(test_dataframe, *params)
175175

176176

177+
@pytest.mark.dependency()
178+
def test_dataframe_large_death_count(fxt_load_test_csv):
179+
test_dataframe = fxt_load_test_csv(
180+
dataframe_columns,
181+
dataframe_types,
182+
"estimation",
183+
"unit",
184+
"large_death_count",
185+
)
186+
with pytest.raises(estimation.ValidationError):
187+
estimation_params = [*params, death_col, h_col]
188+
estimation.estimate(test_dataframe, *estimation_params)
189+
190+
177191
# --- Test validation fail if mixed h values in a strata ---
178192
@pytest.mark.dependency()
179193
def test_dataframe_mixed_h_values_in_strata(fxt_load_test_csv):

0 commit comments

Comments
 (0)
Please sign in to comment.