Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spp 5901 rename methods #90

Merged
merged 5 commits into from
Jul 6, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "statistical_methods_library"
version = "4.5.0"
version = "5.0.0"
description = ""
authors = ["Your Name <[email protected]>"]
license = "MIT"
6 changes: 3 additions & 3 deletions statistical_methods_library/estimation.py
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@ class ValidationError(Exception):
pass


def estimate(
def ht_ratio(
input_df: DataFrame,
unique_identifier_col: str,
period_col: str,
@@ -33,8 +33,8 @@ def estimate(
calibration_weight_col: typing.Optional[str] = "calibration_weight",
) -> DataFrame:
"""
Perform estimation of design and calibration weights using Expansion and
Ratio estimation.
Perform Horvitz-Thompson estimation of design and calibration weights
using Expansion and Ratio estimation.

###Arguments
* `input_df`: The input data frame.
2 changes: 1 addition & 1 deletion statistical_methods_library/imputation.py
Original file line number Diff line number Diff line change
@@ -56,7 +56,7 @@ class DataIntegrityError(ImputationError):
pass


def impute(
def ratio_of_means(
input_df: DataFrame,
reference_col: str,
period_col: str,
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@ class Marker(Enum):
"""The value has not been winsorised because design * calibration is <= 1."""


def one_sided_winsorise(
def winsorise(
input_df: DataFrame,
reference_col: str,
period_col: str,
@@ -43,7 +43,7 @@ def one_sided_winsorise(
calibration_col: typing.Optional[str] = None,
auxiliary_col: typing.Optional[str] = None,
marker_col: typing.Optional[str] = "winsorisation_marker",
):
) -> DataFrame:
"""
Perform One-sided Winsorisation.

3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -11,13 +11,14 @@ def fxt_load_test_csv(fxt_spark_session):
# be called as a function in our tests and thus this fixture would have no
# access to its return value. We do this so that our loader can be
# passed a file name.
def load(columns, types, test_module, test_category, test_data):
def load(columns, types, test_module, test_function, test_category, test_data):
test_dataframe = fxt_spark_session.read.csv(
str(
pathlib.Path(
"tests",
"fixture_data",
test_module,
test_function,
test_category,
f"{test_data}.csv",
)
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
reference,period,strata,target,auxiliary,
1111000,203902,9000,6000,2,
1111001,203902,8000,7000,2,
1111002,203902,9000,,2,
1111003,203902,8000,,2,
1111004,203902,9000,,2,
1111007,203902,8000,,2,
1111000,203903,9000,,2,
1111001,203903,8000,,2,
1111002,203903,9000,1500.2,2,
1111003,203903,8000,2000,2,
1111004,203903,9000,,2,
1111005,203903,8000,1220,2,
1111006,203903,9000,4500,2,
1111007,203903,8000,,2,
1111000,203904,9000,,2,
1111001,203904,8000,,2,
1111002,203904,9000,,2,
1111003,203904,8000,1000,2,
1111004,203904,9000,2300,2,
1111005,203904,8000,,2,
1111006,203904,9000,7000,2,
reference,period,strata,target,auxiliary,
1111000,203902,9000,6000,2,
1111001,203902,8000,7000,2,
1111002,203902,9000,,2,
1111003,203902,8000,,2,
1111004,203902,9000,,2,
1111007,203902,8000,,2,
1111000,203903,9000,,2,
1111001,203903,8000,,2,
1111002,203903,9000,1500.2,2,
1111003,203903,8000,2000,2,
1111004,203903,9000,,2,
1111005,203903,8000,1220,2,
1111006,203903,9000,4500,2,
1111007,203903,8000,,2,
1111000,203904,9000,,2,
1111001,203904,8000,,2,
1111002,203904,9000,,2,
1111003,203904,8000,1000,2,
1111004,203904,9000,2300,2,
1111005,203904,8000,,2,
1111006,203904,9000,7000,2,
1111007,203904,8000,,2,
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
reference,period,strata,target,auxiliary
101,204001,9000,,2
101,204002,9000,10000,2
102,204002,7000,,2
101,204003,9000,,2
102,204003,7000,20000,2
103,204003,9000,,2
101,204004,9000,,2
102,204004,7000,,2
103,204004,9000,,2
reference,period,strata,target,auxiliary
101,204001,9000,,2
101,204002,9000,10000,2
102,204002,7000,,2
101,204003,9000,,2
102,204003,7000,20000,2
103,204003,9000,,2
101,204004,9000,,2
102,204004,7000,,2
103,204004,9000,,2
104,204004,7000,9000,2
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
reference,period,strata,target,auxiliary
1,202001,1,10,1
1,202002,1,20,1
1,202003,1,,1
1,202004,1,,1
2,202001,1,10,1
2,202002,1,20,1
2,202003,1,0,1
2,202004,1,30,1
3,202001,1,10,1
3,202002,1,,1
3,202003,1,20,1
3,202004,1,,1
4,202001,1,10,1
4,202002,1,,1
4,202003,1,,1
4,202004,1,,1
reference,period,strata,target,auxiliary
1,202001,1,10,1
1,202002,1,20,1
1,202003,1,,1
1,202004,1,,1
2,202001,1,10,1
2,202002,1,20,1
2,202003,1,0,1
2,202004,1,30,1
3,202001,1,10,1
3,202002,1,,1
3,202003,1,20,1
3,202004,1,,1
4,202001,1,10,1
4,202002,1,,1
4,202003,1,,1
4,202004,1,,1
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
reference,period,strata,target,auxiliary
130001,202001,100,6583,20
130001,202002,100,3759,20
130001,202003,100,8256,20
130001,202004,100,6743,20
130001,202005,100,6811,20
130001,202006,100,9896,20
130001,202007,100,3282,20
130002,202001,100,687,69
130002,202002,100,8987,69
130002,202003,100,371,69
130002,202004,100,3385,69
130002,202005,100,6606,69
130002,202006,100,5901,69
130002,202007,100,9834,69
130003,202001,100,7113,39
130003,202002,100,8583,39
130003,202003,100,5129,39
130003,202004,100,2024,39
130003,202005,100,9422,39
130003,202006,100,8710,39
130003,202007,100,6461,39
130004,202001,100,2571,37
130004,202002,100,,37
130004,202003,100,,37
130004,202005,100,,37
130004,202006,100,,37
130004,202007,100,6621,37
reference,period,strata,target,auxiliary
130001,202001,100,6583,20
130001,202002,100,3759,20
130001,202003,100,8256,20
130001,202004,100,6743,20
130001,202005,100,6811,20
130001,202006,100,9896,20
130001,202007,100,3282,20
130002,202001,100,687,69
130002,202002,100,8987,69
130002,202003,100,371,69
130002,202004,100,3385,69
130002,202005,100,6606,69
130002,202006,100,5901,69
130002,202007,100,9834,69
130003,202001,100,7113,39
130003,202002,100,8583,39
130003,202003,100,5129,39
130003,202004,100,2024,39
130003,202005,100,9422,39
130003,202006,100,8710,39
130003,202007,100,6461,39
130004,202001,100,2571,37
130004,202002,100,,37
130004,202003,100,,37
130004,202005,100,,37
130004,202006,100,,37
130004,202007,100,6621,37
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
reference,period,strata,target,auxiliary
140001,202001,100,3582,27
140001,202002,100,4533,27
140001,202003,100,4471,27
140001,202004,100,3040,27
140001,202005,100,8519,27
140001,202006,100,8690,27
140001,202007,100,9979,27
140002,202001,100,5359,83
140002,202002,100,7091,83
140002,202003,100,7098,83
140002,202004,100,7408,83
140002,202005,100,1937,83
140002,202006,100,6711,83
140002,202007,100,1052,83
140003,202001,100,439,89
140003,202002,100,7783,89
140003,202003,100,897,89
140003,202004,100,8477,89
140003,202005,100,2207,89
140003,202006,100,9889,89
140003,202007,100,2528,89
140004,202001,100,,28
140004,202002,100,,28
140004,202003,100,,28
140004,202005,100,,28
140004,202006,100,,28
140004,202007,100,7243,28
reference,period,strata,target,auxiliary
140001,202001,100,3582,27
140001,202002,100,4533,27
140001,202003,100,4471,27
140001,202004,100,3040,27
140001,202005,100,8519,27
140001,202006,100,8690,27
140001,202007,100,9979,27
140002,202001,100,5359,83
140002,202002,100,7091,83
140002,202003,100,7098,83
140002,202004,100,7408,83
140002,202005,100,1937,83
140002,202006,100,6711,83
140002,202007,100,1052,83
140003,202001,100,439,89
140003,202002,100,7783,89
140003,202003,100,897,89
140003,202004,100,8477,89
140003,202005,100,2207,89
140003,202006,100,9889,89
140003,202007,100,2528,89
140004,202001,100,,28
140004,202002,100,,28
140004,202003,100,,28
140004,202005,100,,28
140004,202006,100,,28
140004,202007,100,7243,28
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
reference,period,strata,target,auxiliary
150001,202001,100,5891,15
150001,202002,100,4581,15
150001,202003,100,4741,15
150001,202004,100,8092,15
150001,202005,100,2727,15
150001,202006,100,2363,15
150001,202007,100,9549,15
150002,202001,100,8611,5
150002,202002,100,9383,5
150002,202003,100,1064,5
150002,202004,100,7419,5
150002,202005,100,9967,5
150002,202006,100,1393,5
150002,202007,100,2593,5
150003,202001,100,3426,79
150003,202002,100,6246,79
150003,202003,100,8402,79
150003,202004,100,3891,79
150003,202005,100,5463,79
150003,202006,100,6852,79
150003,202007,100,2959,79
150004,202001,100,,69
150004,202002,100,,69
150004,202003,100,4208,69
150004,202005,100,2818,69
150004,202006,100,,69
150004,202007,100,,69
reference,period,strata,target,auxiliary
150001,202001,100,5891,15
150001,202002,100,4581,15
150001,202003,100,4741,15
150001,202004,100,8092,15
150001,202005,100,2727,15
150001,202006,100,2363,15
150001,202007,100,9549,15
150002,202001,100,8611,5
150002,202002,100,9383,5
150002,202003,100,1064,5
150002,202004,100,7419,5
150002,202005,100,9967,5
150002,202006,100,1393,5
150002,202007,100,2593,5
150003,202001,100,3426,79
150003,202002,100,6246,79
150003,202003,100,8402,79
150003,202004,100,3891,79
150003,202005,100,5463,79
150003,202006,100,6852,79
150003,202007,100,2959,79
150004,202001,100,,69
150004,202002,100,,69
150004,202003,100,4208,69
150004,202005,100,2818,69
150004,202006,100,,69
150004,202007,100,,69
Loading