Commit 3228508

Reduce test flakiness (#575)
1 parent 415cb80 commit 3228508

9 files changed: +261 -201 lines

azure-pipelines.yml (+35 -12)
@@ -98,8 +98,8 @@ jobs:
   # Work around https://github.com/pypa/pip/issues/9542
   - script: 'pip install -U numpy~=1.21.0'
     displayName: 'Upgrade numpy'
-
-  - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
+
+  - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && pip list && python setup.py pytest'
     displayName: 'Unit tests'
     env:
       PYTEST_ADDOPTS: '-m "notebook"'

@@ -126,12 +126,6 @@ jobs:
   # Work around https://github.com/pypa/pip/issues/9542
   - script: 'pip install -U numpy~=1.21.0'
     displayName: 'Upgrade numpy'
-
-  # shap 0.39 and sklearn 1.0 interact badly in these notebooks
-  # shap 0.40 has a bug in waterfall (https://github.com/slundberg/shap/issues/2283) that breaks our main tests
-  # but fixes the interaction here...
-  - script: 'pip install -U shap~=0.40.0'
-    displayName: 'Upgrade shap'
 
   - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
     displayName: 'Unit tests'

@@ -207,7 +201,7 @@ jobs:
   - script: 'pip install pytest pytest-runner && python setup.py pytest'
     displayName: 'Unit tests'
     env:
-      PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal)" -n 2'
+      PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or serial or cate_api)" -n 2'
       COVERAGE_PROCESS_START: 'setup.cfg'
   - task: PublishTestResults@2
     displayName: 'Publish Test Results **/test-results.xml'

@@ -253,15 +247,44 @@ jobs:
   parameters:
     package: '-e .[tf,plt]'
     job:
-      job: Tests_causal
+      job: Tests_serial
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
-      displayName: 'Run tests (Causal)'
+      displayName: 'Run tests (Serial)'
       steps:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "causal" -n 1'
+          PYTEST_ADDOPTS: '-m "serial" -n 1'
+          COVERAGE_PROCESS_START: 'setup.cfg'
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results **/test-results.xml'
+        inputs:
+          testResultsFiles: '**/test-results.xml'
+          testRunTitle: 'Python $(python.version), image $(imageName)'
+        condition: succeededOrFailed()
+
+      - task: PublishCodeCoverageResults@1
+        displayName: 'Publish Code Coverage Results'
+        inputs:
+          codeCoverageTool: Cobertura
+          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
+
+- template: azure-pipelines-steps.yml
+  parameters:
+    package: '-e .[tf,plt]'
+    job:
+      job: Tests_CATE_API
+      dependsOn: 'EvalChanges'
+      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
+      displayName: 'Run tests (Other)'
+      steps:
+      - script: 'pip install pytest pytest-runner'
+        displayName: 'Install pytest'
+      - script: 'python setup.py pytest'
+        displayName: 'CATE Unit tests'
+        env:
+          PYTEST_ADDOPTS: '-m "cate_api" -n auto'
           COVERAGE_PROCESS_START: 'setup.cfg'
   - task: PublishTestResults@2
     displayName: 'Publish Test Results **/test-results.xml'

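The jobs above carve up the suite with pytest marker expressions: the general job now excludes the new serial and cate_api markers (-m "not (notebook or automl or dml or serial or cate_api)"), the Tests_serial job runs only serial-marked tests in a single worker (-m "serial" -n 1), and the new Tests_CATE_API job fans the cate_api-marked tests across all available cores (-m "cate_api" -n auto, where -n comes from the pytest-xdist plugin). For -m selection to work without unknown-mark warnings, custom markers are normally registered with pytest; that registration is not part of the hunks shown here, so the conftest.py sketch below is only a hypothetical illustration reusing the marker names from the CI config.

# Hypothetical conftest.py sketch -- marker names mirror PYTEST_ADDOPTS above;
# the repository's actual registration is not shown in this commit.
def pytest_configure(config):
    # Registering the markers lets `pytest -m "serial"` / `-m "cate_api"` filter
    # tests without raising PytestUnknownMarkWarning for the custom marks.
    config.addinivalue_line("markers", "serial: tests that must run in a single worker (-n 1)")
    config.addinivalue_line("markers", "cate_api: CATE API tests routed to their own CI job")
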
econml/tests/test_causal_analysis.py (+22 -15)
@@ -2,11 +2,14 @@
 # Licensed under the MIT License.
 
 import unittest
+
+from contextlib import ExitStack
+import itertools
 import numpy as np
 from numpy.core.fromnumeric import squeeze
 import pandas as pd
-from contextlib import ExitStack
 import pytest
+
 from econml.solutions.causal_analysis import CausalAnalysis
 from econml.solutions.causal_analysis._causal_analysis import _CausalInsightsConstants
 

@@ -15,7 +18,7 @@ def assert_less_close(arr1, arr2):
     assert np.all(np.logical_or(arr1 <= arr2, np.isclose(arr1, arr2)))
 
 
-@pytest.mark.causal
+@pytest.mark.serial
 class TestCausalAnalysis(unittest.TestCase):
 
     def test_basic_array(self):

@@ -670,21 +673,24 @@ def test_random_state(self):
         inds = [0, 1, 2, 3]
         cats = [2, 3]
         hinds = [0, 3]
-        for n_model in ['linear', 'automl']:
-            for h_model in ['linear', 'forest']:
-                for classification in [True, False]:
-                    ca = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                        nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
-                    ca.fit(X, y)
-                    glo = ca.global_causal_effect()
 
-                    ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                         nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
-                    ca2.fit(X, y)
-                    glo2 = ca.global_causal_effect()
+        for n_model, h_model, classification in\
+                itertools.product(['linear', 'automl'],
+                                  ['linear', 'forest'],
+                                  [True, False]):
 
-                    np.testing.assert_equal(glo.point.values, glo2.point.values)
-                    np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
+            ca = CausalAnalysis(inds, cats, hinds, classification=classification,
+                                nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+            ca.fit(X, y)
+            glo = ca.global_causal_effect()
+
+            ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
+                                 nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+            ca2.fit(X, y)
+            glo2 = ca.global_causal_effect()
+
+            np.testing.assert_equal(glo.point.values, glo2.point.values)
+            np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
 
     def test_can_set_categories(self):
         y = pd.Series(np.random.choice([0, 1], size=(500,)))

@@ -784,6 +790,7 @@ def test_invalid_inds(self):
     # Pass an example where W is irrelevant and X is confounder
     # As long as DML doesnt change the order of the inputs, then things should be good. Otherwise X would be
     # zeroed out and the test will fail
+
    def test_scaling_transforms(self):
        # shouldn't matter if X is scaled much larger or much smaller than W, we should still get good estimates
        n = 2000

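The test_random_state change replaces three nested for-loops with a single loop over itertools.product, so every (nuisance model, heterogeneity model, classification) combination runs at one indentation level and the fit-twice-and-compare body is written once. A standalone sketch of the pattern (the loop body is a placeholder, not the test's actual assertions):

import itertools

# One flat loop over the cartesian product replaces three nested loops;
# 2 * 2 * 2 = 8 combinations are visited in the same order the nesting would produce.
for n_model, h_model, classification in itertools.product(
        ['linear', 'automl'],   # nuisance_models
        ['linear', 'forest'],   # heterogeneity_model
        [True, False]):         # classification
    print(n_model, h_model, classification)  # placeholder for the fit/compare body
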
econml/tests/test_dmliv.py (+8 -5)
@@ -1,20 +1,23 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import unittest
-import pytest
 import pickle
+import unittest
+
 import numpy as np
+import pytest
 from scipy import special
-from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.preprocessing import PolynomialFeatures
+
+from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV
 from econml.iv.dr._dr import _DummyCATE
 from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
-from sklearn.preprocessing import PolynomialFeatures
 from econml.utilities import shape
-from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV
 
 
+@pytest.mark.cate_api
 class TestDMLIV(unittest.TestCase):
     def test_cate_api(self):
         def const_marg_eff_shape(n, d_x, d_y, binary_T):

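Marking the class rather than each method means @pytest.mark.cate_api applies to every test defined on TestDMLIV, so the whole class is collected by the new -m "cate_api" job and skipped by the jobs that exclude that marker. A minimal, self-contained illustration with a hypothetical test class (not part of this commit):

import unittest

import pytest


@pytest.mark.cate_api
class TestExample(unittest.TestCase):
    # A class-level mark is inherited by every test method, so
    # `pytest -m "cate_api"` collects both tests below and
    # `pytest -m "not cate_api"` collects neither.
    def test_one(self):
        self.assertEqual(1 + 1, 2)

    def test_two(self):
        self.assertTrue(isinstance([], list))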