18
18
Tuple ,
19
19
Iterable ,
20
20
Iterator ,
21
- Protocol ,
22
21
)
23
22
else :
24
23
from typing_extensions import Literal
25
24
from typing import Union , Iterable , Any , Optional , Iterator , Callable , Tuple
26
25
import warnings
27
- from numpy .random import randint
28
26
import numpy as np
29
27
import pyerf
30
28
@@ -81,7 +79,7 @@ class InstabilityWarning(UserWarning):
81
79
# def __call__(self, *args: Any, weights: np.ndarray = None) -> Any:
82
80
# ...
83
81
84
- DataType = Union [Tuple [np .ndarray , ...], np .ndarray ]
82
+ DataType = Union [Tuple [Union [ np .ndarray , Sequence [ Any ]], ...], np .ndarray , "pd.Series" ]
85
83
SeedType = Union [
86
84
None ,
87
85
int ,
@@ -94,7 +92,7 @@ class InstabilityWarning(UserWarning):
94
92
95
93
@overload
96
94
def ci (
97
- data : Union [ Tuple [ Union [ np . ndarray , Sequence [ Any ]], ...], np . ndarray , "pd.Series" ] ,
95
+ data : DataType ,
98
96
statfunction : Optional [StatFunctionWithWeights ] = None ,
99
97
alpha : Union [float , Iterable [float ]] = 0.05 ,
100
98
n_samples : int = 10000 ,
@@ -112,7 +110,7 @@ def ci(
112
110
113
111
@overload
114
112
def ci (
115
- data : Union [ Tuple [ Union [ np . ndarray , Sequence [ Any ]], ...], np . ndarray , "pd.Series" ] ,
113
+ data : DataType ,
116
114
statfunction : Optional [StatFunctionWithWeights ] = None ,
117
115
alpha : Union [float , Iterable [float ]] = 0.05 ,
118
116
n_samples : int = 10000 ,
@@ -130,7 +128,7 @@ def ci(
130
128
131
129
@overload
132
130
def ci (
133
- data : Union [ Tuple [ Union [ np . ndarray , Sequence [ Any ]], ...], np . ndarray , "pd.Series" ] ,
131
+ data : DataType ,
134
132
statfunction : Optional [StatFunction ] = None ,
135
133
alpha : Union [float , Iterable [float ]] = 0.05 ,
136
134
n_samples : int = 10000 ,
@@ -148,7 +146,7 @@ def ci(
148
146
149
147
@overload
150
148
def ci (
151
- data : Union [ Tuple [ Union [ np . ndarray , Sequence [ Any ]], ...], np . ndarray , "pd.Series" ] ,
149
+ data : DataType ,
152
150
statfunction : Optional [StatFunction ] = None ,
153
151
alpha : Union [float , Iterable [float ]] = 0.05 ,
154
152
n_samples : int = 10000 ,
@@ -164,7 +162,7 @@ def ci(
164
162
165
163
166
164
def ci (
167
- data : Union [ Tuple [ Union [ np . ndarray , Sequence [ Any ]], ...], np . ndarray , "pd.Series" ] ,
165
+ data : DataType ,
168
166
statfunction : Optional [Union [StatFunctionWithWeights , StatFunction ]] = None ,
169
167
alpha : Union [float , Iterable [float ]] = 0.05 ,
170
168
n_samples : int = 10000 ,
@@ -205,7 +203,7 @@ def ci(
205
203
intervals. If it is an iterable, alpha is assumed to be an iterable of
206
204
each desired percentile.
207
205
n_samples: int, optional
208
- The number of bootstrap samples to use (default=10000 )
206
+ The number of bootstrap samples to use (default=10_000 )
209
207
method: string, optional
210
208
The method to use: one of 'pi', 'bca', or 'abc' (default='bca')
211
209
output: string, optional
@@ -219,8 +217,8 @@ def ci(
219
217
If False, assume data is a single array. If True or "paired",
220
218
assume data is a tuple/other iterable of arrays of the same length that
221
219
should be sampled together (e.g., values in each array at a particular index are
222
- linked in some way). If None, decide based on whether the data is an
223
- actual tuple. If "independent", sample the tuple of arrays separately.
220
+ linked in some way). If None, "paired" is used if data is an actual
221
+ tuple, and False otherwise . If "independent", sample the tuple of arrays separately.
224
222
For True/"paired", each array must be the same length. (default=None)
225
223
226
224
An example of a situation where True/"paired" might be useful is if you have
@@ -431,11 +429,11 @@ def ci(
431
429
out = stat [(nvals , np .indices (nvals .shape )[1 :].squeeze ())]
432
430
elif output == "errorbar" :
433
431
if nvals .ndim == 1 :
434
- out = abs (statfunction (* tdata ) - stat [nvals ])[np .newaxis ].T
432
+ out = np . abs (statfunction (* tdata ) - stat [nvals ])[np .newaxis ].T
435
433
else :
436
- out = abs (
434
+ out = np . abs (
437
435
statfunction (* tdata ) - stat [(nvals , np .indices (nvals .shape )[1 :])]
438
- )[ np . newaxis ]. T
436
+ ). T . squeeze ()
439
437
else :
440
438
raise ValueError ("Output option {0} is not supported." .format (output ))
441
439
@@ -460,7 +458,7 @@ def _ci_abc(
460
458
n = tdata [0 ].shape [0 ] * 1.0
461
459
nn = tdata [0 ].shape [0 ]
462
460
463
- I = np .identity (nn )
461
+ Imatrix = np .identity (nn )
464
462
ep = epsilon / n * 1.0
465
463
p0 = np .repeat (1.0 / n , nn )
466
464
@@ -469,7 +467,7 @@ def _ci_abc(
469
467
except TypeError as e :
470
468
raise TypeError ("statfunction does not accept correct arguments for ABC" ) from e
471
469
472
- di_full = I - p0
470
+ di_full = Imatrix - p0
473
471
tp = np .fromiter (
474
472
(statfunction (* tdata , weights = p0 + ep * di ) for di in di_full ), dtype = float
475
473
)
@@ -716,15 +714,15 @@ def bootstrap_indices_moving_block(
716
714
def pval (
717
715
data : DataType ,
718
716
statfunction : StatFunction = np .average ,
719
- compfunction : Callable [[Any ], bool ] = lambda s : cast (bool , s > 0 ),
717
+ compfunction : Callable [[Any ], Any ] = lambda s : cast (bool , s > 0 ),
720
718
n_samples : int = 10000 ,
721
719
multi : Optional [bool ] = None ,
722
720
seed : SeedType = None ,
723
- ) -> "np.number[Any]" :
721
+ ) -> "Union[ np.number[Any], np.ndarray ]" :
724
722
"""
725
723
Given a set of data ``data``, a statistics function ``statfunction`` that
726
- applies to that data, and the criteriafunction ``compfunction``, computes the
727
- bootstrap probability thatthe statistics function ``statfunction`` on that data
724
+ applies to that data, and the criteria function ``compfunction``, computes the
725
+ bootstrap probability that the statistics function ``statfunction`` on that data
728
726
satisfies the criteria function ``compfunction``. Data points are assumed to
729
727
be delineated by axis 0.
730
728
@@ -742,9 +740,10 @@ def pval(
742
740
to these samples individually.
743
741
compfunction: function (stat) -> True or False
744
742
This function should accept the result of the statfunction computed on the samples of
745
- data from ``data``. It is applied to these results individually.
743
+ data from ``data``. It is applied to these results individually. The default
744
+ tests for each element of statfunction output being > 0.
746
745
n_samples: int, optional
747
- The number of bootstrap samples to use (default=10000)
746
+ The number of bootstrap samples to use (default=10_000).
748
747
multi: boolean, optional
749
748
If False, assume data is a single array. If True, assume data is a tuple/other
750
749
iterable of arrays of the same length that should be sampled together. If None,
@@ -756,17 +755,6 @@ def pval(
756
755
The probability that the statistics defined by the statfunction satisfies the
757
756
criteria defined by the compfunction.
758
757
759
- Examples
760
- --------
761
- To calculate the confidence intervals for the mean of some numbers:
762
-
763
- >> boot.ci( np.randn(100), np.average )
764
-
765
- Given some data points in arrays x and y calculate the confidence intervals
766
- for all linear regression coefficients simultaneously:
767
-
768
- >> boot.ci( (x,y), scipy.stats.linregress )
769
-
770
758
References
771
759
----------
772
760
Efron, An Introduction to the Bootstrap. Chapman & Hall 1993
@@ -796,4 +784,4 @@ def pval(
796
784
797
785
pval_stat = [compfunction (s ) for s in stat ]
798
786
# print pval_stat
799
- return np .mean (pval_stat )
787
+ return cast ( "Union[ np.number[Any], np.ndarray]" , np . mean (pval_stat , axis = 0 ) )
0 commit comments