From 64be173b907613a19b0a655a83139a3f286baeb6 Mon Sep 17 00:00:00 2001
From: Romain Jacob <32954999+romain-jacob@users.noreply.github.com>
Date: Fri, 4 Aug 2023 20:57:21 +0200
Subject: [PATCH] ENH: stats: add nonparametric one-sample quantile test and
 CI (#12680)

* MWE of the function, tests added but does not work yet
* added self to contrib list
* added function name to __init__ to appear in the online doc
* function is ready, docstring and tests to finish
* ENH: added non-parametric confidence intervals for quantiles into
  scipy.stats
* fixed docstring
* fixed pep8 typos and doctest typos
* improved efficiency by using binom.isf; fixed minor documentation things
* added None return to the docstring
* fixed docstring bug
* docstring updates
* reverted content of THANKS.txt
* added reference to a related R package
* fixed docstring lines being too long
* corrected line length issues
* update version number
  Co-authored-by: Matt Haberland
* corrected typo in docstring
  Co-authored-by: Matt Haberland
* corrected typo in docstring
  Co-authored-by: Matt Haberland
* typo in docstring
  Co-authored-by: Matt Haberland
* pass on docstring
* Updated unit test for confint_quantile
* MAINT: stats: confint: revert undesired changes to stats.py
* DOC: stats: confint_quantile: add whitespace back to docstrings
* DOC: stats: confint_quantile: add carriage return back to docstrings
  Update scipy/stats/stats.py
  Co-authored-by: Matt Haberland
* DOC: stats: confint_quantile: add carriage return back to docstrings
  Update scipy/stats/stats.py
  Co-authored-by: Matt Haberland
* DOC: stats: confint_quantile: add carriage return back to docstrings
  Update scipy/stats/stats.py
  Co-authored-by: Matt Haberland
* DOC: fixed docstring test
* DOC: fixed doctest
* DOC: fixed doctest
* MAINT: added back the confint code
* MAINT: stats.confint_quantile: fix merge issues
* STY: stats.confint_quantile: PEP8
* ENH: stats.quantile_test: add quantile test and move CI to method of
  result object [skip ci]
* MAINT: stats.quantile_test: adjust (correct?) `confidence_interval`
  [skip ci]
* DOC: stats.quantile_test: update documentation [skip azp] [skip actions]
* FIX: correct p-values for the Quantile test
* some PEP8 formatting
  Co-authored-by: Matt Haberland
* comment update
  Co-authored-by: Matt Haberland
* Cleanup
  Co-authored-by: Matt Haberland
* cleanup
  Co-authored-by: Matt Haberland
* Fixed comment typo
  Co-authored-by: Matt Haberland
* doc typo
  Co-authored-by: Matt Haberland
* doc typo
  Co-authored-by: Matt Haberland
* added IV test for quantile_test
* WIP tests for QuantileTest
* cleaned up the alternative IV test
* unit tests for QuantileTest
* PEP8 pass
* added example for the confidence_interval method
* Apply suggestions from code review
* updated Notes section of the docstring
* added examples
* TST: stats.quantile_test: adjust tests
* Fix typo in the example
  Co-authored-by: Matt Haberland
* Text improvement in the example
  Co-authored-by: Matt Haberland
* Improve example text
  Co-authored-by: Matt Haberland
* Simplify IV test
  Co-authored-by: Matt Haberland
* Fixing typo in comments
  Co-authored-by: Matt Haberland
* MAINT: stats.quantile_test: rewriting to follow Conover's book [skip ci]
  Co-authored-by: Matt Haberland
* TST: stats.quantile_test: use output of R implementation in test [skip ci]
* MAINT: stats.quantile_test: use dataclass instead of tuple_bunch [skip ci]
* TST: stats.quantile_test: adding Conover examples as tests [skip ci]
* MAINT: stats.quantile_test: more adjustments per review
* TST: stats.quantile_test: strengthen test of CIs vs p-values
* DOC: Docstring updates + change result attributes
  DOC: Sync docstring for test with actual implementation
  DOC: Indent hypotheses [skip actions]
  DOC: Fix docstring formatting [skip actions]
  DOC: Try to fix another sphinx issue [skip actions]
  TST: Fix hardcoded value in doctest [skip actions]
  TST: More fixes for doctest [skip actions]
  DOC: More thrashing [skip actions]
  DOC: sphinx was confused by `p`th [skip actions]
  DOC: try to deal with more sphinx weirdness [skip actions]
  DOC: Change pth to LaTeX [skip actions] [skip cirrus]
  DOC: Make p-value consistent in docs [skip actions] [skip cirrus]
  DOC: More docstring cleanup [skip actions] [skip cirrus]
  Update scipy/stats/_stats_py.py
  Co-authored-by: Matt Haberland
  Update scipy/stats/_stats_py.py [skip actions] [skip cirrus]
  Co-authored-by: Matt Haberland
  Remove blank line (squash)
  MAINT: Make result contain only one statistic + info
  MAINT: Bring some lines to 79 chars or less
  MAINT: Fix statistic not set for alternative two-sided
  MAINT: Update statistic_type -> statistic_sign
  TST: Fix doctests [skip actions] [skip cirrus]
  MAINT: Change to statistic_type 1, 2
  MAINT: Fix formatting in tests
  Apply suggestions from code review [skip cirrus] [skip actions]
  Co-authored-by: Matt Haberland
  DOC: Documentation fixes [skip actions] [skip cirrus]
  DOC: documentation fixes [skip actions] [skip cirrus]
  DOC: More doc fixes [skip actions] [skip cirrus]
  DOC: Adjust formatting for bullet list [skip actions] [skip cirrus]
  DOC: Adjust subscript to superscript in pth [skip cirrus] [skip actions]
  DOC: Remove extraneous blank line [skip actions] [skip cirrus]
  DOC: Unbold hypotheses [skip cirrus] [skip actions]
* DOC: More doc fixes [skip actions] [skip cirrus]

---------

Co-authored-by: Matt Haberland
Co-authored-by: Albert Steppi
---
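A quick usage sketch of the API this patch adds, for trying the branch
locally. The sketch is illustrative only: the seed and data are arbitrary,
so the printed statistic, p-value, and interval bounds will vary.

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(12345)
    x = stats.norm.rvs(size=100, random_state=rng)

    # Two-sided test that the population median (p=0.5) equals 0.
    res = stats.quantile_test(x, q=0, p=0.5)
    print(res.statistic, res.statistic_type, res.pvalue)

    # Nonparametric confidence interval for the same quantile.
    ci = res.confidence_interval(confidence_level=0.95)
    print(ci.low, ci.high)
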
 scipy/stats/__init__.py         |   1 +
 scipy/stats/_stats_py.py        | 468 +++++++++++++++++++++++++++++++-
 scipy/stats/tests/test_stats.py | 139 ++++++++++
 3 files changed, 603 insertions(+), 5 deletions(-)

diff --git a/scipy/stats/__init__.py b/scipy/stats/__init__.py
index 36f085363e5a..8a4f7dc16f8d 100644
--- a/scipy/stats/__init__.py
+++ b/scipy/stats/__init__.py
@@ -307,6 +307,7 @@
    ttest_1samp
    binomtest
+   quantile_test
    skewtest
    kurtosistest
    normaltest
 
diff --git a/scipy/stats/_stats_py.py b/scipy/stats/_stats_py.py
index 9e73ea78e57b..f318ec5fb32d 100644
--- a/scipy/stats/_stats_py.py
+++ b/scipy/stats/_stats_py.py
@@ -53,7 +53,7 @@
                            siegelslopes)
 from ._stats import (_kendall_dis, _toint64, _weightedrankedtau,
                      _local_correlations)
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from ._hypotests import _all_partitions
 from ._stats_pythran import _compute_outer_prob_inside_method
 from ._resampling import (MonteCarloMethod, PermutationMethod, BootstrapMethod,
@@ -92,10 +92,10 @@
            'kstest', 'ks_1samp', 'ks_2samp',
            'chisquare', 'power_divergence',
            'tiecorrect', 'ranksums', 'kruskal', 'friedmanchisquare',
-           'rankdata',
-           'combine_pvalues', 'wasserstein_distance', 'energy_distance',
+           'rankdata', 'combine_pvalues', 'quantile_test',
+           'wasserstein_distance', 'energy_distance',
            'brunnermunzel', 'alexandergovern',
-           'expectile', ]
+           'expectile']
 
 
 def _chk_asarray(a, axis):
@@ -1656,7 +1656,6 @@ def kurtosistest(a, axis=0, nan_policy='propagate', alternative='two-sided'):
         * 'propagate': returns nan
         * 'raise': throws an error
         * 'omit': performs the calculations ignoring nan values
-
     alternative : {'two-sided', 'less', 'greater'}, optional
         Defines the alternative hypothesis.
         The following options are available (default is 'two-sided'):
@@ -9729,6 +9728,465 @@ def combine_pvalues(pvalues, method='fisher', weights=None):
     return SignificanceResult(statistic, pval)
 
 
+@dataclass
+class QuantileTestResult:
+    r"""
+    Result of `scipy.stats.quantile_test`.
+
+    Attributes
+    ----------
+    statistic : float
+        The statistic used to calculate the p-value; either ``T1``, the
+        number of observations less than or equal to the hypothesized
+        quantile, or ``T2``, the number of observations strictly less than
+        the hypothesized quantile. Two test statistics are required to
+        handle the possibility that the data was generated from a discrete
+        or mixed distribution.
+
+    statistic_type : int
+        ``1`` or ``2`` depending on which of ``T1`` or ``T2`` was used to
+        calculate the p-value respectively. ``T1`` corresponds to the
+        ``"greater"`` alternative hypothesis and ``T2`` to the ``"less"``.
+        For the ``"two-sided"`` case, the statistic type that leads to the
+        smallest p-value is used. For significant tests,
+        ``statistic_type = 1`` means there is evidence that the population
+        quantile is significantly greater than the hypothesized value, and
+        ``statistic_type = 2`` means there is evidence that it is
+        significantly less than the hypothesized value.
+
+    pvalue : float
+        The p-value of the hypothesis test.
+    """
+    statistic: float
+    statistic_type: int
+    pvalue: float
+    _alternative: list[str] = field(repr=False)
+    _x: np.ndarray = field(repr=False)
+    _p: float = field(repr=False)
+
+    def confidence_interval(self, confidence_level=0.95):
+        """
+        Compute the confidence interval of the quantile.
+
+        Parameters
+        ----------
+        confidence_level : float, default: 0.95
+            Confidence level for the computed confidence interval
+            of the quantile.
+
+        Returns
+        -------
+        ci : ``ConfidenceInterval`` object
+            The object has attributes ``low`` and ``high`` that hold the
+            lower and upper bounds of the confidence interval.
+
+        Examples
+        --------
+        >>> import numpy as np
+        >>> import scipy.stats as stats
+        >>> p = 0.75  # quantile of interest
+        >>> q = 0  # hypothesized value of the quantile
+        >>> x = np.exp(np.arange(0, 1.01, 0.01))
+        >>> res = stats.quantile_test(x, q=q, p=p, alternative='less')
+        >>> lb, ub = res.confidence_interval()
+        >>> lb, ub
+        (-inf, 2.293318740264183)
+        >>> res = stats.quantile_test(x, q=q, p=p, alternative='two-sided')
+        >>> lb, ub = res.confidence_interval(0.9)
+        >>> lb, ub
+        (1.9542373206359396, 2.293318740264183)
+        """
+
+        alternative = self._alternative
+        p = self._p
+        x = np.sort(self._x)
+        n = len(x)
+        bd = stats.binom(n, p)
+
+        if confidence_level <= 0 or confidence_level >= 1:
+            message = "`confidence_level` must be a number between 0 and 1."
+            raise ValueError(message)
+
+        low_index = np.nan
+        high_index = np.nan
+
+        # The bounds of the confidence interval are order statistics of
+        # the sorted sample; their indices follow from inverting the
+        # binomial distribution of the counting statistic. Out-of-range
+        # indices mean there are too few observations for the requested
+        # confidence level, and the corresponding bound is NaN.
+        if alternative == 'less':
+            p = 1 - confidence_level
+            low = -np.inf
+            high_index = int(bd.isf(p))
+            high = x[high_index] if high_index < n else np.nan
+        elif alternative == 'greater':
+            p = 1 - confidence_level
+            low_index = int(bd.ppf(p)) - 1
+            low = x[low_index] if low_index >= 0 else np.nan
+            high = np.inf
+        elif alternative == 'two-sided':
+            p = (1 - confidence_level) / 2
+            low_index = int(bd.ppf(p)) - 1
+            low = x[low_index] if low_index >= 0 else np.nan
+            high_index = int(bd.isf(p))
+            high = x[high_index] if high_index < n else np.nan
+
+        return ConfidenceInterval(low, high)
+
+
+def quantile_test_iv(x, q, p, alternative):
+    # Input validation and standardization for `quantile_test`.
+
+    x = np.atleast_1d(x)
+    message = '`x` must be a one-dimensional array of numbers.'
+    if x.ndim != 1 or not np.issubdtype(x.dtype, np.number):
+        raise ValueError(message)
+
+    q = np.array(q)[()]
+    message = "`q` must be a scalar."
+    if q.ndim != 0 or not np.issubdtype(q.dtype, np.number):
+        raise ValueError(message)
+
+    p = np.array(p)[()]
+    message = "`p` must be a float strictly between 0 and 1."
+    if p.ndim != 0 or p >= 1 or p <= 0:
+        raise ValueError(message)
+
+    alternatives = {'two-sided', 'less', 'greater'}
+    message = f"`alternative` must be one of {alternatives}"
+    if alternative not in alternatives:
+        raise ValueError(message)
+
+    return x, q, p, alternative
+
+
+def quantile_test(x, *, q=0, p=0.5, alternative='two-sided'):
+    r"""
+    Perform a quantile test and compute a confidence interval of the quantile.
+
+    This function tests the null hypothesis that `q` is the value of the
+    quantile associated with probability `p` of the population underlying
+    sample `x`. For example, with default parameters, it tests that the
+    median of the population underlying `x` is zero. The function returns
+    an object including the test statistic, a p-value, and a method for
+    computing the confidence interval around the quantile.
+
+    Parameters
+    ----------
+    x : array_like
+        A one-dimensional sample.
+    q : float, default: 0
+        The hypothesized value of the quantile.
+    p : float, default: 0.5
+        The probability associated with the quantile; i.e., the proportion
+        of the population less than `q` is `p`. Must be strictly between 0
+        and 1.
+    alternative : {'two-sided', 'less', 'greater'}, optional
+        Defines the alternative hypothesis.
+        The following options are available (default is 'two-sided'):
+
+        * 'two-sided': the quantile associated with the probability `p`
+          is not `q`.
+        * 'less': the quantile associated with the probability `p` is less
+          than `q`.
+        * 'greater': the quantile associated with the probability `p` is
+          greater than `q`.
+
+    Returns
+    -------
+    result : QuantileTestResult
+        An object with the following attributes:
+
+        statistic : float
+            One of two test statistics that may be used in the quantile
+            test. The first test statistic, ``T1``, is the number of
+            samples in `x` that are less than or equal to the hypothesized
+            quantile `q`. The second test statistic, ``T2``, is the number
+            of samples in `x` that are strictly less than the hypothesized
+            quantile `q`.
+
+            When ``alternative = 'greater'``, ``T1`` is used to calculate
+            the p-value and ``statistic`` is set to ``T1``.
+
+            When ``alternative = 'less'``, ``T2`` is used to calculate the
+            p-value and ``statistic`` is set to ``T2``.
+
+            When ``alternative = 'two-sided'``, both ``T1`` and ``T2`` are
+            considered, and the one that leads to the smallest p-value is
+            used.
+
+        statistic_type : int
+            Either ``1`` or ``2`` depending on which of ``T1`` or ``T2``
+            was used to calculate the p-value.
+
+        pvalue : float
+            The p-value associated with the given alternative.
+
+        The object also has the following method:
+
+        confidence_interval(confidence_level=0.95)
+            Computes a confidence interval around the population quantile
+            associated with the probability `p`. The confidence interval
+            is returned in a ``namedtuple`` with fields `low` and `high`.
+            Values are `nan` when there are not enough observations to
+            compute the confidence interval at the desired confidence.
+
+    Notes
+    -----
+    This test and its method for computing confidence intervals are
+    non-parametric. They are valid if and only if the observations are
+    i.i.d.
+
+    The implementation of the test follows Conover [1]_. Two test
+    statistics are considered.
+
+    ``T1``: The number of observations in `x` less than or equal to `q`.
+
+        ``T1 = (x <= q).sum()``
+
+    ``T2``: The number of observations in `x` strictly less than `q`.
+
+        ``T2 = (x < q).sum()``
+
+    The use of two test statistics is necessary to handle the possibility
+    that `x` was generated from a discrete or mixed distribution.
+
+    The null hypothesis for the test is:
+
+        H0: The :math:`p^{\mathrm{th}}` population quantile is `q`.
+
+    and the null distribution for each test statistic is
+    :math:`\mathrm{binom}\left(n, p\right)`. When ``alternative='less'``,
+    the alternative hypothesis is:
+
+        H1: The :math:`p^{\mathrm{th}}` population quantile is less than
+        `q`.
+
+    and the p-value is the probability that the binomial random variable
+
+    .. math::
+        Y \sim \mathrm{binom}\left(n, p\right)
+
+    is greater than or equal to the observed value ``T2``.
+
+    When ``alternative='greater'``, the alternative hypothesis is:
+
+        H1: The :math:`p^{\mathrm{th}}` population quantile is greater
+        than `q`
+
+    and the p-value is the probability that the binomial random variable Y
+    is less than or equal to the observed value ``T1``.
+
+    When ``alternative='two-sided'``, the alternative hypothesis is
+
+        H1: `q` is not the :math:`p^{\mathrm{th}}` population quantile.
+
+    and the p-value is twice the smaller of the p-values for the ``'less'``
+    and ``'greater'`` cases. Both of these p-values can exceed 0.5 for the
+    same data, so the value is clipped into the interval :math:`[0, 1]`.
+
+    The approach for confidence intervals is attributed to Thompson [2]_
+    and later proven to be applicable to any set of i.i.d. samples [3]_.
+    The computation is based on the observation that the probability that
+    a quantile :math:`q` is larger than any observation
+    :math:`x_m (1 \leq m \leq N)` can be computed as
+
+    .. math::
+
+        \mathbb{P}(x_m \leq q) = 1 - \sum_{k=0}^{m-1} \binom{N}{k}
+        q^k(1-q)^{N-k}
+
+    By default, confidence intervals are computed for a 95% confidence
+    level. A common interpretation of a 95% confidence interval is that if
+    i.i.d. samples are drawn repeatedly from the same population and
+    confidence intervals are formed each time, the confidence interval
+    will contain the true value of the specified quantile in approximately
+    95% of trials.
+
+    A similar function is available in the QuantileNPCI R package [4]_.
+    The foundation is the same, but it computes the confidence interval
+    bounds by doing interpolations between the sample values, whereas this
+    function uses only sample values as bounds. Thus,
+    ``quantile_test.confidence_interval`` returns more conservative
+    intervals (i.e., larger).
+
+    The same computation of confidence intervals for quantiles is included
+    in the confintr package [5]_.
+
+    Two-sided confidence intervals are not guaranteed to be optimal; i.e.,
+    there may exist a tighter interval that may contain the quantile of
+    interest with probability larger than the confidence level.
+    Without further assumptions about the samples (e.g., the nature of the
+    underlying distribution), the one-sided intervals are optimally tight.
+
+    References
+    ----------
+    .. [1] W. J. Conover. Practical Nonparametric Statistics, 3rd Ed. 1999.
+    .. [2] W. R. Thompson, "On Confidence Ranges for the Median and Other
+           Expectation Distributions for Populations of Unknown
+           Distribution Form," The Annals of Mathematical Statistics,
+           vol. 7, no. 3, pp. 122-128, 1936. Available:
+           https://www.jstor.org/stable/2957563.
+    .. [3] H. A. David and H. N. Nagaraja, "Order Statistics in
+           Nonparametric Inference" in Order Statistics, John Wiley &
+           Sons, Ltd, 2005, pp. 159-170. Available:
+           https://onlinelibrary.wiley.com/doi/10.1002/0471722162.ch7.
+    .. [4] N. Hutson, A. Hutson, L. Yan, "QuantileNPCI: Nonparametric
+           Confidence Intervals for Quantiles," R package,
+           https://cran.r-project.org/package=QuantileNPCI
+    .. [5] M. Mayer, "confintr: Confidence Intervals," R package,
+           https://cran.r-project.org/package=confintr
+
+    Examples
+    --------
+    Suppose we wish to test the null hypothesis that the median of a
+    population is equal to 0.5. We choose a confidence level of 99%; that
+    is, we will reject the null hypothesis in favor of the alternative if
+    the p-value is less than 0.01.
+
+    When testing random variates from the standard uniform distribution,
+    which has a median of 0.5, we expect the data to be consistent with
+    the null hypothesis most of the time.
+
+    >>> import numpy as np
+    >>> from scipy import stats
+    >>> rng = np.random.default_rng(6981396440634228121)
+    >>> rvs = stats.uniform.rvs(size=100, random_state=rng)
+    >>> stats.quantile_test(rvs, q=0.5, p=0.5)
+    QuantileTestResult(statistic=45, statistic_type=1, pvalue=0.36820161732669576)
+
+    As expected, the p-value is not below our threshold of 0.01, so
+    we cannot reject the null hypothesis.
+
+    When testing data from the standard *normal* distribution, which has a
+    median of 0, we would expect the null hypothesis to be rejected.
+
+    >>> rvs = stats.norm.rvs(size=100, random_state=rng)
+    >>> stats.quantile_test(rvs, q=0.5, p=0.5)
+    QuantileTestResult(statistic=67, statistic_type=2, pvalue=0.0008737198369123724)
+
+    Indeed, the p-value is lower than our threshold of 0.01, so we reject
+    the null hypothesis in favor of the default "two-sided" alternative:
+    the median of the population is *not* equal to 0.5.
+
+    However, suppose we were to test the null hypothesis against the
+    one-sided alternative that the median of the population is *greater*
+    than 0.5. Since the median of the standard normal is less than 0.5,
+    we would not expect the null hypothesis to be rejected.
+
+    >>> stats.quantile_test(rvs, q=0.5, p=0.5, alternative='greater')
+    QuantileTestResult(statistic=67, statistic_type=1, pvalue=0.9997956114162866)
+
+    Unsurprisingly, with a p-value greater than our threshold, we would
+    not reject the null hypothesis in favor of the chosen alternative.
+
+    The quantile test can be used for any quantile, not only the median.
+    For example, we can test whether the third quartile of the
+    distribution underlying the sample is greater than 0.6.
+
+    >>> rvs = stats.uniform.rvs(size=100, random_state=rng)
+    >>> stats.quantile_test(rvs, q=0.6, p=0.75, alternative='greater')
+    QuantileTestResult(statistic=64, statistic_type=1, pvalue=0.00940696592998271)
+
+    The p-value is lower than the threshold. We reject the null hypothesis
+    in favor of the alternative: the third quartile of the distribution
+    underlying our sample is greater than 0.6.
+
+    `quantile_test` can also compute confidence intervals for any
+    quantile.
+
+    >>> rvs = stats.norm.rvs(size=100, random_state=rng)
+    >>> res = stats.quantile_test(rvs, q=0.6, p=0.75)
+    >>> ci = res.confidence_interval(confidence_level=0.95)
+    >>> ci
+    ConfidenceInterval(low=0.284491604437432, high=0.8912531024914844)
+
+    When testing a one-sided alternative, the confidence interval contains
+    all observations such that if passed as `q`, the p-value of the
+    test would be greater than 0.05, and therefore the null hypothesis
+    would not be rejected. For example:
+
+    >>> rvs.sort()
+    >>> q, p, alpha = 0.6, 0.75, 0.95
+    >>> res = stats.quantile_test(rvs, q=q, p=p, alternative='less')
+    >>> ci = res.confidence_interval(confidence_level=alpha)
+    >>> for x in rvs[rvs <= ci.high]:
+    ...     res = stats.quantile_test(rvs, q=x, p=p, alternative='less')
+    ...     assert res.pvalue > 1-alpha
+    >>> for x in rvs[rvs > ci.high]:
+    ...     res = stats.quantile_test(rvs, q=x, p=p, alternative='less')
+    ...     assert res.pvalue < 1-alpha
+
+    Also, if a 95% confidence interval is repeatedly generated for random
+    samples, the confidence interval will contain the true quantile value
+    in approximately 95% of replications.
+
+    >>> dist = stats.rayleigh()  # our "unknown" distribution
+    >>> p = 0.2
+    >>> true_stat = dist.ppf(p)  # the true value of the statistic
+    >>> n_trials = 1000
+    >>> quantile_ci_contains_true_stat = 0
+    >>> for i in range(n_trials):
+    ...     data = dist.rvs(size=100, random_state=rng)
+    ...     res = stats.quantile_test(data, p=p)
+    ...     ci = res.confidence_interval(0.95)
+    ...     if ci[0] < true_stat < ci[1]:
+    ...         quantile_ci_contains_true_stat += 1
+    >>> quantile_ci_contains_true_stat >= 950
+    True
+
+    This works with any distribution and any quantile, as long as the
+    samples are i.i.d.
+ """ + # Implementation carefully follows [1] 3.2 + # "H0: the p*th quantile of X is x*" + # To facilitate comparison with [1], we'll use variable names that + # best match Conover's notation + X, x_star, p_star, H1 = quantile_test_iv(x, q, p, alternative) + + # "We will use two test statistics in this test. Let T1 equal " + # "the number of observations less than or equal to x*, and " + # "let T2 equal the number of observations less than x*." + T1 = (X <= x_star).sum() + T2 = (X < x_star).sum() + + # "The null distribution of the test statistics T1 and T2 is " + # "the binomial distribution, with parameters n = sample size, and " + # "p = p* as given in the null hypothesis.... Y has the binomial " + # "distribution with parameters n and p*." + n = len(X) + Y = stats.binom(n=n, p=p_star) + + # "H1: the p* population quantile is less than x*" + if H1 == 'less': + # "The p-value is the probability that a binomial random variable Y " + # "is greater than *or equal to* the observed value of T2...using p=p*" + pvalue = Y.sf(T2-1) # Y.pmf(T2) + Y.sf(T2) + statistic = T2 + statistic_type = 2 + # "H1: the p* population quantile is greater than x*" + elif H1 == 'greater': + # "The p-value is the probability that a binomial random variable Y " + # "is less than or equal to the observed value of T1... using p = p*" + pvalue = Y.cdf(T1) + statistic = T1 + statistic_type = 1 + # "H1: x* is not the p*th population quantile" + elif H1 == 'two-sided': + # "The p-value is twice the smaller of the probabilities that a + # binomial random variable Y is less than or equal to the observed + # value of T1 or greater than or equal to the observed value of T2 + # using p=p*." + # Note: both one-sided p-values can exceed 0.5 for the same data, so + # `clip` + pvalues = [Y.cdf(T1), Y.sf(T2 - 1)] # [greater, less] + sorted_idx = np.argsort(pvalues) + pvalue = np.clip(2*pvalues[sorted_idx[0]], 0, 1) + if sorted_idx[0]: + statistic, statistic_type = T2, 2 + else: + statistic, statistic_type = T1, 1 + + return QuantileTestResult( + statistic=statistic, + statistic_type=statistic_type, + pvalue=pvalue, + _alternative=H1, + _x=X, + _p=p_star + ) + + ##################################### # STATISTICAL DISTANCES # ##################################### diff --git a/scipy/stats/tests/test_stats.py b/scipy/stats/tests/test_stats.py index 7b7ec27c26b7..8fb85d090250 100644 --- a/scipy/stats/tests/test_stats.py +++ b/scipy/stats/tests/test_stats.py @@ -7996,6 +7996,145 @@ def test_alias(self): assert_equal(res.stat, res.statistic) +class TestQuantileTest(): + r""" Test the non-parametric quantile test, + including the computation of confidence intervals + """ + + def test_quantile_test_iv(self): + x = [1, 2, 3] + + message = "`x` must be a one-dimensional array of numbers." + with pytest.raises(ValueError, match=message): + stats.quantile_test([x]) + + message = "`q` must be a scalar." + with pytest.raises(ValueError, match=message): + stats.quantile_test(x, q=[1, 2]) + + message = "`p` must be a float strictly between 0 and 1." + with pytest.raises(ValueError, match=message): + stats.quantile_test(x, p=[0.5, 0.75]) + with pytest.raises(ValueError, match=message): + stats.quantile_test(x, p=2) + with pytest.raises(ValueError, match=message): + stats.quantile_test(x, p=-0.5) + + message = "`alternative` must be one of..." + with pytest.raises(ValueError, match=message): + stats.quantile_test(x, alternative='one-sided') + + message = "`confidence_level` must be a number between 0 and 1." 
+        with pytest.raises(ValueError, match=message):
+            stats.quantile_test(x).confidence_interval(1)
+
+    @pytest.mark.parametrize(
+        'p, alpha, lb, ub, alternative',
+        [[0.3, 0.95, 1.221402758160170, 1.476980793882643, 'two-sided'],
+         [0.5, 0.9, 1.506817785112854, 1.803988415397857, 'two-sided'],
+         [0.25, 0.95, -np.inf, 1.39096812846378, 'less'],
+         [0.8, 0.9, 2.117000016612675, np.inf, 'greater']]
+    )
+    def test_R_ci_quantile(self, p, alpha, lb, ub, alternative):
+        # Test against R library `confintr` function `ci_quantile`, e.g.
+        # library(confintr)
+        # options(digits=16)
+        # x <- exp(seq(0, 1, by = 0.01))
+        # ci_quantile(x, q = 0.3)$interval
+        # ci_quantile(x, q = 0.5, probs = c(0.05, 0.95))$interval
+        # ci_quantile(x, q = 0.25, probs = c(0, 0.95))$interval
+        # ci_quantile(x, q = 0.8, probs = c(0.1, 1))$interval
+        x = np.exp(np.arange(0, 1.01, 0.01))
+        res = stats.quantile_test(x, p=p, alternative=alternative)
+        assert_allclose(res.confidence_interval(alpha), [lb, ub],
+                        rtol=1e-15)
+
+    @pytest.mark.parametrize(
+        'q, p, alternative, ref',
+        [[1.2, 0.3, 'two-sided', 0.01515567517648],
+         [1.8, 0.5, 'two-sided', 0.1109183496606]]
+    )
+    def test_R_pvalue(self, q, p, alternative, ref):
+        # Test against R library `snpar` function `quant.test`, e.g.
+        # library(snpar)
+        # options(digits=16)
+        # x <- exp(seq(0, 1, by=0.01))
+        # quant.test(x, q=1.2, p=0.3, exact=TRUE, alternative='t')
+        x = np.exp(np.arange(0, 1.01, 0.01))
+        res = stats.quantile_test(x, q=q, p=p, alternative=alternative)
+        assert_allclose(res.pvalue, ref, rtol=1e-12)
+
+    @pytest.mark.parametrize('case', ['continuous', 'discrete'])
+    @pytest.mark.parametrize('alternative', ['less', 'greater'])
+    @pytest.mark.parametrize('alpha', [0.9, 0.95])
+    def test_pval_ci_match(self, case, alternative, alpha):
+        # Verify that the following statement holds:
+        #
+        # The 95% confidence interval corresponding to alternative='less'
+        # has -inf as its lower bound, and the upper bound `xu` is the
+        # greatest element from the sample `x` such that
+        # `stats.quantile_test(x, q=xu, p=p, alternative='less').pvalue`
+        # will be greater than 5%.
+        #
+        # And the corresponding statement holds for the
+        # alternative='greater' case.
+
+        seed = int((7**len(case) + len(alternative))*alpha)
+        rng = np.random.default_rng(seed)
+        if case == 'continuous':
+            p, q = rng.random(size=2)
+            rvs = rng.random(size=100)
+        else:
+            rvs = rng.integers(1, 11, size=100)
+            p = rng.random()
+            q = rng.integers(1, 11)
+
+        res = stats.quantile_test(rvs, q=q, p=p, alternative=alternative)
+        ci = res.confidence_interval(confidence_level=alpha)
+
+        # select elements inside the confidence interval based on
+        # the alternative
+        if alternative == 'less':
+            i_inside = rvs <= ci.high
+        else:
+            i_inside = rvs >= ci.low
+
+        for x in rvs[i_inside]:
+            res = stats.quantile_test(rvs, q=x, p=p,
+                                      alternative=alternative)
+            assert res.pvalue > 1 - alpha
+
+        for x in rvs[~i_inside]:
+            res = stats.quantile_test(rvs, q=x, p=p,
+                                      alternative=alternative)
+            assert res.pvalue < 1 - alpha
+
+    def test_match_conover_examples(self):
+        # Test against the examples in [1] (Conover, Practical
+        # Nonparametric Statistics, Third Edition), pg 139
+
+        # Example 1
+        # Data is [189, 233, 195, 160, 212, 176, 231, 185, 199, 213, 202,
+        # 193, 174, 166, 248]
+        # Two-sided test of whether the upper quartile (p=0.75) equals 193
+        # (q=193). Conover shows that 7 of the observations are less than
+        # or equal to 193, and "for the binomial random variable Y,
+        # P(Y<=7) = 0.0173", so the two-sided p-value is twice that,
+        # 0.0346.
+        x = [189, 233, 195, 160, 212, 176, 231, 185, 199, 213, 202, 193,
+             174, 166, 248]
+        pvalue_expected = 0.0346
+        res = stats.quantile_test(x, q=193, p=0.75,
+                                  alternative='two-sided')
+        assert_allclose(res.pvalue, pvalue_expected, rtol=1e-5)
+
+        # Example 2
+        # Conover doesn't give explicit data, just that 8 out of 112
+        # observations are 60 or less. The test is whether the median time
+        # is equal to 60 against the alternative that the median is
+        # greater than 60. The p-value is calculated as P(Y<=8), where Y
+        # is again a binomially distributed random variable, now with
+        # p=0.5 and n=112. Conover uses a normal approximation, but we can
+        # easily calculate the CDF of the binomial distribution.
+        x = [59]*8 + [61]*(112-8)
+        pvalue_expected = stats.binom(p=0.5, n=112).cdf(8)
+        res = stats.quantile_test(x, q=60, p=0.5, alternative='greater')
+        assert_allclose(res.pvalue, pvalue_expected, atol=1e-10)
+
+
 class TestPageTrendTest:
     # expected statistic and p-values generated using R at
     # https://rdrr.io/cran/cultevo/, e.g.
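
As an independent cross-check of the Conover logic implemented above, the
p-values can be reproduced directly from the two counting statistics and
the binomial null distribution, as described in the Notes section of the
docstring. A minimal sketch; the helper name `quantile_pvalue` is ours and
not part of the patch:

    import numpy as np
    from scipy import stats

    def quantile_pvalue(x, q, p, alternative):
        # T1 counts observations <= q (used for 'greater');
        # T2 counts observations < q (used for 'less').
        x = np.asarray(x)
        n = len(x)
        T1 = (x <= q).sum()
        T2 = (x < q).sum()
        Y = stats.binom(n=n, p=p)
        p_greater = Y.cdf(T1)    # P(Y <= T1)
        p_less = Y.sf(T2 - 1)    # P(Y >= T2)
        if alternative == 'greater':
            return p_greater
        if alternative == 'less':
            return p_less
        # 'two-sided': twice the smaller one-sided p-value, clipped to 1
        return min(1.0, 2 * min(p_greater, p_less))

    # Conover's Example 1 (pg 139): should print approximately 0.0346
    x = [189, 233, 195, 160, 212, 176, 231, 185, 199, 213, 202, 193,
         174, 166, 248]
    print(quantile_pvalue(x, q=193, p=0.75, alternative='two-sided'))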