From b7bb49c473ffd7105bb956a19356ec2bfd1282f3 Mon Sep 17 00:00:00 2001 From: Serge Rey Date: Tue, 25 Jun 2019 20:40:06 -0700 Subject: [PATCH] DOC: update api docs --- doc/api.rst | 9 +++ esda/getisord.py | 94 ++++++++++++++++++---------- esda/smaup.py | 160 ++++++++++++++++++++++++++--------------------- 3 files changed, 157 insertions(+), 106 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 196ecde7..cb65ccc1 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -61,6 +61,15 @@ Moran Statistics esda.Moran_Rate esda.Moran_Local_Rate +Modifiable Areal Unit Tests +--------------------------- + +.. autosummary:: + :toctree: generated/ + + smaup.Smaup + + Utility Functions ---------------- diff --git a/esda/getisord.py b/esda/getisord.py index 3cf7270f..3d84abb9 100644 --- a/esda/getisord.py +++ b/esda/getisord.py @@ -2,9 +2,9 @@ Getis and Ord G statistic for spatial autocorrelation """ __author__ = "Sergio J. Rey , Myunghwa Hwang " -__all__ = ['G', 'G_Local'] +__all__ = ["G", "G_Local"] -from libpysal.common import np, stats +from libpysal.common import np, stats from libpysal.weights.spatial_lag import lag_spatial as slag from .tabular import _univariate_handler @@ -75,20 +75,25 @@ class G(object): >>> numpy.random.seed(10) Preparing a point data set + >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] Creating a weights object from points + >>> w = libpysal.weights.DistanceBand(points,threshold=15) >>> w.transform = "B" Preparing a variable + >>> y = numpy.array([2, 3, 3.2, 5, 8, 7]) Applying Getis and Ord G test + >>> from esda.getisord import G >>> g = G(y,w) Examining the results + >>> round(g.G, 3) 0.557 @@ -106,26 +111,29 @@ def __init__(self, y, w, permutations=PERMUTATIONS): self.permutations = permutations self.__moments() self.y2 = y * y - y = y.reshape(len(y), 1) # Ensure that y is an n by 1 vector, otherwise y*y.T == y*y + y = y.reshape( + len(y), 1 + ) # Ensure that y is an n by 1 vector, otherwise y*y.T == y*y self.den_sum = (y * y.T).sum() - (y * y).sum() self.G = self.__calc(self.y) self.z_norm = (self.G - self.EG) / np.sqrt(self.VG) self.p_norm = 1.0 - stats.norm.cdf(np.abs(self.z_norm)) if permutations: - sim = [self.__calc(np.random.permutation(self.y)) - for i in range(permutations)] + sim = [ + self.__calc(np.random.permutation(self.y)) for i in range(permutations) + ] self.sim = sim = np.array(sim) above = sim >= self.G larger = sum(above) if (self.permutations - larger) < larger: larger = self.permutations - larger - self.p_sim = (larger + 1.0) / (permutations + 1.) + self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EG_sim = sum(sim) / permutations self.seG_sim = sim.std() self.VG_sim = self.seG_sim ** 2 self.z_sim = (self.G - self.EG_sim) / self.seG_sim - self.p_z_sim = 1. - stats.norm.cdf(np.abs(self.z_sim)) + self.p_z_sim = 1.0 - stats.norm.cdf(np.abs(self.z_sim)) def __moments(self): y = self.y @@ -138,8 +146,8 @@ def __moments(self): s1 = w.s1 s2 = w.s2 b0 = (n2 - 3 * n + 3) * s1 - n * s2 + 3 * s02 - b1 = (-1.) * ((n2 - n) * s1 - 2 * n * s2 + 6 * s02) - b2 = (-1.) * (2 * n * s1 - (n + 3) * s2 + 6 * s02) + b1 = (-1.0) * ((n2 - n) * s1 - 2 * n * s2 + 6 * s02) + b2 = (-1.0) * (2 * n * s1 - (n + 3) * s2 + 6 * s02) b3 = 4 * (n - 1) * s1 - 2 * (n + 1) * s2 + 8 * s02 b4 = s1 - s2 + s02 self.b0 = b0 @@ -150,12 +158,10 @@ def __moments(self): y2 = y * y y3 = y * y2 y4 = y2 * y2 - EG2 = (b0 * (sum( - y2) ** 2) + b1 * sum(y4) + b2 * (sum(y) ** 2) * sum(y2)) + EG2 = b0 * (sum(y2) ** 2) + b1 * sum(y4) + b2 * (sum(y) ** 2) * sum(y2) EG2 += b3 * sum(y) * sum(y3) + b4 * (sum(y) ** 4) EG2NUM = EG2 - EG2DEN = (((sum(y) ** 2 - sum(y2)) ** 2) * n * (n - 1) * ( - n - 2) * (n - 3)) + EG2DEN = ((sum(y) ** 2 - sum(y2)) ** 2) * n * (n - 1) * (n - 2) * (n - 3) self.EG2 = EG2NUM / EG2DEN self.VG = self.EG2 - self.EG ** 2 @@ -170,7 +176,9 @@ def _statistic(self): return self.G @classmethod - def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **stat_kws): + def by_col( + cls, df, cols, w=None, inplace=False, pvalue="sim", outvals=None, **stat_kws + ): """ Function to compute a G statistic on a dataframe @@ -203,10 +211,17 @@ def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **s returns a copy of the dataframe with the relevant columns attached. """ - return _univariate_handler(df, cols, w=w, inplace=inplace, pvalue=pvalue, - outvals=outvals, stat=cls, - swapname=cls.__name__.lower(), **stat_kws) - + return _univariate_handler( + df, + cols, + w=w, + inplace=inplace, + pvalue=pvalue, + outvals=outvals, + stat=cls, + swapname=cls.__name__.lower(), + **stat_kws + ) class G_Local(object): @@ -318,9 +333,11 @@ class G_Local(object): >>> numpy.random.seed(10) Applying Getis and Ord local G* test using a binary weights object + >>> lg_star = G_Local(y,w,transform='B',star=True) Examining the results + >>> lg_star.Zs array([-1.39727626, -0.28917762, 0.65064964, -0.28917762, 1.23452088, 2.02424331]) @@ -330,9 +347,11 @@ class G_Local(object): >>> numpy.random.seed(12345) Applying Getis and Ord local G test using a row-standardized weights object + >>> lg = G_Local(y,w,transform='R') Examining the results + >>> lg.Zs array([-0.62074534, -0.01780611, 1.31558703, -0.12824171, 0.28843496, 1.77833941]) @@ -346,6 +365,7 @@ class G_Local(object): >>> lg_star = G_Local(y,w,transform='R',star=True) Examining the results + >>> lg_star.Zs array([-0.62488094, -0.09144599, 0.41150696, -0.09144599, 0.24690418, 1.28024388]) @@ -353,7 +373,8 @@ class G_Local(object): 0.101 """ - def __init__(self, y, w, transform='R', permutations=PERMUTATIONS, star=False): + + def __init__(self, y, w, transform="R", permutations=PERMUTATIONS, star=False): y = np.asarray(y).flatten() self.n = len(y) self.y = y @@ -363,8 +384,7 @@ def __init__(self, y, w, transform='R', permutations=PERMUTATIONS, star=False): self.permutations = permutations self.star = star self.calc() - self.p_norm = np.array( - [1 - stats.norm.cdf(np.abs(i)) for i in self.Zs]) + self.p_norm = np.array([1 - stats.norm.cdf(np.abs(i)) for i in self.Zs]) if permutations: self.__crand() sim = np.transpose(self.rGs) @@ -391,7 +411,7 @@ def __crand(self): ids = np.arange(self.w.n) ido = self.w.id_order wc = self.__getCardinalities() - if self.w_transform == 'r': + if self.w_transform == "r": den = np.array(wc) + self.star else: den = np.ones(self.w.n) @@ -401,14 +421,12 @@ def __crand(self): yi_star = y[i] * self.star wci = wc[i] rGs[i] = (y[idsi[rids[:, 0:wci]]]).sum(1) + yi_star - rGs[i] = (np.array(rGs[i]) / den[i]) / ( - self.y_sum - (1 - self.star) * y[i]) + rGs[i] = (np.array(rGs[i]) / den[i]) / (self.y_sum - (1 - self.star) * y[i]) self.rGs = rGs def __getCardinalities(self): ido = self.w.id_order - self.wc = np.array( - [self.w.cardinalities[ido[i]] for i in range(self.n)]) + self.wc = np.array([self.w.cardinalities[ido[i]] for i in range(self.n)]) return self.wc def calc(self): @@ -425,10 +443,10 @@ def calc(self): yl_mean = ydi / N s2 = (y2_sum - y2) / N - (yl_mean) ** 2 else: - self.w.transform = 'B' + self.w.transform = "B" yl = 1.0 * slag(self.w, y) yl += y - if self.w_transform == 'r': + if self.w_transform == "r": yl = yl / (self.__getCardinalities() + 1.0) self.Gs = yl / y_sum N = self.n @@ -436,7 +454,7 @@ def calc(self): s2 = y.var() EGs_num, VGs_num = 1.0, 1.0 - if self.w_transform == 'b': + if self.w_transform == "b": W = self.__getCardinalities() W += self.star EGs_num = W * 1.0 @@ -454,7 +472,9 @@ def _statistic(self): return self.Gs @classmethod - def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **stat_kws): + def by_col( + cls, df, cols, w=None, inplace=False, pvalue="sim", outvals=None, **stat_kws + ): """ Function to compute a G_Local statistic on a dataframe @@ -489,6 +509,14 @@ def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **s the relevant columns attached. """ - return _univariate_handler(df, cols, w=w, inplace=inplace, pvalue=pvalue, - outvals=outvals, stat=cls, - swapname=cls.__name__.lower(), **stat_kws) + return _univariate_handler( + df, + cols, + w=w, + inplace=inplace, + pvalue=pvalue, + outvals=outvals, + stat=cls, + swapname=cls.__name__.lower(), + **stat_kws + ) diff --git a/esda/smaup.py b/esda/smaup.py index 6ac84c6b..880432ee 100644 --- a/esda/smaup.py +++ b/esda/smaup.py @@ -44,7 +44,7 @@ class Smaup(object): critical_1 : float : critical value at 0.90 confidence level summary : string - : message with interpretation of results + : message with interpretation of results Notes ----- @@ -64,18 +64,19 @@ class Smaup(object): >>> n = len(y) >>> k = int(n/2) >>> s = Smaup(n,k,rho) - >>> s.smaup + >>> s.smaup 0.15221341690376405 - >>> s.critical_01 + >>> s.critical_01 0.38970613333333337 - >>> s.critical_05 + >>> s.critical_05 0.3557221333333333 - >>> s.critical_1 + >>> s.critical_1 0.3157950666666666 - >>> s.summary - 'Pseudo p-value > 0.10 (H0 is not rejected)' - + >>> s.summary + Pseudo p-value > 0.10 (H0 is not rejected) + SIDS example replicating OpenGeoda + >>> w = libpysal.io.open(libpysal.examples.get_path("sids2.gal")).read() >>> f = libpysal.io.open(libpysal.examples.get_path("sids2.dbf")) >>> SIDR = np.array(f.by_col("SIDR74")) @@ -84,87 +85,104 @@ class Smaup(object): >>> n = len(y) >>> k = int(n/2) >>> s = Smaup(n,k,rho) - >>> s.smaup + >>> s.smaup 0.15176796553181948 - >>> s.critical_01 + >>> s.critical_01 0.38970613333333337 - >>> s.critical_05 + >>> s.critical_05 0.3557221333333333 - >>> s.critical_1 + >>> s.critical_1 0.3157950666666666 - >>> s.summary - 'Pseudo p-value > 0.10 (H0 is not rejected)' + >>> s.summary + Pseudo p-value > 0.10 (H0 is not rejected) """ + def __init__(self, n, k, rho): - + self.n = n self.k = k self.rho = rho - #Critical values of S-maup for alpha =0.01 - CV0_01 = np.array([[np.nan,25,100,225,400,625,900], - [-0.9,0.83702,0.09218,0.23808,0.05488,0.07218,0.02621], - [-0.7,0.83676,0.16134,0.13402,0.06737,0.05486,0.02858], - [-0.5,0.83597,0.16524,0.13446,0.06616,0.06247,0.02851], - [-0.3,0.83316,0.19276,0.13396,0.0633,0.0609,0.03696], - [0,0.8237,0.17925,0.15514,0.07732,0.07988,0.09301], - [0.3,0.76472,0.23404,0.2464,0.11588,0.10715,0.0707], - [0.5,0.67337,0.28921,0.25535,0.13992,0.12975,0.09856], - [0.7,0.52155,0.47399,0.29351,0.23923,0.20321,0.1625], - [0.9,0.28599,0.28938,0.4352,0.4406,0.34437,0.55967]]) - - #Critical values of S-maup for alpha =0.05 - CV0_05 = np.array([[np.nan,25,100,225,400,625,900], - [-0.9,0.83699,0.08023,0.10962,0.04894,0.04641,0.02423], - [-0.7,0.83662,0.12492,0.08643,0.059,0.0428,0.02459], - [-0.5,0.83578,0.13796,0.08679,0.05927,0.0426,0.02658], - [-0.3,0.78849,0.16932,0.08775,0.05464,0.04787,0.03042], - [0,0.81952,0.15746,0.11126,0.06961,0.06066,0.05234], - [0.3,0.70466,0.21088,0.1536,0.09766,0.07938,0.06461], - [0.5,0.59461,0.23497,0.18244,0.11682,0.10129,0.0886], - [0.7,0.48958,0.37226,0.2228,0.2054,0.16144,0.14123], - [0.9,0.2158,0.22532,0.27122,0.29043,0.23648,0.31424]]) - - #Critical values of S-maup for alpha =0.10 - CV0_10 = np.array([[np.nan,25,100,225,400,625,900], - [-0.9,0.69331,0.06545,0.07858,0.04015,0.03374,0.02187], - [-0.7,0.79421,0.09566,0.06777,0.05058,0.03392,0.02272], - [-0.5,0.689,0.10707,0.07039,0.05151,0.03609,0.02411], - [-0.3,0.73592,0.14282,0.07076,0.04649,0.04001,0.02614], - [0,0.71632,0.13621,0.08801,0.06112,0.04937,0.03759], - [0.3,0.63718,0.18239,0.12101,0.08324,0.06347,0.05549], - [0.5,0.46548,0.17541,0.14248,0.10008,0.08137,0.07701], - [0.7,0.3472,0.28774,0.1817,0.16442,0.13395,0.12354], - [0.9,0.1764,0.18835,0.21695,0.23031,0.19435,0.22411]]) - - summary = "" - if n <25: + # Critical values of S-maup for alpha =0.01 + CV0_01 = np.array( + [ + [np.nan, 25, 100, 225, 400, 625, 900], + [-0.9, 0.83702, 0.09218, 0.23808, 0.05488, 0.07218, 0.02621], + [-0.7, 0.83676, 0.16134, 0.13402, 0.06737, 0.05486, 0.02858], + [-0.5, 0.83597, 0.16524, 0.13446, 0.06616, 0.06247, 0.02851], + [-0.3, 0.83316, 0.19276, 0.13396, 0.0633, 0.0609, 0.03696], + [0, 0.8237, 0.17925, 0.15514, 0.07732, 0.07988, 0.09301], + [0.3, 0.76472, 0.23404, 0.2464, 0.11588, 0.10715, 0.0707], + [0.5, 0.67337, 0.28921, 0.25535, 0.13992, 0.12975, 0.09856], + [0.7, 0.52155, 0.47399, 0.29351, 0.23923, 0.20321, 0.1625], + [0.9, 0.28599, 0.28938, 0.4352, 0.4406, 0.34437, 0.55967], + ] + ) + + # Critical values of S-maup for alpha =0.05 + CV0_05 = np.array( + [ + [np.nan, 25, 100, 225, 400, 625, 900], + [-0.9, 0.83699, 0.08023, 0.10962, 0.04894, 0.04641, 0.02423], + [-0.7, 0.83662, 0.12492, 0.08643, 0.059, 0.0428, 0.02459], + [-0.5, 0.83578, 0.13796, 0.08679, 0.05927, 0.0426, 0.02658], + [-0.3, 0.78849, 0.16932, 0.08775, 0.05464, 0.04787, 0.03042], + [0, 0.81952, 0.15746, 0.11126, 0.06961, 0.06066, 0.05234], + [0.3, 0.70466, 0.21088, 0.1536, 0.09766, 0.07938, 0.06461], + [0.5, 0.59461, 0.23497, 0.18244, 0.11682, 0.10129, 0.0886], + [0.7, 0.48958, 0.37226, 0.2228, 0.2054, 0.16144, 0.14123], + [0.9, 0.2158, 0.22532, 0.27122, 0.29043, 0.23648, 0.31424], + ] + ) + + # Critical values of S-maup for alpha =0.10 + CV0_10 = np.array( + [ + [np.nan, 25, 100, 225, 400, 625, 900], + [-0.9, 0.69331, 0.06545, 0.07858, 0.04015, 0.03374, 0.02187], + [-0.7, 0.79421, 0.09566, 0.06777, 0.05058, 0.03392, 0.02272], + [-0.5, 0.689, 0.10707, 0.07039, 0.05151, 0.03609, 0.02411], + [-0.3, 0.73592, 0.14282, 0.07076, 0.04649, 0.04001, 0.02614], + [0, 0.71632, 0.13621, 0.08801, 0.06112, 0.04937, 0.03759], + [0.3, 0.63718, 0.18239, 0.12101, 0.08324, 0.06347, 0.05549], + [0.5, 0.46548, 0.17541, 0.14248, 0.10008, 0.08137, 0.07701], + [0.7, 0.3472, 0.28774, 0.1817, 0.16442, 0.13395, 0.12354], + [0.9, 0.1764, 0.18835, 0.21695, 0.23031, 0.19435, 0.22411], + ] + ) + + summary = "" + if n < 25: n = 25 - summary += ("Warning: Please treat this result with caution because the"\ - "computational experiment in this paper include, so far, values of n"\ - "from 25 to 900.\n") + summary += ( + "Warning: Please treat this result with caution because the" + "computational experiment in this paper include, so far, values of n" + "from 25 to 900.\n" + ) elif n > 900: n = 900 - summary += ("Warning: Please treat this result with caution because the"\ - "computational experiment in this paper include, so far, values of n"\ - "from 25 to 900.\n") + summary += ( + "Warning: Please treat this result with caution because the" + "computational experiment in this paper include, so far, values of n" + "from 25 to 900.\n" + ) - theta = float(k)/n + theta = float(k) / n b = -2.2 m = 7.03 - L = 1/(1+(np.exp(b+theta*m))) + L = 1 / (1 + (np.exp(b + theta * m))) p = np.exp(-0.6618) a = 1.3 - eta = p*(theta**(a)) + eta = p * (theta ** (a)) b0 = 5.32 b1 = -5.53 - tau = (theta*b1)+b0 - smaup = L/(1+eta*(np.exp(rho*tau))) + tau = (theta * b1) + b0 + smaup = L / (1 + eta * (np.exp(rho * tau))) self.smaup = smaup - if 0.8 < rho < 1.0 : - r = 0.9 + if 0.8 < rho < 1.0: + r = 0.9 elif 0.6 < rho < 0.8: r = 0.7 elif 0.4 < rho < 0.6: @@ -182,12 +200,9 @@ def __init__(self, n, k, rho): else: r = -0.9 - crit_val0_01 = interp1d(CV0_01[0,1:], - CV0_01[CV0_01[:,0] == r,1:])(n)[0] - crit_val0_05 = interp1d(CV0_05[0,1:], - CV0_05[CV0_05[:,0] == r,1:])(n)[0] - crit_val0_10 = interp1d(CV0_10[0,1:], - CV0_10[CV0_10[:,0] == r,1:])(n)[0] + crit_val0_01 = interp1d(CV0_01[0, 1:], CV0_01[CV0_01[:, 0] == r, 1:])(n)[0] + crit_val0_05 = interp1d(CV0_05[0, 1:], CV0_05[CV0_05[:, 0] == r, 1:])(n)[0] + crit_val0_10 = interp1d(CV0_10[0, 1:], CV0_10[CV0_10[:, 0] == r, 1:])(n)[0] self.critical_01 = crit_val0_01 self.critical_05 = crit_val0_05 self.critical_1 = crit_val0_10 @@ -201,4 +216,3 @@ def __init__(self, n, k, rho): else: summary += "Pseudo p-value > 0.10 (H0 is not rejected)" self.summary = summary -