From 312fae518b75ee9b7b1c0d26e1e5d9046a9cd705 Mon Sep 17 00:00:00 2001 From: j1c Date: Tue, 17 Oct 2023 03:01:05 +0000 Subject: [PATCH] deploy: ebc6f4efb148b3aa32050ce7f026683fce0a2276 --- _sources/data.ipynb | 115 ++++++++++++++++++++++++++++---- _sources/figure1.ipynb | 146 +++++++++++++++++++++++++++++------------ data.html | 77 +++++++++++++++++++--- figure1.html | 113 ++++++++++++++++++++++--------- searchindex.js | 2 +- 5 files changed, 357 insertions(+), 96 deletions(-) diff --git a/_sources/data.ipynb b/_sources/data.ipynb index 471b7d3..a58579a 100644 --- a/_sources/data.ipynb +++ b/_sources/data.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "id": "03d96d14-c068-471b-b7c1-80e17621f73e", "metadata": {}, "outputs": [], @@ -50,12 +50,15 @@ "n = 200\n", "reps = 300\n", "\n", + "phi = 0.5\n", + "sigma = 1 - (phi**2)\n", + "\n", "np.random.seed(1)\n", "\n", - "datas = [ts_sim(\"indep_ar\", n) for _ in range(reps)]\n", + "datas = [ts_sim(\"indep_ar\", n, phi=phi, sigma=sigma) for _ in range(reps)]\n", "\n", - "X = np.hstack([data[0] for data in datas])\n", - "Y = np.hstack([data[1] for data in datas])\n", + "X = np.stack([data[0] for data in datas])\n", + "Y = np.stack([data[1] for data in datas])\n", "\n", "savedict = {\n", " 'X' : X,\n", @@ -76,24 +79,25 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "d6a02659-109a-451f-9d06-773e0726e9b9", "metadata": {}, "outputs": [], "source": [ "fname = \"2-independent_ar_phi\"\n", "\n", - "n = 1200\n", + "n = 300\n", "reps = 300\n", "phis = np.arange(0.2, 1, 0.025)\n", + "sigmas = 1 - (phis**2)\n", "\n", "np.random.seed(1)\n", "\n", "Xs = []\n", "Ys = []\n", "\n", - "for phi in phis:\n", - " datas = [ts_sim(\"indep_ar\", n, phi=float(phi)) for _ in range(reps)]\n", + "for (phi, sigma) in zip(phis, sigmas):\n", + " datas = [ts_sim(\"indep_ar\", n, phi=float(phi), sigma=float(sigma)) for _ in range(reps)]\n", " Xs.append(np.hstack([data[0] for data in datas]))\n", " Ys.append(np.hstack([data[1] for data in datas]))\n", "\n", @@ -108,7 +112,7 @@ "}\n", "\n", "# save to disk\n", - "sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)" + "# sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)" ] }, { @@ -220,15 +224,98 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "id": "98cf8254-3a4e-4421-a272-5a0eb81d2c37", "metadata": {}, + "source": [ + "Generate Experiment 6 - Independent Vector AR(1) with increasing sample size\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2899b2f6", + "metadata": {}, "outputs": [], "source": [ - "# Generate Experiment 6 - optimal lag estimation\n", - "\n" + "def indep_var(n, d, phi=0.5, seed=None):\n", + " \"\"\"\n", + " d : corresponds to dimension of the time series\n", + " \"\"\"\n", + " rng = rng = np.random.default_rng(seed)\n", + " coeff = np.eye(d*2) * phi\n", + " covar = np.eye(d*2) * (1 - (phi ** 2))\n", + " errors = np.random.multivariate_normal(np.zeros(d * 2), covar, n)\n", + "\n", + " Y = np.zeros((n, d * 2))\n", + " Y[0] = 0\n", + "\n", + " for t in range(1, n):\n", + " Y[t] = np.dot(coeff, Y[t - 1]) + errors[t]\n", + "\n", + " series1 = Y[:, :d]\n", + " series2 = Y[:, d:]\n", + "\n", + " return series1, series2" ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5f28ebc4", + "metadata": {}, + "outputs": [], + "source": [ + "fname = \"6-independent_var_n\"\n", + "\n", + "n = 200\n", + "d = 100\n", + "reps = 300\n", + "\n", 
+ "phi = 0.5\n", + "\n", + "datas = [indep_var(n, d, phi, seed=1) for _ in range(reps)]\n", + "\n", + "X = np.stack([data[0] for data in datas])\n", + "Y = np.stack([data[1] for data in datas])\n", + "\n", + "savedict = {\n", + " 'X' : X,\n", + " 'Y' : Y,\n", + "}\n", + "\n", + "# save to disk\n", + "sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b8583276", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(300, 200, 100)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b0ac59d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -247,7 +334,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/_sources/figure1.ipynb b/_sources/figure1.ipynb index 3a2288d..87c54d5 100644 --- a/_sources/figure1.ipynb +++ b/_sources/figure1.ipynb @@ -5,40 +5,38 @@ "id": "fd860b83-29b8-4689-ba75-79b11670a5dc", "metadata": {}, "source": [ - "# Comparing the False Positive Rate (Type I) Under the Null \n", + "# Comparing the False Positive Rate (Type I) Under the Null\n", "\n", "The purpose of this simulation is to confirm the validity of the test, and thus, we expect the testing power to be close to the significance level $\\alpha$. Here we use the independent AR(1) processes, and the sampling process is:\n", "\n", "\\begin{equation}\n", - " \\begin{bmatrix}\n", - " X_t\\\\\n", - " Y_t\n", - " \\end{bmatrix}\n", - " =\n", - " \\begin{bmatrix}\n", - " \\phi & 0\\\\\n", - " 0 & \\phi\n", - " \\end{bmatrix}\n", - " \\begin{bmatrix}\n", - " X_{t-1}\\\\\n", - " Y_{t-1}\n", - " \\end{bmatrix}\n", - " +\n", - " \\begin{bmatrix}\n", - " \\epsilon_t\\\\\n", - " \\eta_t\n", - " \\end{bmatrix},\n", - " \\label{eqn:nodep}\n", + "\\begin{bmatrix}\n", + "X_t\\\\\n", + "Y_t\n", + "\\end{bmatrix}\n", + "=\n", + "\\begin{bmatrix}\n", + "\\phi & 0\\\\\n", + "0 & \\phi\n", + "\\end{bmatrix}\n", + "\\begin{bmatrix}\n", + "X_{t-1}\\\\\n", + "Y_{t-1}\n", + "\\end{bmatrix} +\n", + "\\begin{bmatrix}\n", + "\\epsilon_t\\\\\n", + "\\eta_t\n", + "\\end{bmatrix},\n", "\\end{equation}\n", "\n", - "where $(\\epsilon_t,\\eta_t)$ is the noise generated by standard normal. For first experiment, we vary the length of time series from $n\\in \\{10, 20, 30, \\ldots, 200\\}$ with $\\phi=0.5$. For second experiment, we vary the AR coefficient $\\phi\\in\\{0.2, 0.25,\\ldots, 0.95\\}$ with $n=1200$. We use 1000 permutation per replication with 300 replications in total.\n", + "where $(\\epsilon_t,\\eta_t)$ is the noise generated by standard normal. For first experiment, we vary the length of time series from $n\\in \\{10, 20, 30, \\ldots, 200\\}$ with $\\phi=0.5$. For second experiment, we vary the AR coefficient $\\phi\\in\\{0.2, 0.25,\\ldots, 0.95\\}$ with $n=1200$. We use 1000 permutation per replication with 300 replications in total.\n", "\n", - "See here for wildHSIC and shiftHSIC computation done in matlab, and see here for data generation notebook." 
+ "See here for wildHSIC and shiftHSIC computation done in matlab, and see here for data generation notebook.\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 16, "id": "86756329-8d2b-4794-998c-c7cc3174d8d9", "metadata": { "tags": [ @@ -47,47 +45,113 @@ }, "outputs": [], "source": [ - "import matplotlib.pyplot as plt" + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import scipy as sp\n", + "from pathlib import Path\n", + "from hyppo.time_series import DcorrX, MGCX, LjungBox\n", + "from joblib import Parallel, delayed" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "0f8f0546-9cb7-4b0c-9cf4-3c4043cfada2", + "execution_count": 12, + "id": "4d04d001-cea3-42d4-9085-267074f5932a", "metadata": {}, "outputs": [], "source": [ - "import scipy as sp\n", - "from pathlib import Path\n", + "data = sp.io.loadmat(\"./data/1-independent_ar_n.mat\")\n", "\n", - "from hyppo.time_series import DcorrX, MGCX\n", + "X = data[\"X\"]\n", + "Y = data[\"Y\"]\n", "\n", - "from joblib import Parallel, delayed" + "n_reps = X.shape[0]\n", + "\n", + "ns = list(range(10, 201, 10))" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "4d04d001-cea3-42d4-9085-267074f5932a", + "execution_count": 21, + "id": "c7fbd536-8461-4e76-8b09-b7a6b2144b34", "metadata": {}, "outputs": [], "source": [ - "data = sp.io.loadmat(\"./data/1-independent_ar_n.mat\")\n", + "def worker(X, Y, test, reps=1000):\n", + " n, d = X.shape\n", "\n", - "X = data['X']\n", - "Y = data['Y']" + " res = test.test(X, Y, reps=reps)\n", + " return n, d, res[1] # pvalue" ] }, { "cell_type": "code", - "execution_count": null, - "id": "c7fbd536-8461-4e76-8b09-b7a6b2144b34", + "execution_count": 22, + "id": "dbb6d66d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 1.7s remaining: 3.4s\n", + "[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 1.7s remaining: 1.7s\n", + "[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 1.7s remaining: 0.9s\n", + "[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 1.7s remaining: 0.0s\n", + "[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 1.7s finished\n", + "[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n", + "[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 20.7s remaining: 41.4s\n", + "[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 20.8s remaining: 20.8s\n", + "[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 20.8s remaining: 10.4s\n", + "[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 20.9s remaining: 0.0s\n", + "[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 20.9s finished\n", + "[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n", + "[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 8.4s remaining: 16.9s\n", + "[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 8.5s remaining: 8.5s\n", + "[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 10.6s remaining: 5.3s\n", + "[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 11.1s remaining: 0.0s\n", + "[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 11.1s finished\n" + ] + } + ], "source": [ - "def worker():\n", - " pass" + "test_dict = {\n", + " \"LjungBox\": LjungBox,\n", + " \"DcorrX\": DcorrX,\n", + " \"MGCX\": MGCX\n", + "}\n", + "\n", + "dfs = []\n", + "\n", + "for test_name, 
test in test_dict.items():\n", + " # if test_name == \"LjungBox\":\n", + " # auto=True\n", + " # else:\n", + " # auto=False\n", + " results = Parallel(-2, verbose=1)(delayed(worker)(X[i, :n, :], Y[i, :n, :], test(max_lag=1)) for n in [10, 20] for i in range(3))\n", + "\n", + " df = pd.DataFrame(results, columns=[\"n\", \"d\", 'pval'])\n", + " df['test'] = test_name\n", + " dfs.append(df)\n", + "\n", + "df = pd.concat(dfs, axis=0, ignore_index=True)\n", + "df.to_csv(\"./outs/indep_ar_n.csv\", index=False)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60cbfb7e", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -106,7 +170,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/data.html b/data.html index b4b17e6..eb9ccbc 100644 --- a/data.html +++ b/data.html @@ -411,12 +411,15 @@

Generate Experiment 1 - Independent AR(1) with increasing sample size.
n = 200
 reps = 300
+phi = 0.5
+sigma = 1 - (phi**2)
+
 np.random.seed(1)

-datas = [ts_sim("indep_ar", n) for _ in range(reps)]
+datas = [ts_sim("indep_ar", n, phi=phi, sigma=sigma) for _ in range(reps)]

-X = np.hstack([data[0] for data in datas])
-Y = np.hstack([data[1] for data in datas])
+X = np.stack([data[0] for data in datas])
+Y = np.stack([data[1] for data in datas])

 savedict = {
 'X' : X,
@@ -436,17 +439,18 @@

Generate Experiment 2 - Independent AR(1) with increasing phi.
@@ -550,9 +554,66 @@

Generate Experiment 5 - Non-linearly cross correlated AR(1) with increasing +

Generate Experiment 6 - Independent Vector AR(1) with increasing sample size

-
# Generate Experiment 6 - optimal lag estimation
+
def indep_var(n, d, phi=0.5, seed=None):
+    """Simulate two independent d-dimensional AR(1) processes.
+
+    d : dimension of each of the two returned time series
+    seed : seed for the random generator; use a distinct seed per replicate
+    """
+    rng = np.random.default_rng(seed)
+    coeff = np.eye(d * 2) * phi
+    # innovation variance 1 - phi**2 keeps the stationary variance at 1
+    covar = np.eye(d * 2) * (1 - (phi ** 2))
+    errors = rng.multivariate_normal(np.zeros(d * 2), covar, n)
+
+    Y = np.zeros((n, d * 2))
+    Y[0] = 0
+
+    for t in range(1, n):
+        Y[t] = np.dot(coeff, Y[t - 1]) + errors[t]
+
+    series1 = Y[:, :d]
+    series2 = Y[:, d:]
+
+    return series1, series2
+
+
+
+
+
+
+
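A quick sanity check for indep_var (a sketch, not part of the original notebook; the variable names below are hypothetical): with a diagonal coefficient matrix and innovation variance 1 - phi**2, every coordinate should have stationary variance near 1, and the two returned series should be empirically uncorrelated.

import numpy as np

# minimal check, assuming the indep_var definition above
s1, s2 = indep_var(n=5000, d=2, phi=0.5, seed=0)
print(s1.var(axis=0))                    # each entry should be close to 1
print(np.corrcoef(s1[:, 0], s2[:, 0]))   # off-diagonal should be close to 0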
fname = "6-independent_var_n"
+
+n = 200
+d = 100
+reps = 300
+
+phi = 0.5
+
+# a distinct seed per replicate keeps replicates different yet reproducible
+datas = [indep_var(n, d, phi, seed=i) for i in range(reps)]
+
+X = np.stack([data[0] for data in datas])
+Y = np.stack([data[1] for data in datas])
+
+savedict = {
+    'X' : X,
+    'Y' : Y,
+}
+
+# save to disk
+sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)
+
+
+
+
+
+
+
X.shape
+
+
+
+
+
(300, 200, 100)
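The shape confirms the stacked layout: axis 0 indexes the 300 replications, axis 1 the 200 time points, and axis 2 the 100 dimensions, so a single replicate truncated to a shorter length is recovered by slicing. A hypothetical example:

Xi = X[0, :50, :]   # replicate 0, first 50 time points, all 100 dimensions
Xi.shape            # (50, 100)

This is the same X[i, :n, :] pattern the analysis notebook uses.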
 
diff --git a/figure1.html b/figure1.html index 8bad640..688cc2c 100644 --- a/figure1.html +++ b/figure1.html @@ -371,29 +371,27 @@

Comparing the False Positive Rate (Type I) Under the Null

Comparing the False Positive Rate (Type I) Under the Null#

The purpose of this simulation is to confirm the validity of the test, and thus, we expect the testing power to be close to the significance level \(\alpha\). Here we use the independent AR(1) processes, and the sampling process is:

-
-(1)#\[\begin{equation} - \begin{bmatrix} - X_t\\ - Y_t - \end{bmatrix} - = - \begin{bmatrix} - \phi & 0\\ - 0 & \phi - \end{bmatrix} - \begin{bmatrix} - X_{t-1}\\ - Y_{t-1} - \end{bmatrix} - + - \begin{bmatrix} - \epsilon_t\\ - \eta_t - \end{bmatrix}, - \label{eqn:nodep} +
+(1)#\[\begin{equation} +\begin{bmatrix} +X_t\\ +Y_t +\end{bmatrix} += +\begin{bmatrix} +\phi & 0\\ +0 & \phi +\end{bmatrix} +\begin{bmatrix} +X_{t-1}\\ +Y_{t-1} +\end{bmatrix} + +\begin{bmatrix} +\epsilon_t\\ +\eta_t +\end{bmatrix}, \end{equation}\]
-

where \((\epsilon_t,\eta_t)\) is the noise generated by standard normal. For first experiment, we vary the length of time series from \(n\in \{10, 20, 30, \ldots, 200\}\) with \(\phi=0.5\). For second experiment, we vary the AR coefficient \(\phi\in\{0.2, 0.25,\ldots, 0.95\}\) with \(n=1200\). We use 1000 permutation per replication with 300 replications in total.

+

where \((\epsilon_t,\eta_t)\) is noise drawn from a standard normal distribution. For the first experiment, we vary the length of the time series over \(n\in \{10, 20, 30, \ldots, 200\}\) with \(\phi=0.5\). For the second experiment, we vary the AR coefficient over \(\phi\in\{0.2, 0.25,\ldots, 0.95\}\) with \(n=300\). We use 1000 permutations per replication, with 300 replications in total.

See here for the wildHSIC and shiftHSIC computation done in MATLAB, and see here for the data generation notebook.
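One step worth spelling out (an added note; the normalization is implicit in the data notebook, which sets sigma = 1 - (phi**2)): taking variances on both sides of the stationary AR(1) recursion gives

\[\begin{equation}
\mathrm{Var}(X_t) = \phi^2\,\mathrm{Var}(X_{t-1}) + \sigma^2
\quad\Longrightarrow\quad
\mathrm{Var}(X_t) = \frac{\sigma^2}{1-\phi^2},
\end{equation}\]

so choosing \(\sigma^2 = 1 - \phi^2\) pins the marginal variance of \(X_t\) and \(Y_t\) at 1 for every \(\phi\), which keeps rejection rates comparable across the \(\phi\) sweep.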

@@ -402,7 +400,12 @@

Comparing the False Positive Rate (Type I) Under the Null
-
import matplotlib.pyplot as plt
+
import pandas as pd
+import matplotlib.pyplot as plt
+import scipy as sp
+from pathlib import Path
+from hyppo.time_series import DcorrX, MGCX, LjungBox
+from joblib import Parallel, delayed
 
@@ -410,30 +413,76 @@

Comparing the False Positive Rate (Type I) Under the Null
-
import scipy as sp
-from pathlib import Path
+
data = sp.io.loadmat("./data/1-independent_ar_n.mat")
 
-from hyppo.time_series import DcorrX, MGCX
+X = data["X"]
+Y = data["Y"]
 
-from joblib import Parallel, delayed
+n_reps = X.shape[0]
+
+ns = list(range(10, 201, 10))
 
-
data = sp.io.loadmat("./data/1-independent_ar_n.mat")
+
def worker(X, Y, test, reps=1000):
+    n, d = X.shape
 
-X = data['X']
-Y = data['Y']
+    res = test.test(X, Y, reps=reps)
+    return n, d, res[1] # pvalue
 
-
def worker():
-    pass
+
test_dict = {
+    "LjungBox": LjungBox,
+    "DcorrX": DcorrX,
+    "MGCX": MGCX
+}
+
+dfs = []
+
+for test_name, test in test_dict.items():
+    # NOTE: this pass sweeps only a small smoke-test subset; the full grid is
+    # `for n in ns for i in range(n_reps)` with the values defined above
+    results = Parallel(-2, verbose=1)(delayed(worker)(X[i, :n, :], Y[i, :n, :], test(max_lag=1)) for n in [10, 20] for i in range(3))
+
+    df = pd.DataFrame(results, columns=["n", "d", 'pval'])
+    df['test'] = test_name
+    dfs.append(df)
+
+df = pd.concat(dfs, axis=0, ignore_index=True)
+df.to_csv("./outs/indep_ar_n.csv", index=False)
+
+
+
+
+
[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.
+
+
+
[Parallel(n_jobs=-2)]: Done   2 out of   6 | elapsed:    1.7s remaining:    3.4s
+[Parallel(n_jobs=-2)]: Done   3 out of   6 | elapsed:    1.7s remaining:    1.7s
+[Parallel(n_jobs=-2)]: Done   4 out of   6 | elapsed:    1.7s remaining:    0.9s
+[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:    1.7s remaining:    0.0s
+[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:    1.7s finished
+[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.
+[Parallel(n_jobs=-2)]: Done   2 out of   6 | elapsed:   20.7s remaining:   41.4s
+[Parallel(n_jobs=-2)]: Done   3 out of   6 | elapsed:   20.8s remaining:   20.8s
+[Parallel(n_jobs=-2)]: Done   4 out of   6 | elapsed:   20.8s remaining:   10.4s
+[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:   20.9s remaining:    0.0s
+[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:   20.9s finished
+[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.
+[Parallel(n_jobs=-2)]: Done   2 out of   6 | elapsed:    8.4s remaining:   16.9s
+[Parallel(n_jobs=-2)]: Done   3 out of   6 | elapsed:    8.5s remaining:    8.5s
+[Parallel(n_jobs=-2)]: Done   4 out of   6 | elapsed:   10.6s remaining:    5.3s
+[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:   11.1s remaining:    0.0s
+[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:   11.1s finished
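Turning the saved p-values into a type I error curve is then a small aggregation step; a sketch (file and column names as written above, with \(\alpha = 0.05\) assumed):

import pandas as pd

df = pd.read_csv("./outs/indep_ar_n.csv")
alpha = 0.05
# empirical type I error: fraction of replicates with p < alpha, per test and n
rates = df.assign(reject=df["pval"] < alpha).groupby(["test", "n"])["reject"].mean()
print(rates)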
 
diff --git a/searchindex.js b/searchindex.js index 8e161ad..c5aa4d0 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["coverpage", "data", "figure1"], "filenames": ["coverpage.md", "data.ipynb", "figure1.ipynb"], "titles": ["Abstract", "Generating data for analysis", "Comparing the False Positive Rate (Type I) Under the Null"], "terms": {"complex": 0, "data": [0, 2], "structur": 0, "time": [0, 2], "seri": [0, 2], "ar": [0, 2], "increasingli": 0, "more": 0, "preval": 0, "modern": 0, "scienc": 0, "problem": 0, "A": 0, "fundament": 0, "question": 0, "whether": 0, "two": 0, "have": 0, "statist": 0, "signific": [0, 2], "relationship": 0, "mani": 0, "current": 0, "approach": 0, "reli": 0, "make": 0, "parametr": 0, "assumpt": 0, "random": [0, 1], "process": [0, 2], "detect": 0, "onli": 0, "linear": 0, "associ": 0, "requir": 0, "multipl": 0, "test": [0, 2], "sacrif": 0, "power": [0, 2], "high": 0, "dimension": 0, "nonlinear": 0, "set": 0, "The": [0, 2], "distribut": 0, "ani": 0, "under": 0, "null": 0, "hypothesi": 0, "challeng": 0, "estim": [0, 1], "permut": [0, 2], "typic": 0, "invalid": 0, "thi": [0, 2], "studi": 0, "combin": 0, "distanc": 0, "correl": 0, "dcorr": 0, "multiscal": 0, "graph": 0, "mgc": 0, "from": [0, 1, 2], "independ": [0, 2], "literatur": 0, "block": 0, "analysi": 0, "address": 0, "propos": 0, "nonparametr": 0, "procedur": 0, "asymptot": 0, "valid": [0, 2], "consist": 0, "depend": 0, "stationari": 0, "abl": 0, "optim": [0, 1], "lag": [0, 1], "maxim": 0, "It": 0, "elimin": 0, "need": 0, "exhibit": 0, "superior": 0, "low": 0, "sampl": [0, 2], "size": 0, "neural": 0, "connect": 0, "fmri": 0, "reveal": 0, "signal": 0, "within": 0, "visual": 0, "network": 0, "default": 0, "mode": 0, "other": 0, "work": 0, "provid": 0, "primari": 0, "tool": [0, 1], "open": 0, "sourc": 0, "code": 0, "impact": 0, "wide": 0, "rang": [0, 1], "scientif": 0, "disciplin": 0, "import": [1, 2], "numpi": 1, "np": 1, "scipi": [1, 2], "sp": [1, 2], "pathlib": [1, 2], "path": [1, 2], "hyppo": [1, 2], "ts_sim": 1, "indep_ar": 1, "cross_corr_ar": 1, "nonlinear_process": 1, "extinct_gaussian_process": 1, "p": 1, "fname": 1, "independent_ar_n": [1, 2], "n": [1, 2], "200": [1, 2], "rep": 1, "300": [1, 2], "seed": 1, "_": 1, "x": [1, 2], "hstack": 1, "0": [1, 2], "y": [1, 2], "savedict": 1, "save": 1, "disk": 1, "io": [1, 2], "savemat": 1, "f": 1, "mat": [1, 2], "do_compress": 1, "true": 1, "independent_ar_phi": 1, "1200": [1, 2], "arang": 1, "025": 1, "xs": 1, "ys": 1, "float": 1, "append": 1, "stack": 1, "linear_ar": 1, "nonlinear_ar": 1, "extinct_gaussian": 1, "6": 1, "purpos": 2, "simul": 2, "confirm": 2, "thu": 2, "we": 2, "expect": 2, "close": 2, "level": 2, "alpha": 2, "here": 2, "us": 2, "1": 2, "begin": 2, "equat": 2, "bmatrix": 2, "x_t": 2, "y_t": 2, "end": 2, "phi": 2, "x_": 2, "t": 2, "y_": 2, "epsilon_t": 2, "eta_t": 2, "label": 2, "eqn": 2, "nodep": 2, "where": 2, "nois": 2, "gener": 2, "standard": 2, "normal": 2, "For": 2, "first": 2, "experi": 2, "vari": 2, "length": 2, "10": 2, "20": 2, "30": 2, "ldot": 2, "5": 2, "second": 2, "coeffici": 2, "2": 2, "25": 2, "95": 2, "1000": 2, "per": 2, "replic": 2, "total": 2, "see": 2, "wildhsic": 2, "shifthsic": 2, "comput": 2, "done": 2, "matlab": 2, "notebook": 2, "matplotlib": 2, "pyplot": 2, "plt": 2, "time_seri": 2, "dcorrx": 2, "mgcx": 2, "joblib": 2, "parallel": 2, "delai": 2, "loadmat": 2, "def": 2, "worker": 2, "pass": 2}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"abstract": 0, "gener": 1, 
"data": 1, "analysi": 1, "experi": 1, "1": 1, "independ": 1, "ar": 1, "increas": 1, "sampl": 1, "size": 1, "2": 1, "phi": 1, "3": 1, "linear": 1, "cross": 1, "correl": 1, "4": 1, "non": 1, "linearli": 1, "5": 1, "compar": 2, "fals": 2, "posit": 2, "rate": 2, "type": 2, "i": 2, "under": 2, "null": 2}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["coverpage", "data", "figure1"], "filenames": ["coverpage.md", "data.ipynb", "figure1.ipynb"], "titles": ["Abstract", "Generating data for analysis", "Comparing the False Positive Rate (Type I) Under the Null"], "terms": {"complex": 0, "data": [0, 2], "structur": 0, "time": [0, 1, 2], "seri": [0, 1, 2], "ar": [0, 2], "increasingli": 0, "more": 0, "preval": 0, "modern": 0, "scienc": 0, "problem": 0, "A": 0, "fundament": 0, "question": 0, "whether": 0, "two": 0, "have": 0, "statist": 0, "signific": [0, 2], "relationship": 0, "mani": 0, "current": 0, "approach": 0, "reli": 0, "make": 0, "parametr": 0, "assumpt": 0, "random": [0, 1], "process": [0, 2], "detect": 0, "onli": 0, "linear": 0, "associ": 0, "requir": 0, "multipl": 0, "test": [0, 2], "sacrif": 0, "power": [0, 2], "high": 0, "dimension": 0, "nonlinear": 0, "set": 0, "The": [0, 2], "distribut": 0, "ani": 0, "under": 0, "null": 0, "hypothesi": 0, "challeng": 0, "estim": 0, "permut": [0, 2], "typic": 0, "invalid": 0, "thi": [0, 2], "studi": 0, "combin": 0, "distanc": 0, "correl": 0, "dcorr": 0, "multiscal": 0, "graph": 0, "mgc": 0, "from": [0, 1, 2], "independ": [0, 2], "literatur": 0, "block": 0, "analysi": 0, "address": 0, "propos": 0, "nonparametr": 0, "procedur": 0, "asymptot": 0, "valid": [0, 2], "consist": 0, "depend": 0, "stationari": 0, "abl": 0, "optim": 0, "lag": 0, "maxim": 0, "It": 0, "elimin": 0, "need": 0, "exhibit": 0, "superior": 0, "low": 0, "sampl": [0, 2], "size": 0, "neural": 0, "connect": 0, "fmri": 0, "reveal": 0, "signal": 0, "within": 0, "visual": 0, "network": 0, "default": 0, "mode": 0, "other": 0, "work": 0, "provid": 0, "primari": 0, "tool": [0, 1], "open": 0, "sourc": 0, "code": 0, "impact": 0, "wide": 0, "rang": [0, 1, 2], "scientif": 0, "disciplin": 0, "import": [1, 2], "numpi": 1, "np": 1, "scipi": [1, 2], "sp": [1, 2], "pathlib": [1, 2], "path": [1, 2], "hyppo": [1, 2], "ts_sim": 1, "indep_ar": 1, "cross_corr_ar": 1, "nonlinear_process": 1, "extinct_gaussian_process": 1, "p": 1, "fname": 1, "independent_ar_n": [1, 2], "n": [1, 2], "200": [1, 2], "rep": [1, 2], "300": [1, 2], "0": [1, 2], "sigma": 1, "seed": 1, "_": 1, "x": [1, 2], "stack": 1, "y": [1, 2], "savedict": 1, "save": 1, "disk": 1, "io": [1, 2], "savemat": 1, "f": 1, "mat": [1, 2], "do_compress": 1, "true": [1, 2], "independent_ar_phi": 1, "arang": 1, "025": 1, "xs": 1, "ys": 1, "zip": 1, "float": 1, "append": [1, 2], "hstack": 1, "linear_ar": 1, "nonlinear_ar": 1, "extinct_gaussian": 1, "6": [1, 2], "vector": 1, "def": [1, 2], "indep_var": 1, "d": [1, 2], "none": 1, "correspond": 1, "dimens": 1, "rng": 1, "default_rng": 1, "coeff": 1, "ey": 1, "covar": 1, "error": 1, "multivariate_norm": 1, "zero": 1, "t": [1, 2], "dot": 1, "series1": 1, "series2": 1, "return": [1, 2], "independent_var_n": 1, "100": 1, "shape": [1, 2], "purpos": 2, 
"simul": 2, "confirm": 2, "thu": 2, "we": 2, "expect": 2, "close": 2, "level": 2, "alpha": 2, "here": 2, "us": 2, "1": 2, "begin": 2, "equat": 2, "bmatrix": 2, "x_t": 2, "y_t": 2, "end": 2, "phi": 2, "x_": 2, "y_": 2, "epsilon_t": 2, "eta_t": 2, "where": 2, "nois": 2, "gener": 2, "standard": 2, "normal": 2, "For": 2, "first": 2, "experi": 2, "vari": 2, "length": 2, "10": 2, "20": 2, "30": 2, "ldot": 2, "5": 2, "second": 2, "coeffici": 2, "2": 2, "25": 2, "95": 2, "1200": 2, "1000": 2, "per": 2, "replic": 2, "total": 2, "see": 2, "wildhsic": 2, "shifthsic": 2, "comput": 2, "done": 2, "matlab": 2, "notebook": 2, "panda": 2, "pd": 2, "matplotlib": 2, "pyplot": 2, "plt": 2, "time_seri": 2, "dcorrx": 2, "mgcx": 2, "ljungbox": 2, "joblib": 2, "parallel": 2, "delai": 2, "loadmat": 2, "n_rep": 2, "ns": 2, "list": 2, "201": 2, "worker": 2, "re": 2, "pvalu": 2, "test_dict": 2, "df": 2, "test_nam": 2, "item": 2, "auto": 2, "els": 2, "result": 2, "verbos": 2, "max_lag": 2, "3": 2, "datafram": 2, "column": 2, "pval": 2, "concat": 2, "axi": 2, "ignore_index": 2, "to_csv": 2, "out": 2, "indep_ar_n": 2, "csv": 2, "index": 2, "n_job": 2, "backend": 2, "lokybackend": 2, "11": 2, "concurr": 2, "elaps": 2, "7s": 2, "remain": 2, "4s": 2, "4": 2, "9s": 2, "0s": 2, "finish": 2, "41": 2, "8s": 2, "8": 2, "16": 2, "5s": 2, "6s": 2, "3s": 2, "1s": 2}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"abstract": 0, "gener": 1, "data": 1, "analysi": 1, "experi": 1, "1": 1, "independ": 1, "ar": 1, "increas": 1, "sampl": 1, "size": 1, "2": 1, "phi": 1, "3": 1, "linear": 1, "cross": 1, "correl": 1, "4": 1, "non": 1, "linearli": 1, "5": 1, "compar": 2, "fals": 2, "posit": 2, "rate": 2, "type": 2, "i": 2, "under": 2, "null": 2}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file