Skip to content

Commit

Permalink
deploy: ebc6f4e
Browse files Browse the repository at this point in the history
  • Loading branch information
j1c committed Oct 17, 2023
1 parent c5f4fc7 commit 312fae5
Show file tree
Hide file tree
Showing 5 changed files with 357 additions and 96 deletions.
115 changes: 101 additions & 14 deletions _sources/data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 20,
"id": "03d96d14-c068-471b-b7c1-80e17621f73e",
"metadata": {},
"outputs": [],
Expand All @@ -50,12 +50,15 @@
"n = 200\n",
"reps = 300\n",
"\n",
"phi = 0.5\n",
"sigma = 1 - (phi**2)\n",
"\n",
"np.random.seed(1)\n",
"\n",
"datas = [ts_sim(\"indep_ar\", n) for _ in range(reps)]\n",
"datas = [ts_sim(\"indep_ar\", n, phi=phi, sigma=sigma) for _ in range(reps)]\n",
"\n",
"X = np.hstack([data[0] for data in datas])\n",
"Y = np.hstack([data[1] for data in datas])\n",
"X = np.stack([data[0] for data in datas])\n",
"Y = np.stack([data[1] for data in datas])\n",
"\n",
"savedict = {\n",
" 'X' : X,\n",
Expand All @@ -76,24 +79,25 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"id": "d6a02659-109a-451f-9d06-773e0726e9b9",
"metadata": {},
"outputs": [],
"source": [
"fname = \"2-independent_ar_phi\"\n",
"\n",
"n = 1200\n",
"n = 300\n",
"reps = 300\n",
"phis = np.arange(0.2, 1, 0.025)\n",
"sigmas = 1 - (phis**2)\n",
"\n",
"np.random.seed(1)\n",
"\n",
"Xs = []\n",
"Ys = []\n",
"\n",
"for phi in phis:\n",
" datas = [ts_sim(\"indep_ar\", n, phi=float(phi)) for _ in range(reps)]\n",
"for (phi, sigma) in zip(phis, sigmas):\n",
" datas = [ts_sim(\"indep_ar\", n, phi=float(phi), sigma=float(sigma)) for _ in range(reps)]\n",
" Xs.append(np.hstack([data[0] for data in datas]))\n",
" Ys.append(np.hstack([data[1] for data in datas]))\n",
"\n",
Expand All @@ -108,7 +112,7 @@
"}\n",
"\n",
"# save to disk\n",
"sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
"# sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
]
},
{
Expand Down Expand Up @@ -220,15 +224,98 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"id": "98cf8254-3a4e-4421-a272-5a0eb81d2c37",
"metadata": {},
"source": [
"Generate Experiment 6 - Independent Vector AR(1) with increasing sample size\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2899b2f6",
"metadata": {},
"outputs": [],
"source": [
"# Generate Experiment 6 - optimal lag estimation\n",
"\n"
"def indep_var(n, d, phi=0.5, seed=None):\n",
" \"\"\"\n",
" d : corresponds to dimension of the time series\n",
" \"\"\"\n",
" rng = np.random.default_rng(seed)\n",
" coeff = np.eye(d*2) * phi\n",
" covar = np.eye(d*2) * (1 - (phi ** 2))\n",
" errors = rng.multivariate_normal(np.zeros(d * 2), covar, n)\n",
"\n",
" Y = np.zeros((n, d * 2))\n",
" Y[0] = 0\n",
"\n",
" for t in range(1, n):\n",
" Y[t] = np.dot(coeff, Y[t - 1]) + errors[t]\n",
"\n",
" series1 = Y[:, :d]\n",
" series2 = Y[:, d:]\n",
"\n",
" return series1, series2"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "5f28ebc4",
"metadata": {},
"outputs": [],
"source": [
"fname = \"6-independent_var_n\"\n",
"\n",
"n = 200\n",
"d = 100\n",
"reps = 300\n",
"\n",
"phi = 0.5\n",
"\n",
"datas = [indep_var(n, d, phi, seed=rep) for rep in range(reps)]\n",
"\n",
"X = np.stack([data[0] for data in datas])\n",
"Y = np.stack([data[1] for data in datas])\n",
"\n",
"savedict = {\n",
" 'X' : X,\n",
" 'Y' : Y,\n",
"}\n",
"\n",
"# save to disk\n",
"sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "b8583276",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(300, 200, 100)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b0ac59d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -247,7 +334,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
146 changes: 105 additions & 41 deletions _sources/figure1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -5,40 +5,38 @@
"id": "fd860b83-29b8-4689-ba75-79b11670a5dc",
"metadata": {},
"source": [
"# Comparing the False Positive Rate (Type I) Under the Null \n",
"# Comparing the False Positive Rate (Type I) Under the Null\n",
"\n",
"The purpose of this simulation is to confirm the validity of the test, and thus, we expect the testing power to be close to the significance level $\\alpha$. Here we use the independent AR(1) processes, and the sampling process is:\n",
"\n",
"\\begin{equation}\n",
" \\begin{bmatrix}\n",
" X_t\\\\\n",
" Y_t\n",
" \\end{bmatrix}\n",
" =\n",
" \\begin{bmatrix}\n",
" \\phi & 0\\\\\n",
" 0 & \\phi\n",
" \\end{bmatrix}\n",
" \\begin{bmatrix}\n",
" X_{t-1}\\\\\n",
" Y_{t-1}\n",
" \\end{bmatrix}\n",
" +\n",
" \\begin{bmatrix}\n",
" \\epsilon_t\\\\\n",
" \\eta_t\n",
" \\end{bmatrix},\n",
" \\label{eqn:nodep}\n",
"\\begin{bmatrix}\n",
"X_t\\\\\n",
"Y_t\n",
"\\end{bmatrix}\n",
"=\n",
"\\begin{bmatrix}\n",
"\\phi & 0\\\\\n",
"0 & \\phi\n",
"\\end{bmatrix}\n",
"\\begin{bmatrix}\n",
"X_{t-1}\\\\\n",
"Y_{t-1}\n",
"\\end{bmatrix} +\n",
"\\begin{bmatrix}\n",
"\\epsilon_t\\\\\n",
"\\eta_t\n",
"\\end{bmatrix},\n",
"\\end{equation}\n",
"\n",
"where $(\\epsilon_t,\\eta_t)$ is the noise generated by standard normal. For first experiment, we vary the length of time series from $n\\in \\{10, 20, 30, \\ldots, 200\\}$ with $\\phi=0.5$. For second experiment, we vary the AR coefficient $\\phi\\in\\{0.2, 0.25,\\ldots, 0.95\\}$ with $n=1200$. We use 1000 permutation per replication with 300 replications in total.\n",
"where $(\\epsilon_t,\\eta_t)$ is the noise generated by standard normal. For the first experiment, we vary the length of time series from $n\\in \\{10, 20, 30, \\ldots, 200\\}$ with $\\phi=0.5$. For the second experiment, we vary the AR coefficient $\\phi\\in\\{0.2, 0.25,\\ldots, 0.95\\}$ with $n=300$. We use 1000 permutations per replication with 300 replications in total.\n",
"\n",
"See here for wildHSIC and shiftHSIC computation done in matlab, and see here for data generation notebook."
"See here for wildHSIC and shiftHSIC computation done in matlab, and see here for data generation notebook.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 16,
"id": "86756329-8d2b-4794-998c-c7cc3174d8d9",
"metadata": {
"tags": [
Expand All @@ -47,47 +45,113 @@
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import scipy as sp\n",
"from pathlib import Path\n",
"from hyppo.time_series import DcorrX, MGCX, LjungBox\n",
"from joblib import Parallel, delayed"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0f8f0546-9cb7-4b0c-9cf4-3c4043cfada2",
"execution_count": 12,
"id": "4d04d001-cea3-42d4-9085-267074f5932a",
"metadata": {},
"outputs": [],
"source": [
"import scipy as sp\n",
"from pathlib import Path\n",
"data = sp.io.loadmat(\"./data/1-independent_ar_n.mat\")\n",
"\n",
"from hyppo.time_series import DcorrX, MGCX\n",
"X = data[\"X\"]\n",
"Y = data[\"Y\"]\n",
"\n",
"from joblib import Parallel, delayed"
"n_reps = X.shape[0]\n",
"\n",
"ns = list(range(10, 201, 10))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4d04d001-cea3-42d4-9085-267074f5932a",
"execution_count": 21,
"id": "c7fbd536-8461-4e76-8b09-b7a6b2144b34",
"metadata": {},
"outputs": [],
"source": [
"data = sp.io.loadmat(\"./data/1-independent_ar_n.mat\")\n",
"def worker(X, Y, test, reps=1000):\n",
" n, d = X.shape\n",
"\n",
"X = data['X']\n",
"Y = data['Y']"
" res = test.test(X, Y, reps=reps)\n",
" return n, d, res[1] # pvalue"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7fbd536-8461-4e76-8b09-b7a6b2144b34",
"execution_count": 22,
"id": "dbb6d66d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 1.7s remaining: 3.4s\n",
"[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 1.7s remaining: 1.7s\n",
"[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 1.7s remaining: 0.9s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 1.7s remaining: 0.0s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 1.7s finished\n",
"[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n",
"[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 20.7s remaining: 41.4s\n",
"[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 20.8s remaining: 20.8s\n",
"[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 20.8s remaining: 10.4s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 20.9s remaining: 0.0s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 20.9s finished\n",
"[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n",
"[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 8.4s remaining: 16.9s\n",
"[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 8.5s remaining: 8.5s\n",
"[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 10.6s remaining: 5.3s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 11.1s remaining: 0.0s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 11.1s finished\n"
]
}
],
"source": [
"def worker():\n",
" pass"
"test_dict = {\n",
" \"LjungBox\": LjungBox,\n",
" \"DcorrX\": DcorrX,\n",
" \"MGCX\": MGCX\n",
"}\n",
"\n",
"dfs = []\n",
"\n",
"for test_name, test in test_dict.items():\n",
" # if test_name == \"LjungBox\":\n",
" # auto=True\n",
" # else:\n",
" # auto=False\n",
" results = Parallel(-2, verbose=1)(delayed(worker)(X[i, :n, :], Y[i, :n, :], test(max_lag=1)) for n in ns for i in range(n_reps))\n",
"\n",
" df = pd.DataFrame(results, columns=[\"n\", \"d\", 'pval'])\n",
" df['test'] = test_name\n",
" dfs.append(df)\n",
"\n",
"df = pd.concat(dfs, axis=0, ignore_index=True)\n",
"df.to_csv(\"./outs/indep_ar_n.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60cbfb7e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -106,7 +170,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 312fae5

Please sign in to comment.