You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import math
from collections import defaultdict
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from hyppo.conditional import ConditionalDcorr
from joblib import Parallel, delayed
from sklearn.model_selection import StratifiedShuffleSplit
from sktree.stats.utils import (
METRIC_FUNCTIONS,
POSITIVE_METRICS,
POSTERIOR_FUNCTIONS,
REGRESSOR_METRICS,
_compute_null_distribution_coleman,
_non_nan_samples,
)
# Fixed seed shared by the script so repeated runs start from the same state.
seed = 12345
# Module-level NumPy generator built from that seed.
rng = np.random.default_rng(seed=seed)
def _run_parallel_posterior_sim(
idx,
n_samples,
n_features,
class_probs,
seed,
n_features_2,
test_size,
max_fpr,
sim_type,
):
n_jobs = 1
results = defaultdict(list)
rng = np.random.default_rng(seed)
n_features_ends = [100, None]
if sim_type == "confounder":
npy_data = np.load(
f"/Users/adam2392/Desktop/cancer/confounder/confounder_{idx}.npz"
)
elif sim_type == "collider":
npy_data = np.load(
f"/Users/adam2392/Desktop/cancer/collider/collider_{idx}.npz"
)
elif sim_type == "mediator":
npy_data = np.load(
f"/Users/adam2392/Desktop/cancer/mediator/mediator_{idx}.npz"
)
elif sim_type == "direct-indirect":
npy_data = np.load(
f"/Users/adam2392/Desktop/cancer/direct-indirect/direct-indirect_{idx}.npz"
)
elif sim_type == "independent":
npy_data = np.load(
f"/Users/adam2392/Desktop/cancer/independent/independent_{idx}.npz"
)
X = npy_data["X"]
y = npy_data["y"]
# print(X.shape, y.shape)
X = X[:, : 100 + n_features_2]
if n_samples < X.shape[0]:
cv = StratifiedShuffleSplit(n_splits=1, train_size=n_samples)
for train_idx, _ in cv.split(X, y):
continue
X = X[train_idx, :]
y = y[train_idx, ...].squeeze()
assert len(X) == len(y)
assert len(y) == n_samples
n_features_ends[1] = X.shape[1]
posteriors_dict = dict()
# now compute the pvalue when shuffling X2
covariate_index = np.arange(n_features_ends[0], n_features_ends[1])
# estimate (conditional) mutual information using KSG
Z = X[:, covariate_index]
mask_array = np.ones(X.shape[1])
mask_array[covariate_index] = 0
mask_array = mask_array.astype(bool)
X_minus_Z = X[:, mask_array]
cdcorr = ConditionalDcorr(bandwidth="silverman")
print(Z, np.var(Z))
# print(X_minus_Z, np.var(X_minus_Z))
print(Z.shape, X_minus_Z.shape, X.shape, y.shape)
try:
cdcorr_stat, cdcorr_pvalue = cdcorr.test(
x=X_minus_Z.copy().astype(np.float64),
y=y.copy().astype(np.float64),
z=Z.copy().astype(np.float64),
)
except Exception as e:
errmsg = f"{idx, n_samples, n_features, n_features_2, np.var(Z), X_minus_Z.shape, y.shape, Z.shape}"
e.args += (errmsg,)
raise (e)
np.savez(
f"./varying-samples/{sim_type}/conddcorr_{n_samples}_{n_features_2}_{idx}.npz",
n_samples=n_samples,
n_features_2=n_features_2,
y_true=y,
cdcorr_state=cdcorr_stat,
cdcorr_pvalue=cdcorr_pvalue,
)
# results["cdcorr_pvalue_x2"].append(cdcorr_pvalue)
# results["cdcorr_stat_x2"].append(cdcorr_stat)
# results["mvrf_posteriors"].append(comight_posteriors_x2)
# results["mvrf_null_posteriors"].append(comight_null_posteriors_x2)
return results
# Simulation settings passed to every worker call below.
# number of features in the first view
n_features = 10
noise_dims = 90
n_samples = 256
max_features = 0.3
n_jobs = -1
test_size = 0.2
max_fpr = 0.1

# number of features in the second view: 2**2, 2**3, ..., 2**12
pows = np.arange(2, 13, dtype=int)
n_features_2_list = [2**exponent for exponent in pows]
print(n_features_2_list)

class_probs = [0.5, 0.5]

# Run one task per (repetition, second-view size) pair on the "independent"
# simulation.
# NOTE(review): `n_repeats` is not defined in the visible part of this file;
# it must be set before this statement runs.
_results_ind = Parallel(n_jobs=n_jobs)(
    delayed(_run_parallel_posterior_sim)(
        idx_,
        n_samples,
        n_features,
        class_probs,
        seed,
        n_features_2_,
        test_size,
        max_fpr,
        "independent",
    )
    for idx_, n_features_2_ in product(range(n_repeats), n_features_2_list)
)
Error message
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py", line 463, in _process_worker
r = call_item()
File "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py", line 291, in __call__
return self.fn(*self.args, **self.kwargs)
File "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/parallel.py", line 589, in __call__
return [func(*args, **kwargs)
File "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/parallel.py", line 589, in <listcomp>
return [func(*args, **kwargs)
File "/var/folders/6_/sl83qtkd68x3_mvfys07_6qm0000gn/T/ipykernel_73186/1504525807.py", line 79, in _run_parallel_posterior_sim
File "/var/folders/6_/sl83qtkd68x3_mvfys07_6qm0000gn/T/ipykernel_73186/1504525807.py", line 71, in _run_parallel_posterior_sim
File "/Users/adam2392/Documents/hyppo/hyppo/conditional/cdcorr.py", line 181, in test
stat, pvalue, null_dist = perm_test(
File "/Users/adam2392/Documents/hyppo/hyppo/tools/common.py", line 613, in perm_test
stat = calc_stat(x, y, z)
File "/Users/adam2392/Documents/hyppo/hyppo/conditional/cdcorr.py", line 112, in statistic
x, y, z = check_input()
File "/Users/adam2392/Documents/hyppo/hyppo/conditional/_utils.py", line 24, in __call__
self._check_variance()
File "/Users/adam2392/Documents/hyppo/hyppo/conditional/_utils.py", line 88, in _check_variance
raise ValueError(f"Test cannot be run, one of the inputs has 0 variance {np.var(self.x)}, {np.var(self.y)}, {np.var(self.z)}, {self.z}, {self.z.shape}")
ValueError: ('Test cannot be run, one of the inputs has 0 variance 3.309443477874101, 0.25, 0.0, [[5.77545356e-203 0.00000000e+000 0.00000000e+000 ... 0.00000000e+000\n 0.00000000e+000 0.00000000e+000]\n [0.00000000e+000 5.77545356e-203 0.00000000e+000 ... 0.00000000e+000\n 0.00000000e+000 0.00000000e+000]\n [0.00000000e+000 0.00000000e+000 5.77545356e-203 ... 0.00000000e+000\n 0.00000000e+000 0.00000000e+000]\n ...\n [0.00000000e+000 0.00000000e+000 0.00000000e+000 ... 5.77545356e-203\n 0.00000000e+000 0.00000000e+000]\n [0.00000000e+000 0.00000000e+000 0.00000000e+000 ... 0.00000000e+000\n 5.77545356e-203 0.00000000e+000]\n [0.00000000e+000 0.00000000e+000 0.00000000e+000 ... 0.00000000e+000\n 0.00000000e+000 5.77545356e-203]], (256, 256)', '(0, 256, 10, 512, 1.0062427978699584, (256, 100), (256,), (256, 512))')
"""
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In[34], line 1
----> 1 _results_ind = Parallel(n_jobs=-1)(
2 delayed(_run_parallel_posterior_sim)(
3 idx_,
4 n_samples,
5 n_features,
6 class_probs,
7 seed,
8 n_features_2_,
9 test_size,
10 max_fpr,
11 "independent",
12 )
13 for (idx_, n_features_2_) in product(range(n_repeats), n_features_2_list)
14 )
File ~/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/parallel.py:1952, in Parallel.__call__(self, iterable)
1946 # The first item from the output is blank, but it makes the interpreter
1947 # progress until it enters the Try/Except block of the generator and
1948 # reach the first `yield` statement. This starts the aynchronous
1949 # dispatch of the tasks to the workers.
1950 next(output)
-> 1952 return output if self.return_generator else list(output)
File ~/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/parallel.py:1595, in Parallel._get_outputs(self, iterator, pre_dispatch)
1592 yield
1594 with self._backend.retrieval_context():
-> 1595 yield from self._retrieve()
1597 except GeneratorExit:
1598 # The generator has been garbage collected before being fully
1599 # consumed. This aborts the remaining tasks if possible and warn
1600 # the user if necessary.
1601 self._exception = True
File ~/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/parallel.py:1699, in Parallel._retrieve(self)
1692 while self._wait_retrieval():
1693
1694 # If the callback thread of a worker has signaled that its task
1695 # triggered an exception, or if the retrieval loop has raised an
1696 # exception (e.g. `GeneratorExit`), exit the loop and surface the
1697 # worker traceback.
1698 if self._aborting:
-> 1699 self._raise_error_fast()
1700 break
1702 # If the next job is not ready for retrieval yet, we just wait for
1703 # async callbacks to progress.
File ~/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/parallel.py:1734, in Parallel._raise_error_fast(self)
1730 # If this error job exists, immediatly raise the error by
1731 # calling get_result. This job might not exists if abort has been
1732 # called directly or if the generator is gc'ed.
1733 if error_job is not None:
-> 1734 error_job.get_result(self.timeout)
File ~/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/parallel.py:736, in BatchCompletionCallBack.get_result(self, timeout)
730 backend = self.parallel._backend
732 if backend.supports_retrieve_callback:
733 # We assume that the result has already been retrieved by the
734 # callback thread, and is stored internally. It's just waiting to
735 # be returned.
--> 736 return self._return_or_raise()
738 # For other backends, the main thread needs to run the retrieval step.
739 try:
File ~/miniforge3/envs/sktree/lib/python3.9/site-packages/joblib/parallel.py:754, in BatchCompletionCallBack._return_or_raise(self)
752 try:
753 if self.status == TASK_ERROR:
--> 754 raise self._result
755 return self._result
756 finally:
ValueError: ('Test cannot be run, one of the inputs has 0 variance
Reproducing code example:
Download files here: https://www.dropbox.com/scl/fo/iwaer0ai8dnk27mb5obnu/h?rlkey=hcmpmc6mojb7zn5zodl1nmnt2&dl=0
Error message
Version information
The text was updated successfully, but these errors were encountered: