You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
from obnb.data import GOBP, BioGRID
from obnb.dataset import Dataset as OBNBDataset
from obnb.label.filters import Compose, EntityExistenceFilter, LabelsetRangeFilterSize, LabelsetRangeFilterSplit, NegativeGeneratorHypergeom
from obnb.label.split import RatioPartition
from obnb.util.converter import GenePropertyConverter
# P-value threshold handed to NegativeGeneratorHypergeom inside make_dataset.
negatives_p_thresh = 0.05

# Label collection and network, both loaded with "data" as their root.
function_labels = GOBP(root="data")
network = BioGRID(root="data")

# Converter for the "PubMedCount" gene property; it is the property the
# 0.6 / 0.2 / 0.2 ratio partition below is built on.
pubmedcnt_converter = GenePropertyConverter(root="data", name="PubMedCount")
sb_splitter = RatioPartition(
    0.6,
    0.2,
    0.2,
    ascending=False,
    property_converter=pubmedcnt_converter,
)
def make_dataset(graph, labels, splitter):
    """Filter ``labels`` in place against ``graph`` and wrap both in a dataset.

    Args:
        graph: Network object; its ``node_ids`` restrict the labels and its
            dense form supplies the feature.
        labels: Labelset collection. It is modified in place by ``iapply``,
            so the caller's object is filtered as a side effect.
        splitter: Splitter used by the per-split size filter and passed on
            to the resulting dataset.

    Returns:
        The ``OBNBDataset`` built from the graph, the feature derived from
        it, and the filtered labels.
    """
    label_filters = Compose(
        # Only use genes that are present in the network
        EntityExistenceFilter(list(graph.node_ids)),
        # Remove any labelsets with fewer than 15 network genes
        LabelsetRangeFilterSize(min_val=15),
        # Selective negatives using the hypergeometric test
        NegativeGeneratorHypergeom(p_thresh=negatives_p_thresh),
        # Make sure each split has at least 5 positive examples
        LabelsetRangeFilterSplit(min_val=5, splitter=splitter),
    )
    labels.iapply(label_filters)

    dense_feature = graph.to_dense_graph().to_feature()
    return OBNBDataset(
        graph=graph,
        feature=dense_feature,
        transform='Node2Vec',
        label=labels,
        splitter=splitter,
        resolve=True,
    )
# Build the dataset from the BioGRID network, the GOBP labels and the
# PubMed-count-based ratio splitter defined above.
gobp_sb = make_dataset(
    graph=network,
    labels=function_labels,
    splitter=sb_splitter,
)
error:
---------------------------------------------------------------------------
IDNotExistError Traceback (most recent call last)
Cell In[8], line 1
----> 1 gobp_sb = make_dataset(graph=network, labels=function_labels, splitter=sb_splitter)
Cell In[7], line 8, in make_dataset(graph, labels, splitter)
7 def make_dataset(graph, labels, splitter):
----> 8 labels.iapply(
9 Compose(
10 # Only use genes that are present in the network
11 EntityExistenceFilter(list(graph.node_ids)),
12 # Remove any labelsets with less than 15 network genes
13 LabelsetRangeFilterSize(min_val=15),
14 # Selective negatives using hyper-geom test
15 NegativeGeneratorHypergeom(p_thresh=negatives_p_thresh),
16 # Make sure each split has at least 5 positive examples
17 LabelsetRangeFilterSplit(min_val=5, splitter=splitter),
18 ),
19 )
20 return OBNBDataset(
21 graph=graph,
22 feature=graph.to_dense_graph().to_feature(),
(...)
25 splitter=splitter,
26 resolve=True)
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/label/collection.py:492, in LabelsetCollection.iapply(self, filter_func, progress_bar)
486 def iapply(self, filter_func, progress_bar: bool = False):
487 """Apply filter to labelsets inplace.
488
489 This is a shortcut for calling self.apply(filter_func, inplace=True).
490
491 """
--> 492 self.apply(filter_func, inplace=True, progress_bar=progress_bar)
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/label/collection.py:483, in LabelsetCollection.apply(self, filter_func, inplace, progress_bar)
481 checkers.checkType("inplace", bool, inplace)
482 obj = self if inplace else self.copy()
--> 483 filter_func(obj, progress_bar)
484 return obj
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/label/filters/base.py:113, in Compose.__call__(self, lsc, progress_bar)
111 def __call__(self, lsc, progress_bar):
112 for filter_ in self.filters:
--> 113 filter_.__call__(lsc, progress_bar)
114 self.logger.info(lsc.stats())
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/label/filters/base.py:81, in BaseFilter.__call__(self, lsc, progress_bar)
79 pbar = tqdm(entity_ids, desc=f"{self!r}", disable=not progress_bar)
80 for entity_id in pbar:
---> 81 if self.criterion(val_getter(entity_id)):
82 mod_fun(entity_id)
83 self.logger.debug(
84 f"Modification ({self.mod_name}) criterion met for "
85 f"{entity_id!r}",
86 )
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/label/filters/range_filter.py:166, in LabelsetRangeFilterSplit.get_val_getter.<locals>.val_getter(label_id)
164 def val_getter(label_id):
165 y_all, masks = lsc.split(self.splitter, **self.kwargs)
--> 166 neg_idx = lsc.entity[lsc.get_negative(label_id)]
167 self.logger.debug(f"{label_id = } {neg_idx = }")
168 # TODO: make label_ids to index mapping?
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/idhandler.py:87, in IDlst.__getitem__(self, identifier)
85 return self._getitem_sinlge(identifier)
86 elif isinstance(identifier, Iterable):
---> 87 return self._getitem_multiple(identifier)
88 else:
89 raise TypeError(
90 f"ID key(s) must be string or iterables of string, "
91 f"not {type(identifier)!r}",
92 )
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/idhandler.py:102, in IDlst._getitem_multiple(self, identifiers)
100 idx_lst = []
101 for identifier in identifiers:
--> 102 idx_lst.append(self._getitem_sinlge(identifier))
103 return np.array(idx_lst)
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/idhandler.py:202, in IDmap._getitem_sinlge(self, identifier)
201 def _getitem_sinlge(self, identifier):
--> 202 self._check_ID_existence(identifier, True)
203 return self._map[identifier]
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/idhandler.py:111, in IDlst._check_ID_existence(self, identifier, existence)
109 raise IDExistsError(f"Existing ID {identifier!r}")
110 elif existence & (identifier not in self):
--> 111 raise IDNotExistError(f"Unknown ID {identifier!r}")
IDNotExistError: Unknown ID '5557'
I have tested multiple versions of the above function: without NegativeGeneratorHypergeom it works fine, but whenever this filter is included it raises the error, even if it is the only filtering performed.
The text was updated successfully, but these errors were encountered:
code:
error:
I have tested multiple versions of the above function: without
NegativeGeneratorHypergeom
it works fine, but whenever this filter is included it raises the error, even if it is the only filtering performed.
The text was updated successfully, but these errors were encountered: