We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
I'm not sure whether this is caused by the earlier problem of Mondo terms having a missing field, or whether it is a new problem.
code:
from obnb.data import DisGeNET disease_labels = DisGeNET(root='data')
output:
--------------------------------------------------------------------------- BadGzipFile Traceback (most recent call last) Cell In[9], line 1 ----> 1 disease_labels = DisGeNET(root='data') File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/disgenet.py:33, in DisGeNET.__init__(self, root, dsi_min, dsi_max, dpi_min, dpi_max, min_size, max_size, overlap, jaccard, data_sources, gene_id_converter, **kwargs) 30 self.jaccard = jaccard 31 self.overlap = overlap ---> 33 super().__init__( 34 root, 35 annotation_factory=DisGeNETAnnotation, 36 ontology_factory=MondoDiseaseOntology, 37 annotation_kwargs={ 38 "data_sources": data_sources, 39 "dsi_min": dsi_min, 40 "dsi_max": dsi_max, 41 "dpi_min": dpi_min, 42 "dpi_max": dpi_max, 43 "gene_id_converter": gene_id_converter, 44 }, 45 ontology_kwargs={"xref_prefix": "UMLS"}, 46 **kwargs, 47 ) File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:30, in BaseAnnotatedOntologyData.__init__(self, root, annotation_factory, ontology_factory, annotation_kwargs, ontology_kwargs, **kwargs) 28 self.annotation_kwargs = annotation_kwargs 29 self.ontology_kwargs = ontology_kwargs ---> 30 super().__init__(root, **kwargs) File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:96, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs) 94 with log_file_context(self.plogger, self.info_log_path): 95 self._download() ---> 96 self._process() 97 else: 98 self._download_archive() File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:274, in BaseData._process(self) 272 # Process data 273 self.plogger.info(f"Start processing {self.classname}...") --> 274 self.process() 276 # Pre-transform data 277 if self.pre_transform is not None: File 
~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:53, in BaseAnnotatedOntologyData.process(self) 45 # NOTE: Reprocess is not a valid option for annotation and ontology 46 # data objects as we do not save the processed data. Similarly, 47 # retransform is invalid as there is not transformation for them yet. 48 opts = { 49 "redownload": self.redownload, 50 "version": self.version, 51 "log_level": self.log_level, 52 } ---> 53 ann = self.annotation_factory(self.root, **self.annotation_kwargs, **opts) 54 ont = self.ontology_factory(self.root, **self.ontology_kwargs, **opts) 56 annot = ann.data File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/disgenet.py:89, in DisGeNETAnnotation.__init__(self, root, data_sources, dsi_min, dsi_max, dpi_min, dpi_max, **kwargs) 87 self.dpi_min = dpi_min 88 self.dpi_max = dpi_max ---> 89 super().__init__(root, **kwargs) File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:14, in BaseAnnotationData.__init__(self, root, **kwargs) 12 def __init__(self, root: str, **kwargs): 13 """Initialize BaseAnnotationData.""" ---> 14 super().__init__(root, **kwargs) File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:95, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs) 93 if version == "latest": 94 with log_file_context(self.plogger, self.info_log_path): ---> 95 self._download() 96 self._process() 97 else: File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:260, in BaseData._download(self) 258 if self.redownload or not self.download_completed(): 259 self.plogger.info(f"Start downloading {self.classname}...") --> 260 self.download() File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:30, in 
BaseAnnotationData.download(self) 23 """Download raw annotation table. 24 25 Note: 26 The raw file is assumed to be gzipped. 27 28 """ 29 self.plogger.info(f"Download annotation from: {self.annotation_url}") ---> 30 download_unzip( 31 self.annotation_url, 32 self.raw_dir, 33 zip_type=self.annotation_file_zip_type, 34 rename=self.raw_files[0], 35 logger=self.plogger, 36 ) File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/download.py:115, in download_unzip(url, root, zip_type, rename, logger) 113 elif zip_type == "gzip": 114 with open(path := osp.join(root, filename), "wb") as f: --> 115 f.write(gzip.decompress(content)) 116 logger.info(f"File saved to {path!r}") 117 elif zip_type == "none": File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:627, in decompress(data) 625 while True: 626 fp = io.BytesIO(data) --> 627 if _read_gzip_header(fp) is None: 628 return b"".join(decompressed_members) 629 # Use a zlib raw deflate compressor File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:456, in _read_gzip_header(fp) 453 return None 455 if magic != b'\037\213': --> 456 raise BadGzipFile('Not a gzipped file (%r)' % magic) 458 (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8)) 459 if method != 8: BadGzipFile: Not a gzipped file (b'<!')
Other potentially helpful info:
contents of data/DisGeNET/info/run.log:
data/DisGeNET/info/run.log
[INFO][2025-02-03 11:53:54,869][base][_process] Start processing DisGeNET... [INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation... [INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz [INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz [INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...
contents of data/DisGeNETAnnotation/info:
data/DisGeNETAnnotation/info
[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation... [INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz [INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz [INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...
data/DisGeNETAnnotation/raw/all_gene_disease_associations.tsv file exists. data/DisGeNET/raw/ is empty.
data/DisGeNETAnnotation/raw/all_gene_disease_associations.tsv
data/DisGeNET/raw/
The text was updated successfully, but these errors were encountered:
No branches or pull requests
I'm not sure whether this is caused by the earlier problem of Mondo terms having a missing field, or whether it is a new problem.
code:
output:
Other potentially helpful info:
contents of data/DisGeNET/info/run.log:
contents of data/DisGeNETAnnotation/info:
data/DisGeNETAnnotation/raw/all_gene_disease_associations.tsv file exists. data/DisGeNET/raw/ is empty.
The text was updated successfully, but these errors were encountered: