Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mondo Ontology not downloading #503

Open
kmanpearl opened this issue Feb 3, 2025 · 0 comments
Open

Mondo Ontology not downloading #503

kmanpearl opened this issue Feb 3, 2025 · 0 comments

Comments

@kmanpearl
Copy link

Not sure whether this is caused by the earlier problem with Mondo terms that had a missing field, or whether it's a new problem.

code:

from obnb.data import DisGeNET
disease_labels = DisGeNET(root='data')

output:

---------------------------------------------------------------------------
BadGzipFile                               Traceback (most recent call last)
Cell In[9], line 1
----> 1 disease_labels = DisGeNET(root='data')

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/disgenet.py:33, in DisGeNET.__init__(self, root, dsi_min, dsi_max, dpi_min, dpi_max, min_size, max_size, overlap, jaccard, data_sources, gene_id_converter, **kwargs)
     30 self.jaccard = jaccard
     31 self.overlap = overlap
---> 33 super().__init__(
     34     root,
     35     annotation_factory=DisGeNETAnnotation,
     36     ontology_factory=MondoDiseaseOntology,
     37     annotation_kwargs={
     38         "data_sources": data_sources,
     39         "dsi_min": dsi_min,
     40         "dsi_max": dsi_max,
     41         "dpi_min": dpi_min,
     42         "dpi_max": dpi_max,
     43         "gene_id_converter": gene_id_converter,
     44     },
     45     ontology_kwargs={"xref_prefix": "UMLS"},
     46     **kwargs,
     47 )

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:30, in BaseAnnotatedOntologyData.__init__(self, root, annotation_factory, ontology_factory, annotation_kwargs, ontology_kwargs, **kwargs)
     28 self.annotation_kwargs = annotation_kwargs
     29 self.ontology_kwargs = ontology_kwargs
---> 30 super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:96, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs)
     94     with log_file_context(self.plogger, self.info_log_path):
     95         self._download()
---> 96         self._process()
     97 else:
     98     self._download_archive()

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:274, in BaseData._process(self)
    272 # Process data
    273 self.plogger.info(f"Start processing {self.classname}...")
--> 274 self.process()
    276 # Pre-transform data
    277 if self.pre_transform is not None:

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:53, in BaseAnnotatedOntologyData.process(self)
     45 # NOTE: Reprocess is not a valid option for annotation and ontology
     46 # data objects as we do not save the processed data. Similarly,
     47 # retransform is invalid as there is not transformation for them yet.
     48 opts = {
     49     "redownload": self.redownload,
     50     "version": self.version,
     51     "log_level": self.log_level,
     52 }
---> 53 ann = self.annotation_factory(self.root, **self.annotation_kwargs, **opts)
     54 ont = self.ontology_factory(self.root, **self.ontology_kwargs, **opts)
     56 annot = ann.data

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/disgenet.py:89, in DisGeNETAnnotation.__init__(self, root, data_sources, dsi_min, dsi_max, dpi_min, dpi_max, **kwargs)
     87 self.dpi_min = dpi_min
     88 self.dpi_max = dpi_max
---> 89 super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:14, in BaseAnnotationData.__init__(self, root, **kwargs)
     12 def __init__(self, root: str, **kwargs):
     13     """Initialize BaseAnnotationData."""
---> 14     super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:95, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs)
     93 if version == "latest":
     94     with log_file_context(self.plogger, self.info_log_path):
---> 95         self._download()
     96         self._process()
     97 else:

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:260, in BaseData._download(self)
    258 if self.redownload or not self.download_completed():
    259     self.plogger.info(f"Start downloading {self.classname}...")
--> 260     self.download()

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:30, in BaseAnnotationData.download(self)
     23 """Download raw annotation table.
     24 
     25 Note:
     26     The raw file is assumed to be gzipped.
     27 
     28 """
     29 self.plogger.info(f"Download annotation from: {self.annotation_url}")
---> 30 download_unzip(
     31     self.annotation_url,
     32     self.raw_dir,
     33     zip_type=self.annotation_file_zip_type,
     34     rename=self.raw_files[0],
     35     logger=self.plogger,
     36 )

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/download.py:115, in download_unzip(url, root, zip_type, rename, logger)
    113 elif zip_type == "gzip":
    114     with open(path := osp.join(root, filename), "wb") as f:
--> 115         f.write(gzip.decompress(content))
    116     logger.info(f"File saved to {path!r}")
    117 elif zip_type == "none":

File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:627, in decompress(data)
    625 while True:
    626     fp = io.BytesIO(data)
--> 627     if _read_gzip_header(fp) is None:
    628         return b"".join(decompressed_members)
    629     # Use a zlib raw deflate compressor

File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:456, in _read_gzip_header(fp)
    453     return None
    455 if magic != b'\037\213':
--> 456     raise BadGzipFile('Not a gzipped file (%r)' % magic)
    458 (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
    459 if method != 8:

BadGzipFile: Not a gzipped file (b'<!')

Other potentially helpful info:

contents of data/DisGeNET/info/run.log:

[INFO][2025-02-03 11:53:54,869][base][_process] Start processing DisGeNET...
[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation...
[INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...

contents of data/DisGeNETAnnotation/info:

[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation...
[INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...

The file data/DisGeNETAnnotation/raw/all_gene_disease_associations.tsv exists; data/DisGeNET/raw/ is empty.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant