Skip to content

Commit

Permalink
Merge pull request #52 from cthoyt/add-source
Browse files Browse the repository at this point in the history
Add source column to merged context CSVs
  • Loading branch information
sierra-moxon authored Nov 21, 2023
2 parents aef6fb5 + f79de4d commit b8a2bbd
Show file tree
Hide file tree
Showing 9 changed files with 9,456 additions and 9,392 deletions.
2 changes: 2 additions & 0 deletions src/prefixmaps/data/bioregistry.csv
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ bioregistry,INO,http://purl.obolibrary.org/obo/INO_,canonical
bioregistry,insdc,http://identifiers.org/insdc/,canonical
bioregistry,insdc.cds,http://identifiers.org/ncbiprotein/,namespace_alias
bioregistry,insdc.gca,http://identifiers.org/assembly/,namespace_alias
bioregistry,insdc.gcf,http://identifiers.org/assembly/,namespace_alias
bioregistry,insdc.run,https://www.ebi.ac.uk/ena/browser/view/,canonical
bioregistry,insdc.sra,http://identifiers.org/insdc.sra/,canonical
bioregistry,intact,http://identifiers.org/intact/,canonical
Expand Down Expand Up @@ -1034,6 +1035,7 @@ bioregistry,NCBI_gi,https://www.ncbi.nlm.nih.gov/nucleotide/,namespace_alias
bioregistry,NCBI_taxid,http://purl.obolibrary.org/obo/NCBITaxon_,namespace_alias
bioregistry,NCBI_Taxon_ID,http://purl.obolibrary.org/obo/NCBITaxon_,namespace_alias
bioregistry,ncbibook,https://www.ncbi.nlm.nih.gov/books/,canonical
bioregistry,ncbidrs,http://identifiers.org/ncbidrs/,canonical
bioregistry,NCBIGene,http://identifiers.org/ncbigene/,canonical
bioregistry,ncbigi,https://www.ncbi.nlm.nih.gov/nucleotide/,namespace_alias
bioregistry,NCBIProtein,http://identifiers.org/ncbiprotein/,canonical
Expand Down
2 changes: 2 additions & 0 deletions src/prefixmaps/data/bioregistry.upper.csv
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ bioregistry,INO,http://purl.obolibrary.org/obo/INO_,canonical
bioregistry,INSDC,http://identifiers.org/insdc/,canonical
bioregistry,insdc.cds,http://identifiers.org/ncbiprotein/,namespace_alias
bioregistry,INSDC.GCA,http://identifiers.org/assembly/,namespace_alias
bioregistry,INSDC.GCF,http://identifiers.org/assembly/,namespace_alias
bioregistry,INSDC.RUN,https://www.ebi.ac.uk/ena/browser/view/,canonical
bioregistry,INSDC.SRA,http://identifiers.org/insdc.sra/,canonical
bioregistry,INTACT,http://identifiers.org/intact/,canonical
Expand Down Expand Up @@ -1034,6 +1035,7 @@ bioregistry,NCBI_GI,https://www.ncbi.nlm.nih.gov/nucleotide/,namespace_alias
bioregistry,NCBI_taxid,http://purl.obolibrary.org/obo/NCBITaxon_,namespace_alias
bioregistry,NCBI_Taxon_ID,http://purl.obolibrary.org/obo/NCBITaxon_,namespace_alias
bioregistry,NCBIBOOK,https://www.ncbi.nlm.nih.gov/books/,canonical
bioregistry,NCBIDRS,http://identifiers.org/ncbidrs/,canonical
bioregistry,NCBIGene,http://identifiers.org/ncbigene/,canonical
bioregistry,NCBIGI,https://www.ncbi.nlm.nih.gov/nucleotide/,namespace_alias
bioregistry,NCBIProtein,http://identifiers.org/ncbiprotein/,canonical
Expand Down
9,397 changes: 4,704 additions & 4,693 deletions src/prefixmaps/data/merged.csv

Large diffs are not rendered by default.

9,397 changes: 4,704 additions & 4,693 deletions src/prefixmaps/data/merged.oak.csv

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions src/prefixmaps/data/prefixcc.csv
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ prefixcc,biol,http://purl.org/NET/biol/ns#,canonical
prefixcc,biolink,https://w3id.org/biolink/vocab/,namespace_alias
prefixcc,biopax,http://www.biopax.org/release/biopax-level3.owl#,canonical
prefixcc,biordf,http://purl.org/net/biordfmicroarray/ns#,canonical
prefixcc,bioschemas,https://bioschemas.org/,canonical
prefixcc,bioskos,http://eulersharp.sourceforge.net/2003/03swap/bioSKOSSchemes#,canonical
prefixcc,biotop,http://purl.org/biotop/biotop.owl#,canonical
prefixcc,biro,http://purl.org/spar/biro/,canonical
Expand Down Expand Up @@ -287,6 +288,7 @@ prefixcc,cdtype,http://purl.org/cld/cdtype/,canonical
prefixcc,centrifuge,http://purl.org/twc/vocab/centrifuge#,canonical
prefixcc,ceo,https://linkeddata.cultureelerfgoed.nl/def/ceo#,canonical
prefixcc,ceox,https://linkeddata.cultureelerfgoed.nl/def/ceox#,canonical
prefixcc,cercabib,https://cercabib.ub.edu/,canonical
prefixcc,cerealstoo,http://rdf.ag/o/cerealstoo#,canonical
prefixcc,cerif,http://spi-fm.uca.es/neologism/cerif#,canonical
prefixcc,cert,http://www.w3.org/ns/auth/cert#,canonical
Expand Down Expand Up @@ -773,6 +775,7 @@ prefixcc,evset,http://dsnotify.org/vocab/eventset/0.1/,canonical
prefixcc,ewg,http://ethoinformatics.org/,canonical
prefixcc,ex,http://example.org/,canonical
prefixcc,example,http://www.example.org/rdf#,canonical
prefixcc,exekg,https://raw.githubusercontent.com/nsai-uio/ExeKGOntology/main/ds_exeKGOntology.ttl#,canonical
prefixcc,exif,http://www.w3.org/2003/12/exif/ns#,canonical
prefixcc,exo,https://w3id.org/example#,canonical
prefixcc,experts,http://emmo.info/emmo/application/maeo/experts#,canonical
Expand Down Expand Up @@ -1407,6 +1410,7 @@ prefixcc,mi,http://www.marineinfo.org/ns/ontology#,canonical
prefixcc,mibc,http://marineinfo.org/ns/library/bibcodes#,canonical
prefixcc,mibt,https://marineinfo.org/ns/library/bibtypes#,canonical
prefixcc,mico,http://www.mico-project.eu/ns/platform/1.0/schema#,canonical
prefixcc,mifesto,https://w3id.org/mifesto#,canonical
prefixcc,mil,http://rdf.muninn-project.org/ontologies/military#,canonical
prefixcc,mime,https://www.iana.org/assignments/media-types/,canonical
prefixcc,mindat,https://www.mindat.org/,canonical
Expand Down Expand Up @@ -1489,8 +1493,8 @@ prefixcc,ncal,http://www.semanticdesktop.org/ontologies/2007/04/02/ncal#,canonic
prefixcc,ncbi,https://www.ncbi.nlm.nih.gov/,canonical
prefixcc,ncbigene,http://identifiers.org/ncbigene/,canonical
prefixcc,ncbitaxon,http://purl.org/obo/owl/NCBITaxon#,canonical
prefixcc,ncicp,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#,namespace_alias
prefixcc,ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#,canonical
prefixcc,ncicp,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#,canonical
prefixcc,ncit,https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=,canonical
prefixcc,nco,http://www.semanticdesktop.org/ontologies/2007/03/22/nco#,canonical
prefixcc,ndl,http://schemas.ogf.org/nml/2013/05/base#,canonical
prefixcc,ndnp,http://chroniclingamerica.loc.gov/terms#,canonical
Expand Down Expand Up @@ -1779,6 +1783,7 @@ prefixcc,pattern,http://www.essepuntato.it/2008/12/pattern#,canonical
prefixcc,pav,http://purl.org/pav/,canonical
prefixcc,pay,http://reference.data.gov.uk/def/payment#,namespace_alias
prefixcc,payment,http://reference.data.gov.uk/def/payment#,canonical
prefixcc,pbac,https://w3id.org/pbac#,canonical
prefixcc,pbo,http://purl.org/ontology/pbo/core#,canonical
prefixcc,pbody,http://reference.data.gov.uk/def/public-body/,canonical
prefixcc,pc,http://purl.org/procurement/public-contracts#,canonical
Expand Down Expand Up @@ -1955,6 +1960,7 @@ prefixcc,qud,http://qudt.org/1.1/schema/qudt#,canonical
prefixcc,qudt,http://qudt.org/schema/qudt/,canonical
prefixcc,quest,https://rb.gy/ntg7l/,canonical
prefixcc,quid,https://w3id.org/quid/,canonical
prefixcc,quit,http://quit.aksw.org/vocab/,canonical
prefixcc,quran,http://khalidaloufi.sa/quran#,canonical
prefixcc,quty,http://www.telegraphis.net/ontology/measurement/quantity#,canonical
prefixcc,qvoc,http://mlode.nlp2rdf.org/quranvocab#,canonical
Expand Down Expand Up @@ -2259,6 +2265,7 @@ prefixcc,security,http://securitytoolbox.appspot.com/securityMain#,canonical
prefixcc,sede,http://eventography.org/sede/0.1/,canonical
prefixcc,seeds,http://deductions.github.io/seeds.owl.ttl#,canonical
prefixcc,sem,http://semanticweb.cs.vu.nl/2009/11/sem/,canonical
prefixcc,semapv,https://w3id.org/semapv/vocab/,canonical
prefixcc,semio,http://www.lingvoj.org/semio#,canonical
prefixcc,semiot,http://w3id.org/semiot/ontologies/semiot#,canonical
prefixcc,semsur,http://purl.org/SemSur/,canonical
Expand Down Expand Up @@ -2601,6 +2608,7 @@ prefixcc,states,http://www.w3.org/2005/07/aaa#,canonical
prefixcc,static,http://vocab-ld.org/vocab/static-ld#,canonical
prefixcc,stats,http://purl.org/rdfstats/stats#,canonical
prefixcc,status,http://www.w3.org/2003/06/sw-vocab-status/ns#,namespace_alias
prefixcc,stax,https://w3id.org/stax/ontology#,canonical
prefixcc,steel,http://ontorule-project.eu/resources/steel-30#,canonical
prefixcc,stencila,http://schema.stenci.la/,canonical
prefixcc,step,http://purl.org/net/step#,canonical
Expand Down Expand Up @@ -2937,6 +2945,7 @@ prefixcc,wotc,http://purl.org/wot-catalogue#,canonical
prefixcc,wotsec,https://www.w3.org/2019/wot/security#,canonical
prefixcc,wp,http://vocabularies.wikipathways.org/wp#,canonical
prefixcc,wro,http://purl.org/net/wf4ever/ro#,canonical
prefixcc,wrroc,https://w3id.org/ro/terms/workflow-run#,canonical
prefixcc,ws,http://www.w3.org/ns/pim/space#,namespace_alias
prefixcc,wsc,http://www.openk.org/wscaim.owl#,canonical
prefixcc,wscaim,http://www.openk.org/wscaim.owl#,namespace_alias
Expand Down
11 changes: 11 additions & 0 deletions src/prefixmaps/data/w3id.csv
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ w3id,cerrotti,https://w3id.org/cerrotti/,canonical
w3id,certainty_nanopubs,https://w3id.org/certainty_nanopubs/,canonical
w3id,cevo,https://w3id.org/cevo/,canonical
w3id,chainpoint,https://w3id.org/chainpoint/,canonical
w3id,chalkgrp,https://w3id.org/chalkgrp/,canonical
w3id,character-computing,https://w3id.org/character-computing/,canonical
w3id,charity-organization,https://w3id.org/charity-organization/,canonical
w3id,charta77,https://w3id.org/charta77/,canonical
Expand All @@ -161,6 +162,7 @@ w3id,cld,https://w3id.org/cld/,canonical
w3id,clinga,https://w3id.org/clinga/,canonical
w3id,clipc,https://w3id.org/clipc/,canonical
w3id,clodg,https://w3id.org/clodg/,canonical
w3id,CMECS,https://w3id.org/CMECS/,canonical
w3id,cmip6dr,https://w3id.org/cmip6dr/,canonical
w3id,cntf,https://w3id.org/cntf/,canonical
w3id,cocoon,https://w3id.org/cocoon/,canonical
Expand Down Expand Up @@ -312,6 +314,7 @@ w3id,ecodigit,https://w3id.org/ecodigit/,canonical
w3id,ecsel-dr,https://w3id.org/ecsel-dr/,canonical
w3id,ecsel-dr-prc-PMV,https://w3id.org/ecsel-dr-prc-PMV/,canonical
w3id,ecsel-dr-sn-SSP,https://w3id.org/ecsel-dr-sn-SSP/,canonical
w3id,EDH_Ctagged,https://w3id.org/EDH_Ctagged/,canonical
w3id,edu-sharing,https://w3id.org/edu-sharing/,canonical
w3id,education,https://w3id.org/education/,canonical
w3id,edukg,https://w3id.org/edukg/,canonical
Expand Down Expand Up @@ -631,6 +634,7 @@ w3id,mgkb,https://w3id.org/mgkb/,canonical
w3id,mica,https://w3id.org/mica/,canonical
w3id,midas-catalog,https://w3id.org/midas-catalog/,canonical
w3id,midas-metadata,https://w3id.org/midas-metadata/,canonical
w3id,mifesto,https://w3id.org/mifesto/,canonical
w3id,minerva,https://w3id.org/minerva/,canonical
w3id,mint,https://w3id.org/mint/,canonical
w3id,minte,https://w3id.org/minte/,canonical
Expand Down Expand Up @@ -787,10 +791,12 @@ w3id,paradise,https://w3id.org/paradise/,canonical
w3id,patent_ontologies,https://w3id.org/patent_ontologies/,canonical
w3id,payments,https://w3id.org/payments/,canonical
w3id,payswarm,https://w3id.org/payswarm/,canonical
w3id,pbac,https://w3id.org/pbac/,canonical
w3id,pbs,https://w3id.org/pbs/,canonical
w3id,pc,https://w3id.org/pc/,canonical
w3id,peco,https://w3id.org/peco/,canonical
w3id,pedigree,https://w3id.org/pedigree/,canonical
w3id,peh,https://w3id.org/peh/,canonical
w3id,people,https://w3id.org/people/,canonical
w3id,pep,https://w3id.org/pep/,canonical
w3id,per,https://w3id.org/per/,canonical
Expand Down Expand Up @@ -834,6 +840,7 @@ w3id,qb4solap,https://w3id.org/qb4solap/,canonical
w3id,quality,https://w3id.org/quality/,canonical
w3id,qudt,https://w3id.org/qudt/,canonical
w3id,quid,https://w3id.org/quid/,canonical
w3id,r74n,https://w3id.org/r74n/,canonical
w3id,rai,https://w3id.org/rai/,canonical
w3id,rail,https://w3id.org/rail/,canonical
w3id,rains,https://w3id.org/rains/,canonical
Expand Down Expand Up @@ -929,6 +936,7 @@ w3id,seneca,https://w3id.org/seneca/,canonical
w3id,sense,https://w3id.org/sense/,canonical
w3id,sentitrack,https://w3id.org/sentitrack/,canonical
w3id,seo,https://w3id.org/seo/,canonical
w3id,seovoc,https://w3id.org/seovoc/,canonical
w3id,sepses,https://w3id.org/sepses/,canonical
w3id,serdif,https://w3id.org/serdif/,canonical
w3id,sfs-ontology,https://w3id.org/sfs-ontology/,canonical
Expand Down Expand Up @@ -975,6 +983,7 @@ w3id,SpOTy,https://w3id.org/SpOTy/,canonical
w3id,sqo,https://w3id.org/sqo/,canonical
w3id,squap,https://w3id.org/squap/,canonical
w3id,squirrel,https://w3id.org/squirrel/,canonical
w3id,sri,https://w3id.org/sri/,canonical
w3id,sri-lanka,https://w3id.org/sri-lanka/,canonical
w3id,srmo,https://w3id.org/srmo/,canonical
w3id,srr,https://w3id.org/srr/,canonical
Expand All @@ -984,6 +993,7 @@ w3id,sssom,https://w3id.org/sssom/,canonical
w3id,stahl,https://w3id.org/stahl/,canonical
w3id,stargate-h2020,https://w3id.org/stargate-h2020/,canonical
w3id,stav,https://w3id.org/stav/,canonical
w3id,stax,https://w3id.org/stax/,canonical
w3id,steel,https://w3id.org/steel/,canonical
w3id,stirdata,https://w3id.org/stirdata/,canonical
w3id,stlab,https://w3id.org/stlab/,canonical
Expand Down Expand Up @@ -1033,6 +1043,7 @@ w3id,tso,https://w3id.org/tso/,canonical
w3id,tsso,https://w3id.org/tsso/,canonical
w3id,ttla,https://w3id.org/ttla/,canonical
w3id,TTRpg,https://w3id.org/TTRpg/,canonical
w3id,tvstationjp,https://w3id.org/tvstationjp/,canonical
w3id,twins,https://w3id.org/twins/,canonical
w3id,uco,https://w3id.org/uco/,canonical
w3id,ufo,https://w3id.org/ufo/,canonical
Expand Down
10 changes: 9 additions & 1 deletion src/prefixmaps/datamodel/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ class PrefixExpansion:
status: StatusType
"""Indicates whether the expansion is canonical, a prefix alias, a namespace alias, or both."""

expansion_source: Optional[str] = None
"""Indicates the source of the prefix expansion."""

def canonical(self) -> bool:
"""
True if this is the canonical mapping in both directions.
Expand Down Expand Up @@ -153,14 +156,15 @@ def combine(self, context: "Context"):
:return:
"""
for pe in context.prefix_expansions:
self.add_prefix(pe.prefix, pe.namespace, pe.status)
self.add_prefix(pe.prefix, pe.namespace, pe.status, expansion_source=context.name)

def add_prefix(
self,
prefix: PREFIX,
namespace: NAMESPACE,
status: StatusType = StatusType.canonical,
preferred: bool = False,
expansion_source: Optional[str] = None,
):
"""
Adds a prefix expansion to this context.
Expand All @@ -176,6 +180,9 @@ def add_prefix(
:param namespace: namespace to be added
:param status: the status of the prefix being added
:param preferred:
:param expansion_source: An optional annotation to be used when merging contexts together.
The source will keep track of the original context that a given prefix
expansion came from. This is used in :meth:`Context.combine`.
:return:
"""
# TODO: check status
Expand Down Expand Up @@ -203,6 +210,7 @@ def add_prefix(
prefix=prefix,
namespace=namespace,
status=status,
expansion_source=expansion_source,
)
)

Expand Down
2 changes: 1 addition & 1 deletion src/prefixmaps/ingest/etl_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def run_etl(output_directory: Union[str, Path]) -> None:
# Write all contexts
for name, context in contexts.items():
with output_directory.joinpath(f"{name}.csv").open("w", encoding="UTF-8") as file:
context_to_file(context, file)
context_to_file(context, file, include_expansion_source=context.name in COMBINED)


@click.command
Expand Down
14 changes: 12 additions & 2 deletions src/prefixmaps/io/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,27 @@ def _key(pe: PrefixExpansion):
return pe.prefix.casefold(), STATUS_TYPE_ORDER[pe.status]


def context_to_file(
    context: Context, file: TextIO, *, include_expansion_source: bool = False
) -> None:
    """
    Writes a context to a file as CSV, one row per prefix expansion.

    A header row is written first; the rows follow in the order given by
    sorting ``context.prefix_expansions`` with :func:`_key`.

    The context and its prefix expansions are left unmodified.

    :param context: the context whose prefix expansions are written
    :param file: a writable text stream that receives the CSV output
    :param include_expansion_source: If true, include an "expansion_source" column.
        This is useful for writing merged contexts since it says the highest
        priority simple context from which the row corresponding to a
        :class:`PrefixExpansion` came.
    :return:
    """
    field_names = ["context", "prefix", "namespace", "status"]
    if include_expansion_source:
        field_names.append("expansion_source")
    writer = DictWriter(file, fieldnames=field_names)
    writer.writeheader()
    for pe in sorted(context.prefix_expansions, key=_key):
        # vars() hands back the instance's live attribute dict, so work on a
        # copy: editing it in place would replace pe.status with a plain string
        # and drop pe.expansion_source from the PrefixExpansion itself.
        row = dict(vars(pe))
        row["status"] = pe.status.value
        if not include_expansion_source:
            # DictWriter rejects keys that are not listed in fieldnames.
            row.pop("expansion_source", None)
        writer.writerow(row)

0 comments on commit b8a2bbd

Please sign in to comment.