From 60f52d26873248a596602dd977236b2ffb5d74b4 Mon Sep 17 00:00:00 2001 From: Paco Nathan Date: Thu, 24 Feb 2022 15:17:52 -0800 Subject: [PATCH 1/7] WIP integration of morph-kgc --- README.md | 9 +- changelog.txt | 7 + dat/mapping.csv.ttl | 962 +++++++++++++++++++++++++++++++++++++++ docs/build.md | 1 + docs/depend.md | 3 +- examples/ex2_1.ipynb | 167 +++++++ examples/ex6_2.ipynb | 117 ----- kglab/decorators.py | 10 +- kglab/esp.py | 17 +- kglab/external_import.py | 14 +- kglab/gpviz.py | 7 +- kglab/kglab.py | 209 +++++---- kglab/pkg_types.py | 14 +- kglab/srl.py | 16 +- kglab/subg.py | 17 +- kglab/topo.py | 14 +- kglab/util.py | 12 +- kglab/version.py | 9 +- requirements-dev.txt | 1 + requirements.txt | 1 + test.py | 15 +- 21 files changed, 1351 insertions(+), 271 deletions(-) create mode 100644 dat/mapping.csv.ttl create mode 100644 examples/ex2_1.ipynb delete mode 100644 examples/ex6_2.ipynb diff --git a/README.md b/README.md index 9082a86..ff5216f 100644 --- a/README.md +++ b/README.md @@ -134,8 +134,13 @@ See: width="231" /> - ## Test - It is possible to test against IPython Notebooks with: `pytest --nbmake examples/*ipynb` + +## Testing + +It's possible to run tests with any of the Jupyter notebooks using: +```bash +python3 -m pytest --nbmake examples/*ipynb +``` ## License and Copyright diff --git a/changelog.txt b/changelog.txt index 0b675d9..b475625 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,5 +1,12 @@ # `kglab` changelog +## 0.4.4 + +2022-02-24 + + * integration of `morph-kgc`; kudos @Mec-iS and many thanks to @ArenasGuerreroJulian + + ## 0.4.3 2022-02-10 diff --git a/dat/mapping.csv.ttl b/dat/mapping.csv.ttl new file mode 100644 index 0000000..d88cd1d --- /dev/null +++ b/dat/mapping.csv.ttl @@ -0,0 +1,962 @@ +@prefix rr: <http://www.w3.org/ns/r2rml#>. +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>. +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>. +@prefix fnml: <http://semweb.mmlab.be/ns/fnml#>. +@prefix fno: <https://w3id.org/function/ontology#>. +@prefix d2rq: <http://www.wiwiss.fu-berlin.de/suhl/bizer/D2RQ/0.1#>. +@prefix void: <http://rdfs.org/ns/void#>. +@prefix dc: <http://purl.org/dc/terms/>. +@prefix foaf: <http://xmlns.com/foaf/0.1/>. +@prefix rml: <http://semweb.mmlab.be/ns/rml#>. +@prefix ql: <http://semweb.mmlab.be/ns/ql#>. +@prefix : <http://mapping.example.com/>. +@prefix xsd: <http://www.w3.org/2001/XMLSchema#>. +@prefix rev: <http://purl.org/stuff/rev#>. +@prefix gtfs: <http://vocab.gtfs.org/terms#>. 
+@prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>. +@prefix schema: <http://schema.org/>. +@prefix dct: <http://purl.org/dc/terms/>. + +:rules_000 a void:Dataset; + void:exampleResource :map_stoptimes_000. +:map_stoptimes_000 rml:logicalSource :source_000. +:source_000 a rml:LogicalSource; + rml:source "data/STOP_TIMES.csv"; + rml:referenceFormulation ql:CSV. +:map_stoptimes_000 a rr:TriplesMap; + rdfs:label "stoptimes". +:s_000 a rr:SubjectMap. +:map_stoptimes_000 rr:subjectMap :s_000. +:s_000 rr:template "http://transport.linkeddata.es/madrid/metro/stoptimes/{trip_id}-{stop_id}-{arrival_time}". +:pom_000 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_000. +:pm_000 a rr:PredicateMap. +:pom_000 rr:predicateMap :pm_000. +:pm_000 rr:constant rdf:type. +:pom_000 rr:objectMap :om_000. +:om_000 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#StopTime"; + rr:termType rr:IRI. +:pom_001 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_001. +:pm_001 a rr:PredicateMap. +:pom_001 rr:predicateMap :pm_001. +:pm_001 rr:constant gtfs:arrivalTime. +:pom_001 rr:objectMap :om_001. +:om_001 a rr:ObjectMap; + rml:reference "arrival_time"; + rr:termType rr:Literal; + rr:datatype xsd:duration. +:pom_002 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_002. +:pm_002 a rr:PredicateMap. +:pom_002 rr:predicateMap :pm_002. +:pm_002 rr:constant gtfs:departureTime. +:pom_002 rr:objectMap :om_002. +:om_002 a rr:ObjectMap; + rml:reference "departure_time"; + rr:termType rr:Literal; + rr:datatype xsd:duration. +:pom_003 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_003. +:pm_003 a rr:PredicateMap. +:pom_003 rr:predicateMap :pm_003. +:pm_003 rr:constant gtfs:stopSequence. +:pom_003 rr:objectMap :om_003. +:om_003 a rr:ObjectMap; + rml:reference "stop_sequence"; + rr:termType rr:Literal; + rr:datatype xsd:integer. +:pom_004 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_004. +:pm_004 a rr:PredicateMap. +:pom_004 rr:predicateMap :pm_004. 
+:pm_004 rr:constant gtfs:headsign. +:pom_004 rr:objectMap :om_004. +:om_004 a rr:ObjectMap; + rml:reference "stop_headsign"; + rr:termType rr:Literal. +:pom_005 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_005. +:pm_005 a rr:PredicateMap. +:pom_005 rr:predicateMap :pm_005. +:pm_005 rr:constant gtfs:pickupType. +:pom_005 rr:objectMap :om_005. +:om_005 a rr:ObjectMap; + rr:template "http://transport.linkeddata.es/resource/PickupType/{pickup_type}"; + rr:termType rr:IRI. +:pom_006 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_006. +:pm_006 a rr:PredicateMap. +:pom_006 rr:predicateMap :pm_006. +:pm_006 rr:constant gtfs:dropOffType. +:pom_006 rr:objectMap :om_006. +:om_006 a rr:ObjectMap; + rr:template "http://transport.linkeddata.es/resource/DropOffType/{drop_off_type}"; + rr:termType rr:IRI. +:pom_007 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_007. +:pm_007 a rr:PredicateMap. +:pom_007 rr:predicateMap :pm_007. +:pm_007 rr:constant gtfs:distanceTraveled. +:pom_007 rr:objectMap :om_007. +:om_007 a rr:ObjectMap; + rml:reference "shape_dist_traveled"; + rr:termType rr:Literal. +:pom_008 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_008. +:pm_008 a rr:PredicateMap. +:pom_008 rr:predicateMap :pm_008. +:pm_008 rr:constant gtfs:trip. +:pom_008 rr:objectMap :om_008. +:pom_009 a rr:PredicateObjectMap. +:map_stoptimes_000 rr:predicateObjectMap :pom_009. +:pm_009 a rr:PredicateMap. +:pom_009 rr:predicateMap :pm_009. +:pm_009 rr:constant gtfs:stop. +:pom_009 rr:objectMap :om_009. +:rules_000 void:exampleResource :map_trips_000. +:map_trips_000 rml:logicalSource :source_001. +:source_001 a rml:LogicalSource; + rml:source "data/TRIPS.csv"; + rml:referenceFormulation ql:CSV. +:map_trips_000 a rr:TriplesMap; + rdfs:label "trips". +:s_001 a rr:SubjectMap. +:map_trips_000 rr:subjectMap :s_001. +:s_001 rr:template "http://transport.linkeddata.es/madrid/metro/trips/{trip_id}". 
+:pom_010 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_010. +:pm_010 a rr:PredicateMap. +:pom_010 rr:predicateMap :pm_010. +:pm_010 rr:constant rdf:type. +:pom_010 rr:objectMap :om_010. +:om_010 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Trip"; + rr:termType rr:IRI. +:pom_011 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_011. +:pm_011 a rr:PredicateMap. +:pom_011 rr:predicateMap :pm_011. +:pm_011 rr:constant gtfs:headsign. +:pom_011 rr:objectMap :om_011. +:om_011 a rr:ObjectMap; + rml:reference "trip_headsign"; + rr:termType rr:Literal. +:pom_012 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_012. +:pm_012 a rr:PredicateMap. +:pom_012 rr:predicateMap :pm_012. +:pm_012 rr:constant gtfs:shortName. +:pom_012 rr:objectMap :om_012. +:om_012 a rr:ObjectMap; + rml:reference "trip_short_name"; + rr:termType rr:Literal. +:pom_013 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_013. +:pm_013 a rr:PredicateMap. +:pom_013 rr:predicateMap :pm_013. +:pm_013 rr:constant gtfs:direction. +:pom_013 rr:objectMap :om_013. +:om_013 a rr:ObjectMap; + rml:reference "direction_id"; + rr:termType rr:Literal. +:pom_014 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_014. +:pm_014 a rr:PredicateMap. +:pom_014 rr:predicateMap :pm_014. +:pm_014 rr:constant gtfs:block. +:pom_014 rr:objectMap :om_014. +:om_014 a rr:ObjectMap; + rml:reference "block_id"; + rr:termType rr:Literal. +:pom_015 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_015. +:pm_015 a rr:PredicateMap. +:pom_015 rr:predicateMap :pm_015. +:pm_015 rr:constant gtfs:wheelchairAccessible. +:pom_015 rr:objectMap :om_015. +:om_015 a rr:ObjectMap; + rr:template "http://transport.linkeddata.es/resource/WheelchairBoardingStatus/{wheelchair_accessible}"; + rr:termType rr:IRI. +:pom_016 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_016. +:pm_016 a rr:PredicateMap. 
+:pom_016 rr:predicateMap :pm_016. +:pm_016 rr:constant gtfs:service. +:pom_016 rr:objectMap :om_016, :om_017. +:pom_017 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_017. +:pm_017 a rr:PredicateMap. +:pom_017 rr:predicateMap :pm_017. +:pm_017 rr:constant gtfs:route. +:pom_017 rr:objectMap :om_018. +:pom_018 a rr:PredicateObjectMap. +:map_trips_000 rr:predicateObjectMap :pom_018. +:pm_018 a rr:PredicateMap. +:pom_018 rr:predicateMap :pm_018. +:pm_018 rr:constant gtfs:shape. +:pom_018 rr:objectMap :om_019. +:rules_000 void:exampleResource :map_routes_000. +:map_routes_000 rml:logicalSource :source_002. +:source_002 a rml:LogicalSource; + rml:source "data/ROUTES.csv"; + rml:referenceFormulation ql:CSV. +:map_routes_000 a rr:TriplesMap; + rdfs:label "routes". +:s_002 a rr:SubjectMap. +:map_routes_000 rr:subjectMap :s_002. +:s_002 rr:template "http://transport.linkeddata.es/madrid/metro/routes/{route_id}". +:pom_019 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_019. +:pm_019 a rr:PredicateMap. +:pom_019 rr:predicateMap :pm_019. +:pm_019 rr:constant rdf:type. +:pom_019 rr:objectMap :om_020. +:om_020 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Route"; + rr:termType rr:IRI. +:pom_020 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_020. +:pm_020 a rr:PredicateMap. +:pom_020 rr:predicateMap :pm_020. +:pm_020 rr:constant gtfs:shortName. +:pom_020 rr:objectMap :om_021. +:om_021 a rr:ObjectMap; + rml:reference "route_short_name"; + rr:termType rr:Literal. +:pom_021 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_021. +:pm_021 a rr:PredicateMap. +:pom_021 rr:predicateMap :pm_021. +:pm_021 rr:constant gtfs:longName. +:pom_021 rr:objectMap :om_022. +:om_022 a rr:ObjectMap; + rml:reference "route_long_name"; + rr:termType rr:Literal. +:pom_022 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_022. +:pm_022 a rr:PredicateMap. +:pom_022 rr:predicateMap :pm_022. 
+:pm_022 rr:constant dct:description. +:pom_022 rr:objectMap :om_023. +:om_023 a rr:ObjectMap; + rml:reference "route_desc"; + rr:termType rr:Literal. +:pom_023 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_023. +:pm_023 a rr:PredicateMap. +:pom_023 rr:predicateMap :pm_023. +:pm_023 rr:constant gtfs:routeType. +:pom_023 rr:objectMap :om_024. +:om_024 a rr:ObjectMap; + rr:template "http://transport.linkeddata.es/resource/RouteType/{route_type}"; + rr:termType rr:IRI. +:pom_024 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_024. +:pm_024 a rr:PredicateMap. +:pom_024 rr:predicateMap :pm_024. +:pm_024 rr:constant gtfs:routeUrl. +:pom_024 rr:objectMap :om_025. +:om_025 a rr:ObjectMap; + rml:reference "route_url"; + rr:termType rr:IRI. +:pom_025 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_025. +:pm_025 a rr:PredicateMap. +:pom_025 rr:predicateMap :pm_025. +:pm_025 rr:constant gtfs:color. +:pom_025 rr:objectMap :om_026. +:om_026 a rr:ObjectMap; + rml:reference "route_color"; + rr:termType rr:Literal. +:pom_026 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_026. +:pm_026 a rr:PredicateMap. +:pom_026 rr:predicateMap :pm_026. +:pm_026 rr:constant gtfs:textColor. +:pom_026 rr:objectMap :om_027. +:om_027 a rr:ObjectMap; + rml:reference "route_text_color"; + rr:termType rr:Literal. +:pom_027 a rr:PredicateObjectMap. +:map_routes_000 rr:predicateObjectMap :pom_027. +:pm_027 a rr:PredicateMap. +:pom_027 rr:predicateMap :pm_027. +:pm_027 rr:constant gtfs:agency. +:pom_027 rr:objectMap :om_028. +:rules_000 void:exampleResource :map_agency_000. +:map_agency_000 rml:logicalSource :source_003. +:source_003 a rml:LogicalSource; + rml:source "data/AGENCY.csv"; + rml:referenceFormulation ql:CSV. +:map_agency_000 a rr:TriplesMap; + rdfs:label "agency". +:s_003 a rr:SubjectMap. +:map_agency_000 rr:subjectMap :s_003. +:s_003 rr:template "http://transport.linkeddata.es/madrid/agency/{agency_id}". 
+:pom_028 a rr:PredicateObjectMap. +:map_agency_000 rr:predicateObjectMap :pom_028. +:pm_028 a rr:PredicateMap. +:pom_028 rr:predicateMap :pm_028. +:pm_028 rr:constant rdf:type. +:pom_028 rr:objectMap :om_029. +:om_029 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Agency"; + rr:termType rr:IRI. +:pom_029 a rr:PredicateObjectMap. +:map_agency_000 rr:predicateObjectMap :pom_029. +:pm_029 a rr:PredicateMap. +:pom_029 rr:predicateMap :pm_029. +:pm_029 rr:constant foaf:page. +:pom_029 rr:objectMap :om_030. +:om_030 a rr:ObjectMap; + rml:reference "agency_url"; + rr:termType rr:IRI. +:pom_030 a rr:PredicateObjectMap. +:map_agency_000 rr:predicateObjectMap :pom_030. +:pm_030 a rr:PredicateMap. +:pom_030 rr:predicateMap :pm_030. +:pm_030 rr:constant foaf:name. +:pom_030 rr:objectMap :om_031. +:om_031 a rr:ObjectMap; + rml:reference "agency_name"; + rr:termType rr:Literal. +:pom_031 a rr:PredicateObjectMap. +:map_agency_000 rr:predicateObjectMap :pom_031. +:pm_031 a rr:PredicateMap. +:pom_031 rr:predicateMap :pm_031. +:pm_031 rr:constant gtfs:timeZone. +:pom_031 rr:objectMap :om_032. +:om_032 a rr:ObjectMap; + rml:reference "agency_timezone"; + rr:termType rr:Literal. +:pom_032 a rr:PredicateObjectMap. +:map_agency_000 rr:predicateObjectMap :pom_032. +:pm_032 a rr:PredicateMap. +:pom_032 rr:predicateMap :pm_032. +:pm_032 rr:constant dct:language. +:pom_032 rr:objectMap :om_033. +:om_033 a rr:ObjectMap; + rml:reference "agency_lang"; + rr:termType rr:Literal. +:pom_033 a rr:PredicateObjectMap. +:map_agency_000 rr:predicateObjectMap :pom_033. +:pm_033 a rr:PredicateMap. +:pom_033 rr:predicateMap :pm_033. +:pm_033 rr:constant foaf:phone. +:pom_033 rr:objectMap :om_034. +:om_034 a rr:ObjectMap; + rml:reference "agency_phone"; + rr:termType rr:Literal. +:pom_034 a rr:PredicateObjectMap. +:map_agency_000 rr:predicateObjectMap :pom_034. +:pm_034 a rr:PredicateMap. +:pom_034 rr:predicateMap :pm_034. +:pm_034 rr:constant gtfs:fareUrl. +:pom_034 rr:objectMap :om_035. 
+:om_035 a rr:ObjectMap; + rml:reference "agency_fare_url"; + rr:termType rr:IRI. +:rules_000 void:exampleResource :map_stops_000. +:map_stops_000 rml:logicalSource :source_004. +:source_004 a rml:LogicalSource; + rml:source "data/STOPS.csv"; + rml:referenceFormulation ql:CSV. +:map_stops_000 a rr:TriplesMap; + rdfs:label "stops". +:s_004 a rr:SubjectMap. +:map_stops_000 rr:subjectMap :s_004. +:s_004 rr:template "http://transport.linkeddata.es/madrid/metro/stops/{stop_id}". +:pom_035 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_035. +:pm_035 a rr:PredicateMap. +:pom_035 rr:predicateMap :pm_035. +:pm_035 rr:constant rdf:type. +:pom_035 rr:objectMap :om_036. +:om_036 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Stop"; + rr:termType rr:IRI. +:pom_036 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_036. +:pm_036 a rr:PredicateMap. +:pom_036 rr:predicateMap :pm_036. +:pm_036 rr:constant gtfs:code. +:pom_036 rr:objectMap :om_037. +:om_037 a rr:ObjectMap; + rml:reference "stop_code"; + rr:termType rr:Literal. +:pom_037 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_037. +:pm_037 a rr:PredicateMap. +:pom_037 rr:predicateMap :pm_037. +:pm_037 rr:constant dct:identifier. +:pom_037 rr:objectMap :om_038. +:om_038 a rr:ObjectMap; + rml:reference "stop_id"; + rr:termType rr:Literal. +:pom_038 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_038. +:pm_038 a rr:PredicateMap. +:pom_038 rr:predicateMap :pm_038. +:pm_038 rr:constant foaf:name. +:pom_038 rr:objectMap :om_039. +:om_039 a rr:ObjectMap; + rml:reference "stop_name"; + rr:termType rr:Literal. +:pom_039 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_039. +:pm_039 a rr:PredicateMap. +:pom_039 rr:predicateMap :pm_039. +:pm_039 rr:constant dct:description. +:pom_039 rr:objectMap :om_040. +:om_040 a rr:ObjectMap; + rml:reference "stop_desc"; + rr:termType rr:Literal. +:pom_040 a rr:PredicateObjectMap. 
+:map_stops_000 rr:predicateObjectMap :pom_040. +:pm_040 a rr:PredicateMap. +:pom_040 rr:predicateMap :pm_040. +:pm_040 rr:constant geo:lat. +:pom_040 rr:objectMap :om_041. +:om_041 a rr:ObjectMap; + rml:reference "stop_lat"; + rr:termType rr:Literal; + rr:datatype xsd:double. +:pom_041 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_041. +:pm_041 a rr:PredicateMap. +:pom_041 rr:predicateMap :pm_041. +:pm_041 rr:constant geo:long. +:pom_041 rr:objectMap :om_042. +:om_042 a rr:ObjectMap; + rml:reference "stop_lon"; + rr:termType rr:Literal; + rr:datatype xsd:double. +:pom_042 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_042. +:pm_042 a rr:PredicateMap. +:pom_042 rr:predicateMap :pm_042. +:pm_042 rr:constant gtfs:zone. +:pom_042 rr:objectMap :om_043. +:om_043 a rr:ObjectMap; + rml:reference "zone_id"; + rr:termType rr:Literal. +:pom_043 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_043. +:pm_043 a rr:PredicateMap. +:pom_043 rr:predicateMap :pm_043. +:pm_043 rr:constant foaf:page. +:pom_043 rr:objectMap :om_044. +:om_044 a rr:ObjectMap; + rml:reference "stop_url"; + rr:termType rr:IRI. +:pom_044 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_044. +:pm_044 a rr:PredicateMap. +:pom_044 rr:predicateMap :pm_044. +:pm_044 rr:constant gtfs:locationType. +:pom_044 rr:objectMap :om_045. +:om_045 a rr:ObjectMap; + rr:template "http://transport.linkeddata.es/resource/LocationType/{location_type}"; + rr:termType rr:IRI. +:pom_045 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_045. +:pm_045 a rr:PredicateMap. +:pom_045 rr:predicateMap :pm_045. +:pm_045 rr:constant gtfs:timeZone. +:pom_045 rr:objectMap :om_046. +:om_046 a rr:ObjectMap; + rml:reference "stop_timezone"; + rr:termType rr:Literal. +:pom_046 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_046. +:pm_046 a rr:PredicateMap. +:pom_046 rr:predicateMap :pm_046. 
+:pm_046 rr:constant gtfs:wheelchairAccessible. +:pom_046 rr:objectMap :om_047. +:om_047 a rr:ObjectMap; + rr:template "http://transport.linkeddata.es/resource/WheelchairBoardingStatus/{wheelchair_boarding}"; + rr:termType rr:IRI. +:pom_047 a rr:PredicateObjectMap. +:map_stops_000 rr:predicateObjectMap :pom_047. +:pm_047 a rr:PredicateMap. +:pom_047 rr:predicateMap :pm_047. +:pm_047 rr:constant gtfs:parentStation. +:pom_047 rr:objectMap :om_048. +:rules_000 void:exampleResource :map_services1_000. +:map_services1_000 rml:logicalSource :source_005. +:source_005 a rml:LogicalSource; + rml:source "data/CALENDAR.csv"; + rml:referenceFormulation ql:CSV. +:map_services1_000 a rr:TriplesMap; + rdfs:label "services1". +:s_005 a rr:SubjectMap. +:map_services1_000 rr:subjectMap :s_005. +:s_005 rr:template "http://transport.linkeddata.es/madrid/metro/services/{service_id}". +:pom_048 a rr:PredicateObjectMap. +:map_services1_000 rr:predicateObjectMap :pom_048. +:pm_048 a rr:PredicateMap. +:pom_048 rr:predicateMap :pm_048. +:pm_048 rr:constant rdf:type. +:pom_048 rr:objectMap :om_049. +:om_049 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Service"; + rr:termType rr:IRI. +:pom_049 a rr:PredicateObjectMap. +:map_services1_000 rr:predicateObjectMap :pom_049. +:pm_049 a rr:PredicateMap. +:pom_049 rr:predicateMap :pm_049. +:pm_049 rr:constant gtfs:serviceRule. +:pom_049 rr:objectMap :om_050. +:rules_000 void:exampleResource :map_services2_000. +:map_services2_000 rml:logicalSource :source_006. +:source_006 a rml:LogicalSource; + rml:source "data/CALENDAR_DATES.csv"; + rml:referenceFormulation ql:CSV. +:map_services2_000 a rr:TriplesMap; + rdfs:label "services2". +:s_006 a rr:SubjectMap. +:map_services2_000 rr:subjectMap :s_006. +:s_006 rr:template "http://transport.linkeddata.es/madrid/metro/services/{service_id}". +:pom_050 a rr:PredicateObjectMap. +:map_services2_000 rr:predicateObjectMap :pom_050. +:pm_050 a rr:PredicateMap. +:pom_050 rr:predicateMap :pm_050. 
+:pm_050 rr:constant rdf:type. +:pom_050 rr:objectMap :om_051. +:om_051 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Service"; + rr:termType rr:IRI. +:pom_051 a rr:PredicateObjectMap. +:map_services2_000 rr:predicateObjectMap :pom_051. +:pm_051 a rr:PredicateMap. +:pom_051 rr:predicateMap :pm_051. +:pm_051 rr:constant gtfs:serviceRule. +:pom_051 rr:objectMap :om_052. +:rules_000 void:exampleResource :map_calendar_date_rules_000. +:map_calendar_date_rules_000 rml:logicalSource :source_007. +:source_007 a rml:LogicalSource; + rml:source "data/CALENDAR_DATES.csv"; + rml:referenceFormulation ql:CSV. +:map_calendar_date_rules_000 a rr:TriplesMap; + rdfs:label "calendar_date_rules". +:s_007 a rr:SubjectMap. +:map_calendar_date_rules_000 rr:subjectMap :s_007. +:s_007 rr:template "http://transport.linkeddata.es/madrid/metro/calendar_date_rule/{service_id}-{date}". +:pom_052 a rr:PredicateObjectMap. +:map_calendar_date_rules_000 rr:predicateObjectMap :pom_052. +:pm_052 a rr:PredicateMap. +:pom_052 rr:predicateMap :pm_052. +:pm_052 rr:constant rdf:type. +:pom_052 rr:objectMap :om_053. +:om_053 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#CalendarDateRule"; + rr:termType rr:IRI. +:pom_053 a rr:PredicateObjectMap. +:map_calendar_date_rules_000 rr:predicateObjectMap :pom_053. +:pm_053 a rr:PredicateMap. +:pom_053 rr:predicateMap :pm_053. +:pm_053 rr:constant dct:date. +:pom_053 rr:objectMap :om_054. +:om_054 a rr:ObjectMap; + rml:reference "date"; + rr:termType rr:Literal; + rr:datatype xsd:date. +:pom_054 a rr:PredicateObjectMap. +:map_calendar_date_rules_000 rr:predicateObjectMap :pom_054. +:pm_054 a rr:PredicateMap. +:pom_054 rr:predicateMap :pm_054. +:pm_054 rr:constant gtfs:dateAddition. +:pom_054 rr:objectMap :om_055. +:om_055 a rr:ObjectMap; + rml:reference "exception_type"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. +:rules_000 void:exampleResource :map_calendar_rules_000. +:map_calendar_rules_000 rml:logicalSource :source_008. 
+:source_008 a rml:LogicalSource; + rml:source "data/CALENDAR.csv"; + rml:referenceFormulation ql:CSV. +:map_calendar_rules_000 a rr:TriplesMap; + rdfs:label "calendar_rules". +:s_008 a rr:SubjectMap. +:map_calendar_rules_000 rr:subjectMap :s_008. +:s_008 rr:template "http://transport.linkeddata.es/madrid/metro/calendar_rules/{service_id}". +:pom_055 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_055. +:pm_055 a rr:PredicateMap. +:pom_055 rr:predicateMap :pm_055. +:pm_055 rr:constant rdf:type. +:pom_055 rr:objectMap :om_056. +:om_056 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#CalendarRule"; + rr:termType rr:IRI. +:pom_056 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_056. +:pm_056 a rr:PredicateMap. +:pom_056 rr:predicateMap :pm_056. +:pm_056 rr:constant gtfs:monday. +:pom_056 rr:objectMap :om_057. +:om_057 a rr:ObjectMap; + rml:reference "monday"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. +:pom_057 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_057. +:pm_057 a rr:PredicateMap. +:pom_057 rr:predicateMap :pm_057. +:pm_057 rr:constant gtfs:tuesday. +:pom_057 rr:objectMap :om_058. +:om_058 a rr:ObjectMap; + rml:reference "tuesday"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. +:pom_058 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_058. +:pm_058 a rr:PredicateMap. +:pom_058 rr:predicateMap :pm_058. +:pm_058 rr:constant gtfs:wednesday. +:pom_058 rr:objectMap :om_059. +:om_059 a rr:ObjectMap; + rml:reference "wednesday"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. +:pom_059 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_059. +:pm_059 a rr:PredicateMap. +:pom_059 rr:predicateMap :pm_059. +:pm_059 rr:constant gtfs:thursday. +:pom_059 rr:objectMap :om_060. +:om_060 a rr:ObjectMap; + rml:reference "thursday"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. 
+:pom_060 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_060. +:pm_060 a rr:PredicateMap. +:pom_060 rr:predicateMap :pm_060. +:pm_060 rr:constant gtfs:friday. +:pom_060 rr:objectMap :om_061. +:om_061 a rr:ObjectMap; + rml:reference "friday"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. +:pom_061 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_061. +:pm_061 a rr:PredicateMap. +:pom_061 rr:predicateMap :pm_061. +:pm_061 rr:constant gtfs:saturday. +:pom_061 rr:objectMap :om_062. +:om_062 a rr:ObjectMap; + rml:reference "saturday"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. +:pom_062 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_062. +:pm_062 a rr:PredicateMap. +:pom_062 rr:predicateMap :pm_062. +:pm_062 rr:constant gtfs:sunday. +:pom_062 rr:objectMap :om_063. +:om_063 a rr:ObjectMap; + rml:reference "sunday"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. +:pom_063 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_063. +:pm_063 a rr:PredicateMap. +:pom_063 rr:predicateMap :pm_063. +:pm_063 rr:constant schema:startDate. +:pom_063 rr:objectMap :om_064. +:om_064 a rr:ObjectMap; + rml:reference "start_date"; + rr:termType rr:Literal; + rr:datatype xsd:date. +:pom_064 a rr:PredicateObjectMap. +:map_calendar_rules_000 rr:predicateObjectMap :pom_064. +:pm_064 a rr:PredicateMap. +:pom_064 rr:predicateMap :pm_064. +:pm_064 rr:constant schema:endDate. +:pom_064 rr:objectMap :om_065. +:om_065 a rr:ObjectMap; + rml:reference "end_date"; + rr:termType rr:Literal; + rr:datatype xsd:date. +:rules_000 void:exampleResource :map_feed_000. +:map_feed_000 rml:logicalSource :source_009. +:source_009 a rml:LogicalSource; + rml:source "data/FEED_INFO.csv"; + rml:referenceFormulation ql:CSV. +:map_feed_000 a rr:TriplesMap; + rdfs:label "feed". +:s_009 a rr:SubjectMap. +:map_feed_000 rr:subjectMap :s_009. 
+:s_009 rr:template "http://transport.linkeddata.es/madrid/metro/feed/{feed_publisher_name}". +:pom_065 a rr:PredicateObjectMap. +:map_feed_000 rr:predicateObjectMap :pom_065. +:pm_065 a rr:PredicateMap. +:pom_065 rr:predicateMap :pm_065. +:pm_065 rr:constant rdf:type. +:pom_065 rr:objectMap :om_066. +:om_066 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Feed"; + rr:termType rr:IRI. +:pom_066 a rr:PredicateObjectMap. +:map_feed_000 rr:predicateObjectMap :pom_066. +:pm_066 a rr:PredicateMap. +:pom_066 rr:predicateMap :pm_066. +:pm_066 rr:constant dct:publisher. +:pom_066 rr:objectMap :om_067. +:om_067 a rr:ObjectMap; + rml:reference "feed_publisher_name"; + rr:termType rr:Literal. +:pom_067 a rr:PredicateObjectMap. +:map_feed_000 rr:predicateObjectMap :pom_067. +:pm_067 a rr:PredicateMap. +:pom_067 rr:predicateMap :pm_067. +:pm_067 rr:constant foaf:page. +:pom_067 rr:objectMap :om_068. +:om_068 a rr:ObjectMap; + rml:reference "feed_publisher_url"; + rr:termType rr:IRI. +:pom_068 a rr:PredicateObjectMap. +:map_feed_000 rr:predicateObjectMap :pom_068. +:pm_068 a rr:PredicateMap. +:pom_068 rr:predicateMap :pm_068. +:pm_068 rr:constant dct:language. +:pom_068 rr:objectMap :om_069. +:om_069 a rr:ObjectMap; + rml:reference "feed_lang"; + rr:termType rr:Literal. +:pom_069 a rr:PredicateObjectMap. +:map_feed_000 rr:predicateObjectMap :pom_069. +:pm_069 a rr:PredicateMap. +:pom_069 rr:predicateMap :pm_069. +:pm_069 rr:constant schema:startDate. +:pom_069 rr:objectMap :om_070. +:om_070 a rr:ObjectMap; + rml:reference "feed_start_date"; + rr:termType rr:Literal; + rr:datatype xsd:date. +:pom_070 a rr:PredicateObjectMap. +:map_feed_000 rr:predicateObjectMap :pom_070. +:pm_070 a rr:PredicateMap. +:pom_070 rr:predicateMap :pm_070. +:pm_070 rr:constant schema:endDate. +:pom_070 rr:objectMap :om_071. +:om_071 a rr:ObjectMap; + rml:reference "feed_end_date"; + rr:termType rr:Literal; + rr:datatype xsd:date. +:pom_071 a rr:PredicateObjectMap. 
+:map_feed_000 rr:predicateObjectMap :pom_071. +:pm_071 a rr:PredicateMap. +:pom_071 rr:predicateMap :pm_071. +:pm_071 rr:constant schema:version. +:pom_071 rr:objectMap :om_072. +:om_072 a rr:ObjectMap; + rml:reference "feed_version"; + rr:termType rr:Literal. +:rules_000 void:exampleResource :map_shapes_000. +:map_shapes_000 rml:logicalSource :source_010. +:source_010 a rml:LogicalSource; + rml:source "data/SHAPES.csv"; + rml:referenceFormulation ql:CSV. +:map_shapes_000 a rr:TriplesMap; + rdfs:label "shapes". +:s_010 a rr:SubjectMap. +:map_shapes_000 rr:subjectMap :s_010. +:s_010 rr:template "http://transport.linkeddata.es/madrid/metro/shape/{shape_id}". +:pom_072 a rr:PredicateObjectMap. +:map_shapes_000 rr:predicateObjectMap :pom_072. +:pm_072 a rr:PredicateMap. +:pom_072 rr:predicateMap :pm_072. +:pm_072 rr:constant rdf:type. +:pom_072 rr:objectMap :om_073. +:om_073 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Shape"; + rr:termType rr:IRI. +:pom_073 a rr:PredicateObjectMap. +:map_shapes_000 rr:predicateObjectMap :pom_073. +:pm_073 a rr:PredicateMap. +:pom_073 rr:predicateMap :pm_073. +:pm_073 rr:constant gtfs:shapePoint. +:pom_073 rr:objectMap :om_074. +:om_074 a rr:ObjectMap; + rr:template "http://transport.linkeddata.es/madrid/metro/shape_point/{shape_id}-{shape_pt_sequence}"; + rr:termType rr:Literal. +:rules_000 void:exampleResource :map_shapePoints_000. +:map_shapePoints_000 rml:logicalSource :source_011. +:source_011 a rml:LogicalSource; + rml:source "data/SHAPES.csv"; + rml:referenceFormulation ql:CSV. +:map_shapePoints_000 a rr:TriplesMap; + rdfs:label "shapePoints". +:s_011 a rr:SubjectMap. +:map_shapePoints_000 rr:subjectMap :s_011. +:s_011 rr:template "http://transport.linkeddata.es/madrid/metro/shape_point/{shape_id}-{shape_pt_sequence}". +:pom_074 a rr:PredicateObjectMap. +:map_shapePoints_000 rr:predicateObjectMap :pom_074. +:pm_074 a rr:PredicateMap. +:pom_074 rr:predicateMap :pm_074. +:pm_074 rr:constant rdf:type. 
+:pom_074 rr:objectMap :om_075. +:om_075 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#ShapePoint"; + rr:termType rr:IRI. +:pom_075 a rr:PredicateObjectMap. +:map_shapePoints_000 rr:predicateObjectMap :pom_075. +:pm_075 a rr:PredicateMap. +:pom_075 rr:predicateMap :pm_075. +:pm_075 rr:constant geo:lat. +:pom_075 rr:objectMap :om_076. +:om_076 a rr:ObjectMap; + rml:reference "shape_pt_lat"; + rr:termType rr:Literal; + rr:datatype xsd:double. +:pom_076 a rr:PredicateObjectMap. +:map_shapePoints_000 rr:predicateObjectMap :pom_076. +:pm_076 a rr:PredicateMap. +:pom_076 rr:predicateMap :pm_076. +:pm_076 rr:constant geo:long. +:pom_076 rr:objectMap :om_077. +:om_077 a rr:ObjectMap; + rml:reference "shape_pt_lon"; + rr:termType rr:Literal; + rr:datatype xsd:double. +:pom_077 a rr:PredicateObjectMap. +:map_shapePoints_000 rr:predicateObjectMap :pom_077. +:pm_077 a rr:PredicateMap. +:pom_077 rr:predicateMap :pm_077. +:pm_077 rr:constant gtfs:pointSequence. +:pom_077 rr:objectMap :om_078. +:om_078 a rr:ObjectMap; + rml:reference "shape_pt_sequence"; + rr:termType rr:Literal. +:pom_078 a rr:PredicateObjectMap. +:map_shapePoints_000 rr:predicateObjectMap :pom_078. +:pm_078 a rr:PredicateMap. +:pom_078 rr:predicateMap :pm_078. +:pm_078 rr:constant gtfs:distanceTraveled. +:pom_078 rr:objectMap :om_079. +:om_079 a rr:ObjectMap; + rml:reference "shape_dist_traveled"; + rr:termType rr:Literal. +:rules_000 void:exampleResource :map_frequencies_000. +:map_frequencies_000 rml:logicalSource :source_012. +:source_012 a rml:LogicalSource; + rml:source "data/FREQUENCIES.csv"; + rml:referenceFormulation ql:CSV. +:map_frequencies_000 a rr:TriplesMap; + rdfs:label "frequencies". +:s_012 a rr:SubjectMap. +:map_frequencies_000 rr:subjectMap :s_012. +:s_012 rr:template "http://transport.linkeddata.es/madrid/metro/frequency/{trip_id}-{start_time}". +:pom_079 a rr:PredicateObjectMap. +:map_frequencies_000 rr:predicateObjectMap :pom_079. +:pm_079 a rr:PredicateMap. 
+:pom_079 rr:predicateMap :pm_079. +:pm_079 rr:constant rdf:type. +:pom_079 rr:objectMap :om_080. +:om_080 a rr:ObjectMap; + rr:constant "http://vocab.gtfs.org/terms#Frequency"; + rr:termType rr:IRI. +:pom_080 a rr:PredicateObjectMap. +:map_frequencies_000 rr:predicateObjectMap :pom_080. +:pm_080 a rr:PredicateMap. +:pom_080 rr:predicateMap :pm_080. +:pm_080 rr:constant gtfs:startTime. +:pom_080 rr:objectMap :om_081. +:om_081 a rr:ObjectMap; + rml:reference "start_time"; + rr:termType rr:Literal. +:pom_081 a rr:PredicateObjectMap. +:map_frequencies_000 rr:predicateObjectMap :pom_081. +:pm_081 a rr:PredicateMap. +:pom_081 rr:predicateMap :pm_081. +:pm_081 rr:constant gtfs:endTime. +:pom_081 rr:objectMap :om_082. +:om_082 a rr:ObjectMap; + rml:reference "end_time"; + rr:termType rr:Literal. +:pom_082 a rr:PredicateObjectMap. +:map_frequencies_000 rr:predicateObjectMap :pom_082. +:pm_082 a rr:PredicateMap. +:pom_082 rr:predicateMap :pm_082. +:pm_082 rr:constant gtfs:headwaySeconds. +:pom_082 rr:objectMap :om_083. +:om_083 a rr:ObjectMap; + rml:reference "headway_secs"; + rr:termType rr:Literal; + rr:datatype xsd:integer. +:pom_083 a rr:PredicateObjectMap. +:map_frequencies_000 rr:predicateObjectMap :pom_083. +:pm_083 a rr:PredicateMap. +:pom_083 rr:predicateMap :pm_083. +:pm_083 rr:constant gtfs:exactTimes. +:pom_083 rr:objectMap :om_084. +:om_084 a rr:ObjectMap; + rml:reference "exact_times"; + rr:termType rr:Literal; + rr:datatype xsd:boolean. +:pom_084 a rr:PredicateObjectMap. +:map_frequencies_000 rr:predicateObjectMap :pom_084. +:pm_084 a rr:PredicateMap. +:pom_084 rr:predicateMap :pm_084. +:pm_084 rr:constant gtfs:trip. +:pom_084 rr:objectMap :om_085. +:om_008 a rr:ObjectMap; + rr:parentTriplesMap :map_trips_000; + rr:joinCondition :jc_000. +:jc_000 rr:child "trip_id"; + rr:parent "trip_id". +:om_009 a rr:ObjectMap; + rr:parentTriplesMap :map_stops_000; + rr:joinCondition :jc_001. +:jc_001 rr:child "stop_id"; + rr:parent "stop_id". 
+:om_016 a rr:ObjectMap; + rr:parentTriplesMap :map_services1_000; + rr:joinCondition :jc_002. +:jc_002 rr:child "service_id"; + rr:parent "service_id". +:om_017 a rr:ObjectMap; + rr:parentTriplesMap :map_services2_000; + rr:joinCondition :jc_003. +:jc_003 rr:child "service_id"; + rr:parent "service_id". +:om_018 a rr:ObjectMap; + rr:parentTriplesMap :map_routes_000; + rr:joinCondition :jc_004. +:jc_004 rr:child "route_id"; + rr:parent "route_id". +:om_019 a rr:ObjectMap; + rr:parentTriplesMap :map_shapes_000; + rr:joinCondition :jc_005. +:jc_005 rr:child "shape_id"; + rr:parent "shape_id". +:om_028 a rr:ObjectMap; + rr:parentTriplesMap :map_agency_000; + rr:joinCondition :jc_006. +:jc_006 rr:child "agency_id"; + rr:parent "agency_id". +:om_048 a rr:ObjectMap; + rr:parentTriplesMap :map_stops_000; + rr:joinCondition :jc_007. +:jc_007 rr:child "parent_station"; + rr:parent "stop_id". +:om_050 a rr:ObjectMap; + rr:parentTriplesMap :map_calendar_rules_000; + rr:joinCondition :jc_008. +:jc_008 rr:child "service_id"; + rr:parent "service_id". +:om_052 a rr:ObjectMap; + rr:parentTriplesMap :map_calendar_date_rules_000; + rr:joinCondition :jc_009. +:jc_009 rr:child "service_id"; + rr:parent "service_id". +:om_085 a rr:ObjectMap; + rr:parentTriplesMap :map_trips_000; + rr:joinCondition :jc_010. +:jc_010 rr:child "trip_id"; + rr:parent "trip_id". \ No newline at end of file diff --git a/docs/build.md b/docs/build.md index 273e0de..cc6ede4 100644 --- a/docs/build.md +++ b/docs/build.md @@ -35,6 +35,7 @@ and for *type checking*. To run these tests specifically: ``` +mypy --install-types mypy kglab/*.py ``` diff --git a/docs/depend.md b/docs/depend.md index b9bbb84..8501fcd 100644 --- a/docs/depend.md +++ b/docs/depend.md @@ -2,7 +2,7 @@ API by Adnen Kadri from the Noun Project -The **kglab** package requires [Python 3.6+](https://www.python.org/downloads/). +The **kglab** package requires [Python 3.7+](https://www.python.org/downloads/). 
## Base Support @@ -17,6 +17,7 @@ include: - [gensim](https://radimrehurek.com/gensim/) - [icecream](https://github.com/gruns/icecream) - [matplotlib](https://matplotlib.org/) +- [Morph-KGC](https://github.com/oeg-upm/morph-kgc) - [NetworkX](https://networkx.org/) - [NumPy](https://numpy.org/)[^2] - [OWL-RL](https://owl-rl.readthedocs.io/) diff --git a/examples/ex2_1.ipynb b/examples/ex2_1.ipynb new file mode 100644 index 0000000..8d074ce --- /dev/null +++ b/examples/ex2_1.ipynb @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# for use in tutorial and development; do not include this `sys.path` change in production:\n", + "import sys ; sys.path.insert(0, \"../\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**WIP** during integration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load data via Morph-KGC\n", + "\n", + "> [`morph-kgc`](https://github.com/oeg-upm/morph-kgc) is an engine that constructs RDF knowledge graphs from heterogeneous data sources with R2RML and RML mapping languages. 
Morph-KGC is built on top of pandas and it leverages mapping partitions to significantly reduce execution times and memory consumption for large data sources.\n", + "\n", + "For documentation see " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's set up our recipe KG:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from icecream import ic\n", + "from os.path import dirname\n", + "import kglab\n", + "import os\n", + "\n", + "namespaces = {\n", + " \"nom\": \"http://example.org/#\",\n", + " \"wtm\": \"http://purl.org/heals/food/\",\n", + " \"ind\": \"http://purl.org/heals/ingredient/\",\n", + " \"skos\": \"http://www.w3.org/2004/02/skos/core#\",\n", + " }\n", + "\n", + "kg = kglab.KnowledgeGraph(\n", + " name = \"A recipe KG example based on Food.com\",\n", + " base_uri = \"https://www.food.com/recipe/\",\n", + " namespaces = namespaces,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's use `morph-kgc` to load a file, based on a given _configuration_ for the RML transform.\n", + "\n", + "Data can be loaded from multiple text formats, and also through different ORMs (e.g., `SQLAlchemy`), via a config file with extension `.ini`" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ic| dirname(os.getcwd()): '/Users/paco/src/kglab'\n", + "ic| config_path: '/Users/paco/src/kglab/dat/morph-default-config.ini'\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: './dat/recipes.ttl'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + 
"\u001b[0;32m/var/folders/dp/q971mmvs2m98ypxb3sb0xmxc0000gn/T/ipykernel_45097/3189966681.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mkg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaterialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/src/kglab/kglab/kglab.py\u001b[0m in \u001b[0;36mmaterialize\u001b[0;34m(self, config)\u001b[0m\n\u001b[1;32m 904\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_g\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 905\u001b[0m \u001b[0;31m# generate the triples and load them to an RDFlib graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 906\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_g\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmorph_kgc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaterialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 907\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 908\u001b[0m \u001b[0;31m# merge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/__init__.py\u001b[0m in \u001b[0;36mmaterialize\u001b[0;34m(config)\u001b[0m\n\u001b[1;32m 24\u001b[0m 
\u001b[0msetup_oracle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mmappings_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mretrieve_mappings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0msubject_maps_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_subject_maps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmappings_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mmapping_partitions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mgroup\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmappings_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mby\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'mapping_partition'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/engine.py\u001b[0m in \u001b[0;36mretrieve_mappings\u001b[0;34m(config)\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m \u001b[0mmappings\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmappings_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse_mappings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 29\u001b[0m 
\u001b[0mlogging\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Mappings processed in '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mget_delta_time\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstart_time\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' seconds.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/mapping/mapping_parser.py\u001b[0m in \u001b[0;36mparse_mappings\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mparse_mappings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 295\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_from_r2_rml\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 296\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_preprocess_mappings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_infer_datatypes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/mapping/mapping_parser.py\u001b[0m in \u001b[0;36m_get_from_r2_rml\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 323\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msection_name\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_data_sources_sections\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 325\u001b[0;31m \u001b[0mdata_source_mappings_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parse_data_source_mapping_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msection_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 326\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmappings_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmappings_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_source_mappings_df\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/mapping/mapping_parser.py\u001b[0m in \u001b[0;36m_parse_data_source_mapping_files\u001b[0;34m(self, section_name)\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0mmapping_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrdflib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 341\u001b[0;31m \u001b[0mmapping_file_paths\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_mappings_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msection_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 342\u001b[0m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[0;31m# load mapping rules to the graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/config.py\u001b[0m in \u001b[0;36mget_mappings_files\u001b[0;34m(self, source_section)\u001b[0m\n\u001b[1;32m 337\u001b[0m \u001b[0mmapping_file_paths\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmapping_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 339\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mFileNotFoundError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrno\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mENOENT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrerror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrno\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mENOENT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmapping_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 340\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmapping_file_paths\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './dat/recipes.ttl'" + ] + } + ], + "source": [ + "#data_path = dirname(os.getcwd()) + \"/dat/recipes.ttl\"\n", + "config_path = dirname(os.getcwd()) + \"/dat/morph-default-config.ini\"\n", + "\n", + "ic(dirname(os.getcwd()))\n", + "\n", + "# config = f\"\"\"[DataSource1]\n", + "# mappings={datapath}\"\"\"\n", + "\n", + "ic(config_path)\n", + "\n", + "kg.materialize(config_path)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try to query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "sparql = \"\"\"\n", + " SELECT ?subject ?object\n", + " WHERE {\n", + " ?subject rdf:type wtm:Recipe .\n", + " ?subject wtm:hasIngredient ?object .\n", + " }\n", + " \"\"\"\n", + "\n", + "for row in kg._g.query(sparql):\n", + " ic(row.asdict())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/ex6_2.ipynb b/examples/ex6_2.ipynb deleted file mode 100644 index 24a4f14..0000000 --- a/examples/ex6_2.ipynb +++ /dev/null @@ -1,117 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# for use in tutorial and development; do not include this `sys.path` change in production:\n", - "import sys ; sys.path.insert(0, \"../\")\n", - "\n", - "from icecream import ic" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Load data via Morph-KGC\n", - "\n", - "> [`morph-kgc`](https://github.com/oeg-upm/morph-kgc) is an engine that constructs RDF knowledge graphs from heterogeneous data sources with R2RML and RML mapping languages. Morph-KGC is built on top of pandas and it leverages mapping partitions to significantly reduce execution times and memory consumption for large data sources.\n", - "\n", - "Data can be loaded from multiple text format but also via different ORMs (i.e. 
SQLAlchemy), via a config file with extension `.ini`.\n", - "\n", - "For documentation see [USAGE](https://github.com/oeg-upm/Morph-KGC/wiki/Usage)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, let's load our recipe KG:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from os.path import dirname\n", - "import kglab\n", - "import os\n", - "\n", - "namespaces = {\n", - " \"nom\": \"http://example.org/#\",\n", - " \"wtm\": \"http://purl.org/heals/food/\",\n", - " \"ind\": \"http://purl.org/heals/ingredient/\",\n", - " \"skos\": \"http://www.w3.org/2004/02/skos/core#\",\n", - " }\n", - "\n", - "kg = kglab.KnowledgeGraph(\n", - " name = \"A recipe KG example based on Food.com\",\n", - " base_uri = \"https://www.food.com/recipe/\",\n", - " namespaces = namespaces,\n", - " )\n", - "\n", - "datapath = dirname(os.getcwd()) + \"/dat/recipes.ttl\"\n", - "configpath = dirname(os.getcwd()) + \"/dat/morph-default-config.ini\"\n", - "\n", - "# config = f\"\"\"[DataSource1]\n", - "# mappings={datapath}\"\"\"\n", - "\n", - "print(configpath)\n", - "kg.materialize(configpath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's try to query." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "sparql = \"\"\"\n", - " SELECT ?subject ?object\n", - " WHERE {\n", - " ?subject rdf:type wtm:Recipe .\n", - " ?subject wtm:hasIngredient ?object .\n", - " }\n", - " \"\"\"\n", - "\n", - "for row in kg._g.query(sparql):\n", - " ic(row.asdict())" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/kglab/decorators.py b/kglab/decorators.py index d81197b..fca7742 100644 --- a/kglab/decorators.py +++ b/kglab/decorators.py @@ -1,16 +1,20 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -###################################################################### -# Decorator to handle multiple paths +""" +Decorators: + + * `@multifile` handles specifying multiple paths in serialization methods. 
+""" from copy import deepcopy from functools import wraps from glob import glob -from icecream import ic # type: ignore # pylint: disable=W0611,E0401 import inspect import pathlib import typing + +from icecream import ic # type: ignore # pylint: disable=W0611,E0401 import urlpath # type: ignore # pylint: disable=E0401 diff --git a/kglab/esp.py b/kglab/esp.py index 5f45114..1788604 100644 --- a/kglab/esp.py +++ b/kglab/esp.py @@ -4,18 +4,21 @@ # mypy: ignore-errors # pylint: disable-all -###################################################################### -## Evolunationary Shape Prediction +""" +WIP: Evolutionary Shape Prediction +""" + +from collections import deque +import random +import typing + +import pandas as pd # type: ignore +import rdflib # type: ignore from kglab import KnowledgeGraph, Measure, Subgraph from kglab.pkg_types import RDF_Node, SPARQL_Bindings import kglab.util -from collections import deque -import pandas as pd # type: ignore -import random -import rdflib # type: ignore -import typing SerializedEvoEdge = typing.Tuple[int, int] SerializedEvoNode = typing.Tuple[int, typing.List[SerializedEvoEdge]] diff --git a/kglab/external_import.py b/kglab/external_import.py index 157ad47..f003368 100644 --- a/kglab/external_import.py +++ b/kglab/external_import.py @@ -4,24 +4,22 @@ """ Provide support for importing RDF data from multiple existing graph -databases, preferably triplestores: +databases, including: * neo4j - * Ontotext-GraphDB - * Blazegraph - * DataStax """ import json +import urllib.parse + import requests # pylint: disable=E0401 import rdflib # type: ignore # pylint: disable=E0401 -import urllib.parse def import_from_neo4j ( username: str, password: str, - dbname: str, + dbname: str, # pylint: disable=W0613 host: str = "localhost", port: str = "7474" ) -> rdflib.Graph: @@ -62,8 +60,8 @@ def import_from_neo4j ( # construct the export URL u = urllib.parse.urlparse(host) - netloc = "{}:{}".format(u.netloc, port) - path = 
"/rdf/{}/cypher".format(dbname) + netloc = f"{u.netloc}:{port}" + path = f"/rdf/{dbname}/cypher" url = urllib.parse.urlunparse((u.scheme, netloc, path, "", "", "",)) try: diff --git a/kglab/gpviz.py b/kglab/gpviz.py index b7f19d3..5f380d1 100644 --- a/kglab/gpviz.py +++ b/kglab/gpviz.py @@ -1,13 +1,18 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +""" +Visualization of SPARQL queries. +""" + import collections import copy +import typing + import pyvis # type: ignore # pylint: disable=E0401 import rdflib.paths # type: ignore # pylint: disable=E0401 import rdflib.plugins.sparql # type: ignore # pylint: disable=E0401 import rdflib.term # type: ignore # pylint: disable=E0401 -import typing class GPViz: diff --git a/kglab/kglab.py b/kglab/kglab.py index aa476e0..8522d30 100644 --- a/kglab/kglab.py +++ b/kglab/kglab.py @@ -1,42 +1,50 @@ -""" KG Lab main class +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# see license https://github.com/DerwenAI/kglab#license-and-copyright -see license https://github.com/DerwenAI/kglab#license-and-copyright """ -###################################################################### -## kglab - core classes +kglab main class definition.
+""" -from kglab.decorators import multifile -from kglab.pkg_types import PathLike, IOPathLike, GraphLike, RDF_Node -from kglab.gpviz import GPViz -from kglab.util import get_gpu_count -from kglab.version import _check_version -_check_version() +import codecs +import datetime +import io +import json +import pathlib +import traceback +import typing -import rdflib # type: ignore # pylint: disable=E0401 -import rdflib.plugin # type: ignore # pylint: disable=E0401 -import rdflib.plugins.parsers.notation3 as rdf_n3 # type: ignore # pylint: disable=E0401 -#rdflib.plugin.register("json-ld", rdflib.plugin.Parser, "rdflib_jsonld.parser", "JsonLDParser") -#rdflib.plugin.register("json-ld", rdflib.plugin.Serializer, "rdflib_jsonld.serializer", "JsonLDSerializer") +### third-parties bindings from icecream import ic # type: ignore # pylint: disable=E0401 import chocolate # type: ignore # pylint: disable=E0401 -import codecs import csvwlib # type: ignore # pylint: disable=E0401 -import datetime import dateutil.parser as dup # pylint: disable=E0401 -import io -import json +import morph_kgc # type: ignore # pylint: disable=E0401 import owlrl # type: ignore # pylint: disable=E0401 import pandas as pd # type: ignore # pylint: disable=E0401 -import pathlib import pyshacl # type: ignore # pylint: disable=E0401 import pyvis # type: ignore # pylint: disable=E0401 -import traceback -import typing import urlpath # type: ignore # pylint: disable=E0401 -### third-parties bindings -import morph_kgc +import rdflib # type: ignore # pylint: disable=E0401 +import rdflib.plugin # type: ignore # pylint: disable=E0401 +import rdflib.plugins.parsers.notation3 as rdf_n3 # type: ignore # pylint: disable=E0401 +#rdflib.plugin.register("json-ld", rdflib.plugin.Parser, "rdflib_jsonld.parser", "JsonLDParser") +#rdflib.plugin.register("json-ld", rdflib.plugin.Serializer, "rdflib_jsonld.serializer", "JsonLDSerializer") + + +###################################################################### +## kglab - 
core classes + +from kglab.decorators import multifile +from kglab.pkg_types import PathLike, IOPathLike, GraphLike, RDF_Node +from kglab.gpviz import GPViz +from kglab.util import get_gpu_count +from kglab.version import _check_version + + +_check_version() if get_gpu_count() > 0: import cudf # type: ignore # pylint: disable=E0401 @@ -429,7 +437,7 @@ def _check_format ( try: rdflib.plugin.get(format, rdflib.serializer.Serializer) except Exception: - raise TypeError("unknown format: {}".format(format)) + raise TypeError(f"unknown format: {format}") @classmethod @@ -671,7 +679,7 @@ def save_rdf_text ( if not base and self.base_uri: base = self.base_uri - return self._g.serialize( + return self._g.serialize( # type: ignore destination=None, format=format, base=base, @@ -804,7 +812,10 @@ def load_parquet ( ) df.apply( - lambda row: self._g.parse(data="{} {} {} .".format(row[0], row[1], row[2]), format="ttl"), + lambda row: self._g.parse( + data="{} {} {} .".format(row[0], row[1], row[2]), + format="ttl", + ), axis=1, ) @@ -877,6 +888,34 @@ def load_csv ( return self.load_rdf_text(new_rdf) + def materialize ( + self, + config: str, + ) -> "KnowledgeGraph": + """ +Binding to the morph-kgc `materialize()` method.
+ + config: +path to a morph-kgc configuration file; see + + returns: +this `KnowledgeGraph` object – used for method chaining + """ + if len(self._g) == 0: + # generate the triples and load them to an RDFlib graph + self._g = morph_kgc.materialize(config) + else: + # merge + # for caveats about merging this way: + # + self._g += morph_kgc.materialize(config) + + return self + + + ###################################################################### + ## Roam Research integration + def _walk_roam_graph ( self, obj: dict, @@ -976,58 +1015,6 @@ def import_roam ( return uid_list - def n3fy ( - self, - node: RDF_Node, - *, - pythonify: bool = True, - ) -> typing.Any: - """ -Wrapper for RDFlib [`n3()`](https://rdflib.readthedocs.io/en/stable/utilities.html?highlight=n3#serializing-a-single-term-to-n3) and [`toPython()`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=toPython#rdflib.Variable.toPython) to serialize a node into a human-readable representation using N3 format.
- - node: -must be a [`rdflib.term.Node`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Node#rdflib.term.Node) - - pythonify: -flag to force instances of [`rdflib.term.Literal`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Literal#rdflib.term.Identifier) to their Python literal representation - - returns: -text (or Python objects) for the serialized node - """ - if pythonify and isinstance(node, rdflib.term.Literal): - serialized = node.toPython() - else: - serialized = node.n3(self._g.namespace_manager) - - return serialized - - - def n3fy_row ( - self, - row_dict: dict, - *, - pythonify: bool = True, - ) -> dict: - """ -Wrapper for RDFlib [`n3()`](https://rdflib.readthedocs.io/en/stable/utilities.html?highlight=n3#serializing-a-single-term-to-n3) and [`toPython()`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=toPython#rdflib.Variable.toPython) to serialize one row of a result set from a SPARQL query into a human-readable representation for each term using N3 format. 
- - row_dict: -one row of a SPARQL query results, as a `dict` - - pythonify: -flag to force instances of [`rdflib.term.Literal`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Literal#rdflib.term.Identifier) to their Python literal representation - - returns: -a dictionary of serialized row bindings - """ - bindings = { - k: self.n3fy(v, pythonify=pythonify) - for k, v in row_dict.items() - } - - return bindings - - ###################################################################### ## SPARQL queries @@ -1124,6 +1111,58 @@ def visualize_query ( return GPViz(sparql, self._ns).visualize_query(notebook=notebook) + def n3fy ( + self, + node: RDF_Node, + *, + pythonify: bool = True, + ) -> typing.Any: + """ +Wrapper for RDFlib [`n3()`](https://rdflib.readthedocs.io/en/stable/utilities.html?highlight=n3#serializing-a-single-term-to-n3) and [`toPython()`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=toPython#rdflib.Variable.toPython) to serialize a node into a human-readable representation using N3 format. 
+ + node: +must be a [`rdflib.term.Node`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Node#rdflib.term.Node) + + pythonify: +flag to force instances of [`rdflib.term.Literal`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Literal#rdflib.term.Identifier) to their Python literal representation + + returns: +text (or Python objects) for the serialized node + """ + if pythonify and isinstance(node, rdflib.term.Literal): + serialized = node.toPython() + else: + serialized = node.n3(self._g.namespace_manager) # type: ignore + + return serialized + + + def n3fy_row ( + self, + row_dict: dict, + *, + pythonify: bool = True, + ) -> dict: + """ +Wrapper for RDFlib [`n3()`](https://rdflib.readthedocs.io/en/stable/utilities.html?highlight=n3#serializing-a-single-term-to-n3) and [`toPython()`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=toPython#rdflib.Variable.toPython) to serialize one row of a result set from a SPARQL query into a human-readable representation for each term using N3 format. 
+ + row_dict: +one row of a SPARQL query results, as a `dict` + + pythonify: +flag to force instances of [`rdflib.term.Literal`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Literal#rdflib.term.Identifier) to their Python literal representation + + returns: +a dictionary of serialized row bindings + """ + bindings = { + k: self.n3fy(v, pythonify=pythonify) + for k, v in row_dict.items() + } + + return bindings + + ###################################################################### ## SHACL validation @@ -1452,15 +1491,3 @@ def infer_skos_hierarchical_mappings ( self.add(s, _skos.narrower, o) else: self.remove(s, _skos.narrowMatch, o) - - def materialize(self, config: str) -> rdflib.Graph: - """ Binding to morph-kgc `materialize()` """ - - if len(self._g) == 0: - # generate the triples and load them to an RDFlib graph - self._g = morph_kgc.materialize(config) - else: - # merge - # for caveats about merging this way: - # - self._g.parse(morph_kgc.materialize(config)) diff --git a/kglab/pkg_types.py b/kglab/pkg_types.py index e2064db..b19264c 100644 --- a/kglab/pkg_types.py +++ b/kglab/pkg_types.py @@ -2,20 +2,22 @@ # -*- coding: utf-8 -*- # see license https://github.com/DerwenAI/kglab#license-and-copyright -import pandas as pd # type: ignore # pylint: disable=E0401 +""" +Shared type definitions. 
+""" + import pathlib -import rdflib # type: ignore # pylint: disable=E0401 import typing -import urlpath # type: ignore # pylint: disable=E0401 +import pandas as pd # type: ignore # pylint: disable=E0401 +import rdflib # type: ignore # pylint: disable=E0401 +import urlpath # type: ignore # pylint: disable=E0401 -###################################################################### -## shared type definitions PathLike = typing.Union[ str, pathlib.Path, urlpath.URL ] IOPathLike = typing.Union[ PathLike, typing.IO ] -RDF_Node = typing.Union[ rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode ] +RDF_Node = typing.Union[ rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode ] RDF_Triple = typing.Tuple[ RDF_Node, RDF_Node, RDF_Node ] NodeLike = typing.Union[ typing.Optional[str], RDF_Node ] diff --git a/kglab/srl.py b/kglab/srl.py index fcbf7be..d859c08 100644 --- a/kglab/srl.py +++ b/kglab/srl.py @@ -2,17 +2,19 @@ # -*- coding: utf-8 -*- # see license https://github.com/DerwenAI/kglab#license-and-copyright -###################################################################### -## classes to support models for statistical relational learning +""" +Support for use of statistical relational learning. +""" + +import pathlib +import typing from icecream import ic # type: ignore # pylint: disable=E0401 import pandas as pd # type: ignore # pylint: disable=E0401 -import pathlib import pslpython.model # type: ignore # pylint: disable=E0401 import pslpython.partition # type: ignore # pylint: disable=E0401 import pslpython.predicate # type: ignore # pylint: disable=E0401 import pslpython.rule # type: ignore # pylint: disable=E0401 -import typing class PSLModel: @@ -154,8 +156,8 @@ def add_rule ( @classmethod def _raise_model_error ( cls, - obj: str, - msg: str, + obj: str, # pylint: disable=W0613 + msg: str, # pylint: disable=W0613 ) -> None: """ Semiprivate helper function to format and raise a `ModelError` exception. 
@@ -166,7 +168,7 @@ def _raise_model_error ( msg: the exception message to use """ - error = "{}: {}".format(msg, obj) + error = "{msg}: {obj}" raise pslpython.model.ModelError(error) diff --git a/kglab/subg.py b/kglab/subg.py index 51de9c3..af83dc8 100644 --- a/kglab/subg.py +++ b/kglab/subg.py @@ -2,13 +2,11 @@ # -*- coding: utf-8 -*- # see license https://github.com/DerwenAI/kglab#license-and-copyright -###################################################################### -## subgraph transforms for visualization, graph algorithms, etc. +""" +Subgraph transforms for visualization, graph algorithms, etc. +""" -from kglab import KnowledgeGraph -from kglab.topo import Measure -from kglab.pkg_types import NodeLike, RDF_Node, RDF_Triple -from kglab.util import get_gpu_count +import typing from icecream import ic # type: ignore # pylint: disable=W0611,E0401 from tqdm import tqdm # type: ignore # pylint: disable=E0401 @@ -16,7 +14,12 @@ import pyvis.network # type: ignore # pylint: disable=E0401 import networkx as nx # type: ignore # pylint: disable=E0401 import torch # type: ignore # pylint: disable=E0401 -import typing + +from kglab import KnowledgeGraph +from kglab.topo import Measure +from kglab.pkg_types import NodeLike, RDF_Node, RDF_Triple +from kglab.util import get_gpu_count + if get_gpu_count() > 0: import cudf # type: ignore # pylint: disable=E0401 diff --git a/kglab/topo.py b/kglab/topo.py index dce8baf..608ca1b 100644 --- a/kglab/topo.py +++ b/kglab/topo.py @@ -2,16 +2,18 @@ # -*- coding: utf-8 -*- # see license https://github.com/DerwenAI/kglab#license-and-copyright -###################################################################### -## graph topology - -from kglab import KnowledgeGraph -from kglab.pkg_types import Census_Item, Census_Dyad_Tally +""" +Graph topology. 
+""" from collections import defaultdict +import typing + import pandas as pd # type: ignore # pylint: disable=E0401 import rdflib # type: ignore # pylint: disable=E0401 -import typing + +from kglab import KnowledgeGraph +from kglab.pkg_types import Census_Item, Census_Dyad_Tally class Simplex0: diff --git a/kglab/util.py b/kglab/util.py index 9a2a9ff..ebe9fad 100644 --- a/kglab/util.py +++ b/kglab/util.py @@ -2,10 +2,12 @@ # -*- coding: utf-8 -*- # see license https://github.com/DerwenAI/kglab#license-and-copyright -###################################################################### -## utilities +""" +Utility functions used within `kglab` and related applications. +""" import math + import numpy as np # type: ignore # pylint: disable=E0401 import pandas as pd # type: ignore # pylint: disable=E0401 @@ -21,7 +23,7 @@ def get_gpu_count () -> int: returns: count of available GPUs, where `0` means none or disabled. """ - global GPU_COUNT + global GPU_COUNT # pylint: disable=W0603 if GPU_COUNT < 0: return 0 @@ -40,9 +42,9 @@ def get_gpu_count () -> int: if get_gpu_count() > 0: try: import cudf # type: ignore # pylint: disable=E0401 - except Exception as e: # pylint: disable=W0703 + except Exception as gpu_e: # pylint: disable=W0703 # turn off GPU usage - #print(e) + #print(gpu_e) GPU_COUNT = -1 diff --git a/kglab/version.py b/kglab/version.py index 2e36668..a6d453f 100644 --- a/kglab/version.py +++ b/kglab/version.py @@ -2,15 +2,16 @@ # -*- coding: utf-8 -*- # see license https://github.com/DerwenAI/kglab#license-and-copyright +""" +Version checking for a Python module. 
+""" + import sys import typing -###################################################################### -## Python version checking - MIN_PY_VERSION: typing.Tuple = (3, 7,) -__version__: str = "0.4.3" +__version__: str = "0.4.4" def _versify ( diff --git a/requirements-dev.txt b/requirements-dev.txt index 57892fd..b805163 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -20,5 +20,6 @@ pymdown-extensions pytest selenium twine +types-python-dateutil wheel diff --git a/requirements.txt b/requirements.txt index 070451c..04a64f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +aiohttp >= 3.8 chocolate >= 0.0.2 csvwlib >= 0.3.2 decorator >= 4.4.2 diff --git a/test.py b/test.py index bd56d9a..30c327b 100755 --- a/test.py +++ b/test.py @@ -1,16 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from sklearn import datasets -import kglab import os -import pandas as pd import pathlib import tempfile -import urlpath -import unittest import warnings -# from icecream import ic +import unittest + +from icecream import ic +from sklearn import datasets +import pandas as pd +import urlpath + +import kglab + class TestKG (unittest.TestCase): def test_load_save_measure (self): From e8bc5e13b673f50e098605893be042c74d8581cf Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Fri, 25 Feb 2022 11:51:27 +0000 Subject: [PATCH 2/7] Add RML file --- dat/recipes.rml.ttl | 1 + 1 file changed, 1 insertion(+) create mode 100644 dat/recipes.rml.ttl diff --git a/dat/recipes.rml.ttl b/dat/recipes.rml.ttl new file mode 100644 index 0000000..46df5ca --- /dev/null +++ b/dat/recipes.rml.ttl @@ -0,0 +1 @@ +# working on defining RML mappings here # \ No newline at end of file From 046ad4722f752d7f967c3c7a46a823229094a32d Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Fri, 25 Feb 2022 11:52:45 +0000 Subject: [PATCH 3/7] Add RML file --- dat/morph-default-config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dat/morph-default-config.ini b/dat/morph-default-config.ini index 106d0d2..490d476 100644 --- a/dat/morph-default-config.ini +++ b/dat/morph-default-config.ini @@ -33,4 +33,4 @@ logs_file= [DataSource1] -mappings=${main_dir}/recipes.ttl \ No newline at end of file +mappings=${main_dir}/recipes.rml.csv \ No newline at end of file From b2a29dde0f17aab348f57d9c9d0587d2be44bc3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juli=C3=A1n=20Arenas-Guerrero?= Date: Fri, 25 Feb 2022 17:08:17 +0100 Subject: [PATCH 4/7] add basic Morph-KGC example --- dat/mapping.csv.ttl | 962 ----------------------------------- dat/morph-default-config.ini | 36 -- dat/student_sport.db | Bin 0 -> 20480 bytes dat/student_sport.r2rml.ttl | 63 +++ examples/ex2_1.ipynb | 181 +++++-- kglab/kglab.py | 2 +- requirements.txt | 1 + 7 files changed, 190 insertions(+), 1055 deletions(-) delete mode 100644 dat/mapping.csv.ttl delete mode 100644 dat/morph-default-config.ini create mode 100644 dat/student_sport.db create mode 100644 dat/student_sport.r2rml.ttl diff --git a/dat/mapping.csv.ttl b/dat/mapping.csv.ttl deleted file mode 100644 index d88cd1d..0000000 --- a/dat/mapping.csv.ttl +++ /dev/null @@ -1,962 +0,0 @@ -@prefix rr: . -@prefix rdf: . -@prefix rdfs: . -@prefix fnml: . -@prefix fno: . -@prefix d2rq: . -@prefix void: . -@prefix dc: . -@prefix foaf: . -@prefix rml: . -@prefix ql: . -@prefix : . -@prefix xsd: . -@prefix rev: . -@prefix gtfs: . -@prefix geo: . -@prefix schema: . -@prefix dct: . - -:rules_000 a void:Dataset; - void:exampleResource :map_stoptimes_000. -:map_stoptimes_000 rml:logicalSource :source_000. -:source_000 a rml:LogicalSource; - rml:source "data/STOP_TIMES.csv"; - rml:referenceFormulation ql:CSV. -:map_stoptimes_000 a rr:TriplesMap; - rdfs:label "stoptimes". -:s_000 a rr:SubjectMap. -:map_stoptimes_000 rr:subjectMap :s_000. -:s_000 rr:template "http://transport.linkeddata.es/madrid/metro/stoptimes/{trip_id}-{stop_id}-{arrival_time}". -:pom_000 a rr:PredicateObjectMap. 
-:map_stoptimes_000 rr:predicateObjectMap :pom_000. -:pm_000 a rr:PredicateMap. -:pom_000 rr:predicateMap :pm_000. -:pm_000 rr:constant rdf:type. -:pom_000 rr:objectMap :om_000. -:om_000 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#StopTime"; - rr:termType rr:IRI. -:pom_001 a rr:PredicateObjectMap. -:map_stoptimes_000 rr:predicateObjectMap :pom_001. -:pm_001 a rr:PredicateMap. -:pom_001 rr:predicateMap :pm_001. -:pm_001 rr:constant gtfs:arrivalTime. -:pom_001 rr:objectMap :om_001. -:om_001 a rr:ObjectMap; - rml:reference "arrival_time"; - rr:termType rr:Literal; - rr:datatype xsd:duration. -:pom_002 a rr:PredicateObjectMap. -:map_stoptimes_000 rr:predicateObjectMap :pom_002. -:pm_002 a rr:PredicateMap. -:pom_002 rr:predicateMap :pm_002. -:pm_002 rr:constant gtfs:departureTime. -:pom_002 rr:objectMap :om_002. -:om_002 a rr:ObjectMap; - rml:reference "departure_time"; - rr:termType rr:Literal; - rr:datatype xsd:duration. -:pom_003 a rr:PredicateObjectMap. -:map_stoptimes_000 rr:predicateObjectMap :pom_003. -:pm_003 a rr:PredicateMap. -:pom_003 rr:predicateMap :pm_003. -:pm_003 rr:constant gtfs:stopSequence. -:pom_003 rr:objectMap :om_003. -:om_003 a rr:ObjectMap; - rml:reference "stop_sequence"; - rr:termType rr:Literal; - rr:datatype xsd:integer. -:pom_004 a rr:PredicateObjectMap. -:map_stoptimes_000 rr:predicateObjectMap :pom_004. -:pm_004 a rr:PredicateMap. -:pom_004 rr:predicateMap :pm_004. -:pm_004 rr:constant gtfs:headsign. -:pom_004 rr:objectMap :om_004. -:om_004 a rr:ObjectMap; - rml:reference "stop_headsign"; - rr:termType rr:Literal. -:pom_005 a rr:PredicateObjectMap. -:map_stoptimes_000 rr:predicateObjectMap :pom_005. -:pm_005 a rr:PredicateMap. -:pom_005 rr:predicateMap :pm_005. -:pm_005 rr:constant gtfs:pickupType. -:pom_005 rr:objectMap :om_005. -:om_005 a rr:ObjectMap; - rr:template "http://transport.linkeddata.es/resource/PickupType/{pickup_type}"; - rr:termType rr:IRI. -:pom_006 a rr:PredicateObjectMap. 
-:map_stoptimes_000 rr:predicateObjectMap :pom_006. -:pm_006 a rr:PredicateMap. -:pom_006 rr:predicateMap :pm_006. -:pm_006 rr:constant gtfs:dropOffType. -:pom_006 rr:objectMap :om_006. -:om_006 a rr:ObjectMap; - rr:template "http://transport.linkeddata.es/resource/DropOffType/{drop_off_type}"; - rr:termType rr:IRI. -:pom_007 a rr:PredicateObjectMap. -:map_stoptimes_000 rr:predicateObjectMap :pom_007. -:pm_007 a rr:PredicateMap. -:pom_007 rr:predicateMap :pm_007. -:pm_007 rr:constant gtfs:distanceTraveled. -:pom_007 rr:objectMap :om_007. -:om_007 a rr:ObjectMap; - rml:reference "shape_dist_traveled"; - rr:termType rr:Literal. -:pom_008 a rr:PredicateObjectMap. -:map_stoptimes_000 rr:predicateObjectMap :pom_008. -:pm_008 a rr:PredicateMap. -:pom_008 rr:predicateMap :pm_008. -:pm_008 rr:constant gtfs:trip. -:pom_008 rr:objectMap :om_008. -:pom_009 a rr:PredicateObjectMap. -:map_stoptimes_000 rr:predicateObjectMap :pom_009. -:pm_009 a rr:PredicateMap. -:pom_009 rr:predicateMap :pm_009. -:pm_009 rr:constant gtfs:stop. -:pom_009 rr:objectMap :om_009. -:rules_000 void:exampleResource :map_trips_000. -:map_trips_000 rml:logicalSource :source_001. -:source_001 a rml:LogicalSource; - rml:source "data/TRIPS.csv"; - rml:referenceFormulation ql:CSV. -:map_trips_000 a rr:TriplesMap; - rdfs:label "trips". -:s_001 a rr:SubjectMap. -:map_trips_000 rr:subjectMap :s_001. -:s_001 rr:template "http://transport.linkeddata.es/madrid/metro/trips/{trip_id}". -:pom_010 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_010. -:pm_010 a rr:PredicateMap. -:pom_010 rr:predicateMap :pm_010. -:pm_010 rr:constant rdf:type. -:pom_010 rr:objectMap :om_010. -:om_010 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Trip"; - rr:termType rr:IRI. -:pom_011 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_011. -:pm_011 a rr:PredicateMap. -:pom_011 rr:predicateMap :pm_011. -:pm_011 rr:constant gtfs:headsign. -:pom_011 rr:objectMap :om_011. 
-:om_011 a rr:ObjectMap; - rml:reference "trip_headsign"; - rr:termType rr:Literal. -:pom_012 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_012. -:pm_012 a rr:PredicateMap. -:pom_012 rr:predicateMap :pm_012. -:pm_012 rr:constant gtfs:shortName. -:pom_012 rr:objectMap :om_012. -:om_012 a rr:ObjectMap; - rml:reference "trip_short_name"; - rr:termType rr:Literal. -:pom_013 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_013. -:pm_013 a rr:PredicateMap. -:pom_013 rr:predicateMap :pm_013. -:pm_013 rr:constant gtfs:direction. -:pom_013 rr:objectMap :om_013. -:om_013 a rr:ObjectMap; - rml:reference "direction_id"; - rr:termType rr:Literal. -:pom_014 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_014. -:pm_014 a rr:PredicateMap. -:pom_014 rr:predicateMap :pm_014. -:pm_014 rr:constant gtfs:block. -:pom_014 rr:objectMap :om_014. -:om_014 a rr:ObjectMap; - rml:reference "block_id"; - rr:termType rr:Literal. -:pom_015 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_015. -:pm_015 a rr:PredicateMap. -:pom_015 rr:predicateMap :pm_015. -:pm_015 rr:constant gtfs:wheelchairAccessible. -:pom_015 rr:objectMap :om_015. -:om_015 a rr:ObjectMap; - rr:template "http://transport.linkeddata.es/resource/WheelchairBoardingStatus/{wheelchair_accessible}"; - rr:termType rr:IRI. -:pom_016 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_016. -:pm_016 a rr:PredicateMap. -:pom_016 rr:predicateMap :pm_016. -:pm_016 rr:constant gtfs:service. -:pom_016 rr:objectMap :om_016, :om_017. -:pom_017 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_017. -:pm_017 a rr:PredicateMap. -:pom_017 rr:predicateMap :pm_017. -:pm_017 rr:constant gtfs:route. -:pom_017 rr:objectMap :om_018. -:pom_018 a rr:PredicateObjectMap. -:map_trips_000 rr:predicateObjectMap :pom_018. -:pm_018 a rr:PredicateMap. -:pom_018 rr:predicateMap :pm_018. -:pm_018 rr:constant gtfs:shape. -:pom_018 rr:objectMap :om_019. 
-:rules_000 void:exampleResource :map_routes_000. -:map_routes_000 rml:logicalSource :source_002. -:source_002 a rml:LogicalSource; - rml:source "data/ROUTES.csv"; - rml:referenceFormulation ql:CSV. -:map_routes_000 a rr:TriplesMap; - rdfs:label "routes". -:s_002 a rr:SubjectMap. -:map_routes_000 rr:subjectMap :s_002. -:s_002 rr:template "http://transport.linkeddata.es/madrid/metro/routes/{route_id}". -:pom_019 a rr:PredicateObjectMap. -:map_routes_000 rr:predicateObjectMap :pom_019. -:pm_019 a rr:PredicateMap. -:pom_019 rr:predicateMap :pm_019. -:pm_019 rr:constant rdf:type. -:pom_019 rr:objectMap :om_020. -:om_020 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Route"; - rr:termType rr:IRI. -:pom_020 a rr:PredicateObjectMap. -:map_routes_000 rr:predicateObjectMap :pom_020. -:pm_020 a rr:PredicateMap. -:pom_020 rr:predicateMap :pm_020. -:pm_020 rr:constant gtfs:shortName. -:pom_020 rr:objectMap :om_021. -:om_021 a rr:ObjectMap; - rml:reference "route_short_name"; - rr:termType rr:Literal. -:pom_021 a rr:PredicateObjectMap. -:map_routes_000 rr:predicateObjectMap :pom_021. -:pm_021 a rr:PredicateMap. -:pom_021 rr:predicateMap :pm_021. -:pm_021 rr:constant gtfs:longName. -:pom_021 rr:objectMap :om_022. -:om_022 a rr:ObjectMap; - rml:reference "route_long_name"; - rr:termType rr:Literal. -:pom_022 a rr:PredicateObjectMap. -:map_routes_000 rr:predicateObjectMap :pom_022. -:pm_022 a rr:PredicateMap. -:pom_022 rr:predicateMap :pm_022. -:pm_022 rr:constant dct:description. -:pom_022 rr:objectMap :om_023. -:om_023 a rr:ObjectMap; - rml:reference "route_desc"; - rr:termType rr:Literal. -:pom_023 a rr:PredicateObjectMap. -:map_routes_000 rr:predicateObjectMap :pom_023. -:pm_023 a rr:PredicateMap. -:pom_023 rr:predicateMap :pm_023. -:pm_023 rr:constant gtfs:routeType. -:pom_023 rr:objectMap :om_024. -:om_024 a rr:ObjectMap; - rr:template "http://transport.linkeddata.es/resource/RouteType/{route_type}"; - rr:termType rr:IRI. -:pom_024 a rr:PredicateObjectMap. 
-:map_routes_000 rr:predicateObjectMap :pom_024. -:pm_024 a rr:PredicateMap. -:pom_024 rr:predicateMap :pm_024. -:pm_024 rr:constant gtfs:routeUrl. -:pom_024 rr:objectMap :om_025. -:om_025 a rr:ObjectMap; - rml:reference "route_url"; - rr:termType rr:IRI. -:pom_025 a rr:PredicateObjectMap. -:map_routes_000 rr:predicateObjectMap :pom_025. -:pm_025 a rr:PredicateMap. -:pom_025 rr:predicateMap :pm_025. -:pm_025 rr:constant gtfs:color. -:pom_025 rr:objectMap :om_026. -:om_026 a rr:ObjectMap; - rml:reference "route_color"; - rr:termType rr:Literal. -:pom_026 a rr:PredicateObjectMap. -:map_routes_000 rr:predicateObjectMap :pom_026. -:pm_026 a rr:PredicateMap. -:pom_026 rr:predicateMap :pm_026. -:pm_026 rr:constant gtfs:textColor. -:pom_026 rr:objectMap :om_027. -:om_027 a rr:ObjectMap; - rml:reference "route_text_color"; - rr:termType rr:Literal. -:pom_027 a rr:PredicateObjectMap. -:map_routes_000 rr:predicateObjectMap :pom_027. -:pm_027 a rr:PredicateMap. -:pom_027 rr:predicateMap :pm_027. -:pm_027 rr:constant gtfs:agency. -:pom_027 rr:objectMap :om_028. -:rules_000 void:exampleResource :map_agency_000. -:map_agency_000 rml:logicalSource :source_003. -:source_003 a rml:LogicalSource; - rml:source "data/AGENCY.csv"; - rml:referenceFormulation ql:CSV. -:map_agency_000 a rr:TriplesMap; - rdfs:label "agency". -:s_003 a rr:SubjectMap. -:map_agency_000 rr:subjectMap :s_003. -:s_003 rr:template "http://transport.linkeddata.es/madrid/agency/{agency_id}". -:pom_028 a rr:PredicateObjectMap. -:map_agency_000 rr:predicateObjectMap :pom_028. -:pm_028 a rr:PredicateMap. -:pom_028 rr:predicateMap :pm_028. -:pm_028 rr:constant rdf:type. -:pom_028 rr:objectMap :om_029. -:om_029 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Agency"; - rr:termType rr:IRI. -:pom_029 a rr:PredicateObjectMap. -:map_agency_000 rr:predicateObjectMap :pom_029. -:pm_029 a rr:PredicateMap. -:pom_029 rr:predicateMap :pm_029. -:pm_029 rr:constant foaf:page. -:pom_029 rr:objectMap :om_030. 
-:om_030 a rr:ObjectMap; - rml:reference "agency_url"; - rr:termType rr:IRI. -:pom_030 a rr:PredicateObjectMap. -:map_agency_000 rr:predicateObjectMap :pom_030. -:pm_030 a rr:PredicateMap. -:pom_030 rr:predicateMap :pm_030. -:pm_030 rr:constant foaf:name. -:pom_030 rr:objectMap :om_031. -:om_031 a rr:ObjectMap; - rml:reference "agency_name"; - rr:termType rr:Literal. -:pom_031 a rr:PredicateObjectMap. -:map_agency_000 rr:predicateObjectMap :pom_031. -:pm_031 a rr:PredicateMap. -:pom_031 rr:predicateMap :pm_031. -:pm_031 rr:constant gtfs:timeZone. -:pom_031 rr:objectMap :om_032. -:om_032 a rr:ObjectMap; - rml:reference "agency_timezone"; - rr:termType rr:Literal. -:pom_032 a rr:PredicateObjectMap. -:map_agency_000 rr:predicateObjectMap :pom_032. -:pm_032 a rr:PredicateMap. -:pom_032 rr:predicateMap :pm_032. -:pm_032 rr:constant dct:language. -:pom_032 rr:objectMap :om_033. -:om_033 a rr:ObjectMap; - rml:reference "agency_lang"; - rr:termType rr:Literal. -:pom_033 a rr:PredicateObjectMap. -:map_agency_000 rr:predicateObjectMap :pom_033. -:pm_033 a rr:PredicateMap. -:pom_033 rr:predicateMap :pm_033. -:pm_033 rr:constant foaf:phone. -:pom_033 rr:objectMap :om_034. -:om_034 a rr:ObjectMap; - rml:reference "agency_phone"; - rr:termType rr:Literal. -:pom_034 a rr:PredicateObjectMap. -:map_agency_000 rr:predicateObjectMap :pom_034. -:pm_034 a rr:PredicateMap. -:pom_034 rr:predicateMap :pm_034. -:pm_034 rr:constant gtfs:fareUrl. -:pom_034 rr:objectMap :om_035. -:om_035 a rr:ObjectMap; - rml:reference "agency_fare_url"; - rr:termType rr:IRI. -:rules_000 void:exampleResource :map_stops_000. -:map_stops_000 rml:logicalSource :source_004. -:source_004 a rml:LogicalSource; - rml:source "data/STOPS.csv"; - rml:referenceFormulation ql:CSV. -:map_stops_000 a rr:TriplesMap; - rdfs:label "stops". -:s_004 a rr:SubjectMap. -:map_stops_000 rr:subjectMap :s_004. -:s_004 rr:template "http://transport.linkeddata.es/madrid/metro/stops/{stop_id}". -:pom_035 a rr:PredicateObjectMap. 
-:map_stops_000 rr:predicateObjectMap :pom_035. -:pm_035 a rr:PredicateMap. -:pom_035 rr:predicateMap :pm_035. -:pm_035 rr:constant rdf:type. -:pom_035 rr:objectMap :om_036. -:om_036 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Stop"; - rr:termType rr:IRI. -:pom_036 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_036. -:pm_036 a rr:PredicateMap. -:pom_036 rr:predicateMap :pm_036. -:pm_036 rr:constant gtfs:code. -:pom_036 rr:objectMap :om_037. -:om_037 a rr:ObjectMap; - rml:reference "stop_code"; - rr:termType rr:Literal. -:pom_037 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_037. -:pm_037 a rr:PredicateMap. -:pom_037 rr:predicateMap :pm_037. -:pm_037 rr:constant dct:identifier. -:pom_037 rr:objectMap :om_038. -:om_038 a rr:ObjectMap; - rml:reference "stop_id"; - rr:termType rr:Literal. -:pom_038 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_038. -:pm_038 a rr:PredicateMap. -:pom_038 rr:predicateMap :pm_038. -:pm_038 rr:constant foaf:name. -:pom_038 rr:objectMap :om_039. -:om_039 a rr:ObjectMap; - rml:reference "stop_name"; - rr:termType rr:Literal. -:pom_039 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_039. -:pm_039 a rr:PredicateMap. -:pom_039 rr:predicateMap :pm_039. -:pm_039 rr:constant dct:description. -:pom_039 rr:objectMap :om_040. -:om_040 a rr:ObjectMap; - rml:reference "stop_desc"; - rr:termType rr:Literal. -:pom_040 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_040. -:pm_040 a rr:PredicateMap. -:pom_040 rr:predicateMap :pm_040. -:pm_040 rr:constant geo:lat. -:pom_040 rr:objectMap :om_041. -:om_041 a rr:ObjectMap; - rml:reference "stop_lat"; - rr:termType rr:Literal; - rr:datatype xsd:double. -:pom_041 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_041. -:pm_041 a rr:PredicateMap. -:pom_041 rr:predicateMap :pm_041. -:pm_041 rr:constant geo:long. -:pom_041 rr:objectMap :om_042. 
-:om_042 a rr:ObjectMap; - rml:reference "stop_lon"; - rr:termType rr:Literal; - rr:datatype xsd:double. -:pom_042 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_042. -:pm_042 a rr:PredicateMap. -:pom_042 rr:predicateMap :pm_042. -:pm_042 rr:constant gtfs:zone. -:pom_042 rr:objectMap :om_043. -:om_043 a rr:ObjectMap; - rml:reference "zone_id"; - rr:termType rr:Literal. -:pom_043 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_043. -:pm_043 a rr:PredicateMap. -:pom_043 rr:predicateMap :pm_043. -:pm_043 rr:constant foaf:page. -:pom_043 rr:objectMap :om_044. -:om_044 a rr:ObjectMap; - rml:reference "stop_url"; - rr:termType rr:IRI. -:pom_044 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_044. -:pm_044 a rr:PredicateMap. -:pom_044 rr:predicateMap :pm_044. -:pm_044 rr:constant gtfs:locationType. -:pom_044 rr:objectMap :om_045. -:om_045 a rr:ObjectMap; - rr:template "http://transport.linkeddata.es/resource/LocationType/{location_type}"; - rr:termType rr:IRI. -:pom_045 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_045. -:pm_045 a rr:PredicateMap. -:pom_045 rr:predicateMap :pm_045. -:pm_045 rr:constant gtfs:timeZone. -:pom_045 rr:objectMap :om_046. -:om_046 a rr:ObjectMap; - rml:reference "stop_timezone"; - rr:termType rr:Literal. -:pom_046 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_046. -:pm_046 a rr:PredicateMap. -:pom_046 rr:predicateMap :pm_046. -:pm_046 rr:constant gtfs:wheelchairAccessible. -:pom_046 rr:objectMap :om_047. -:om_047 a rr:ObjectMap; - rr:template "http://transport.linkeddata.es/resource/WheelchairBoardingStatus/{wheelchair_boarding}"; - rr:termType rr:IRI. -:pom_047 a rr:PredicateObjectMap. -:map_stops_000 rr:predicateObjectMap :pom_047. -:pm_047 a rr:PredicateMap. -:pom_047 rr:predicateMap :pm_047. -:pm_047 rr:constant gtfs:parentStation. -:pom_047 rr:objectMap :om_048. -:rules_000 void:exampleResource :map_services1_000. 
-:map_services1_000 rml:logicalSource :source_005. -:source_005 a rml:LogicalSource; - rml:source "data/CALENDAR.csv"; - rml:referenceFormulation ql:CSV. -:map_services1_000 a rr:TriplesMap; - rdfs:label "services1". -:s_005 a rr:SubjectMap. -:map_services1_000 rr:subjectMap :s_005. -:s_005 rr:template "http://transport.linkeddata.es/madrid/metro/services/{service_id}". -:pom_048 a rr:PredicateObjectMap. -:map_services1_000 rr:predicateObjectMap :pom_048. -:pm_048 a rr:PredicateMap. -:pom_048 rr:predicateMap :pm_048. -:pm_048 rr:constant rdf:type. -:pom_048 rr:objectMap :om_049. -:om_049 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Service"; - rr:termType rr:IRI. -:pom_049 a rr:PredicateObjectMap. -:map_services1_000 rr:predicateObjectMap :pom_049. -:pm_049 a rr:PredicateMap. -:pom_049 rr:predicateMap :pm_049. -:pm_049 rr:constant gtfs:serviceRule. -:pom_049 rr:objectMap :om_050. -:rules_000 void:exampleResource :map_services2_000. -:map_services2_000 rml:logicalSource :source_006. -:source_006 a rml:LogicalSource; - rml:source "data/CALENDAR_DATES.csv"; - rml:referenceFormulation ql:CSV. -:map_services2_000 a rr:TriplesMap; - rdfs:label "services2". -:s_006 a rr:SubjectMap. -:map_services2_000 rr:subjectMap :s_006. -:s_006 rr:template "http://transport.linkeddata.es/madrid/metro/services/{service_id}". -:pom_050 a rr:PredicateObjectMap. -:map_services2_000 rr:predicateObjectMap :pom_050. -:pm_050 a rr:PredicateMap. -:pom_050 rr:predicateMap :pm_050. -:pm_050 rr:constant rdf:type. -:pom_050 rr:objectMap :om_051. -:om_051 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Service"; - rr:termType rr:IRI. -:pom_051 a rr:PredicateObjectMap. -:map_services2_000 rr:predicateObjectMap :pom_051. -:pm_051 a rr:PredicateMap. -:pom_051 rr:predicateMap :pm_051. -:pm_051 rr:constant gtfs:serviceRule. -:pom_051 rr:objectMap :om_052. -:rules_000 void:exampleResource :map_calendar_date_rules_000. -:map_calendar_date_rules_000 rml:logicalSource :source_007. 
-:source_007 a rml:LogicalSource; - rml:source "data/CALENDAR_DATES.csv"; - rml:referenceFormulation ql:CSV. -:map_calendar_date_rules_000 a rr:TriplesMap; - rdfs:label "calendar_date_rules". -:s_007 a rr:SubjectMap. -:map_calendar_date_rules_000 rr:subjectMap :s_007. -:s_007 rr:template "http://transport.linkeddata.es/madrid/metro/calendar_date_rule/{service_id}-{date}". -:pom_052 a rr:PredicateObjectMap. -:map_calendar_date_rules_000 rr:predicateObjectMap :pom_052. -:pm_052 a rr:PredicateMap. -:pom_052 rr:predicateMap :pm_052. -:pm_052 rr:constant rdf:type. -:pom_052 rr:objectMap :om_053. -:om_053 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#CalendarDateRule"; - rr:termType rr:IRI. -:pom_053 a rr:PredicateObjectMap. -:map_calendar_date_rules_000 rr:predicateObjectMap :pom_053. -:pm_053 a rr:PredicateMap. -:pom_053 rr:predicateMap :pm_053. -:pm_053 rr:constant dct:date. -:pom_053 rr:objectMap :om_054. -:om_054 a rr:ObjectMap; - rml:reference "date"; - rr:termType rr:Literal; - rr:datatype xsd:date. -:pom_054 a rr:PredicateObjectMap. -:map_calendar_date_rules_000 rr:predicateObjectMap :pom_054. -:pm_054 a rr:PredicateMap. -:pom_054 rr:predicateMap :pm_054. -:pm_054 rr:constant gtfs:dateAddition. -:pom_054 rr:objectMap :om_055. -:om_055 a rr:ObjectMap; - rml:reference "exception_type"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:rules_000 void:exampleResource :map_calendar_rules_000. -:map_calendar_rules_000 rml:logicalSource :source_008. -:source_008 a rml:LogicalSource; - rml:source "data/CALENDAR.csv"; - rml:referenceFormulation ql:CSV. -:map_calendar_rules_000 a rr:TriplesMap; - rdfs:label "calendar_rules". -:s_008 a rr:SubjectMap. -:map_calendar_rules_000 rr:subjectMap :s_008. -:s_008 rr:template "http://transport.linkeddata.es/madrid/metro/calendar_rules/{service_id}". -:pom_055 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_055. -:pm_055 a rr:PredicateMap. -:pom_055 rr:predicateMap :pm_055. 
-:pm_055 rr:constant rdf:type. -:pom_055 rr:objectMap :om_056. -:om_056 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#CalendarRule"; - rr:termType rr:IRI. -:pom_056 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_056. -:pm_056 a rr:PredicateMap. -:pom_056 rr:predicateMap :pm_056. -:pm_056 rr:constant gtfs:monday. -:pom_056 rr:objectMap :om_057. -:om_057 a rr:ObjectMap; - rml:reference "monday"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:pom_057 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_057. -:pm_057 a rr:PredicateMap. -:pom_057 rr:predicateMap :pm_057. -:pm_057 rr:constant gtfs:tuesday. -:pom_057 rr:objectMap :om_058. -:om_058 a rr:ObjectMap; - rml:reference "tuesday"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:pom_058 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_058. -:pm_058 a rr:PredicateMap. -:pom_058 rr:predicateMap :pm_058. -:pm_058 rr:constant gtfs:wednesday. -:pom_058 rr:objectMap :om_059. -:om_059 a rr:ObjectMap; - rml:reference "wednesday"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:pom_059 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_059. -:pm_059 a rr:PredicateMap. -:pom_059 rr:predicateMap :pm_059. -:pm_059 rr:constant gtfs:thursday. -:pom_059 rr:objectMap :om_060. -:om_060 a rr:ObjectMap; - rml:reference "thursday"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:pom_060 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_060. -:pm_060 a rr:PredicateMap. -:pom_060 rr:predicateMap :pm_060. -:pm_060 rr:constant gtfs:friday. -:pom_060 rr:objectMap :om_061. -:om_061 a rr:ObjectMap; - rml:reference "friday"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:pom_061 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_061. -:pm_061 a rr:PredicateMap. -:pom_061 rr:predicateMap :pm_061. -:pm_061 rr:constant gtfs:saturday. 
-:pom_061 rr:objectMap :om_062. -:om_062 a rr:ObjectMap; - rml:reference "saturday"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:pom_062 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_062. -:pm_062 a rr:PredicateMap. -:pom_062 rr:predicateMap :pm_062. -:pm_062 rr:constant gtfs:sunday. -:pom_062 rr:objectMap :om_063. -:om_063 a rr:ObjectMap; - rml:reference "sunday"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:pom_063 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_063. -:pm_063 a rr:PredicateMap. -:pom_063 rr:predicateMap :pm_063. -:pm_063 rr:constant schema:startDate. -:pom_063 rr:objectMap :om_064. -:om_064 a rr:ObjectMap; - rml:reference "start_date"; - rr:termType rr:Literal; - rr:datatype xsd:date. -:pom_064 a rr:PredicateObjectMap. -:map_calendar_rules_000 rr:predicateObjectMap :pom_064. -:pm_064 a rr:PredicateMap. -:pom_064 rr:predicateMap :pm_064. -:pm_064 rr:constant schema:endDate. -:pom_064 rr:objectMap :om_065. -:om_065 a rr:ObjectMap; - rml:reference "end_date"; - rr:termType rr:Literal; - rr:datatype xsd:date. -:rules_000 void:exampleResource :map_feed_000. -:map_feed_000 rml:logicalSource :source_009. -:source_009 a rml:LogicalSource; - rml:source "data/FEED_INFO.csv"; - rml:referenceFormulation ql:CSV. -:map_feed_000 a rr:TriplesMap; - rdfs:label "feed". -:s_009 a rr:SubjectMap. -:map_feed_000 rr:subjectMap :s_009. -:s_009 rr:template "http://transport.linkeddata.es/madrid/metro/feed/{feed_publisher_name}". -:pom_065 a rr:PredicateObjectMap. -:map_feed_000 rr:predicateObjectMap :pom_065. -:pm_065 a rr:PredicateMap. -:pom_065 rr:predicateMap :pm_065. -:pm_065 rr:constant rdf:type. -:pom_065 rr:objectMap :om_066. -:om_066 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Feed"; - rr:termType rr:IRI. -:pom_066 a rr:PredicateObjectMap. -:map_feed_000 rr:predicateObjectMap :pom_066. -:pm_066 a rr:PredicateMap. -:pom_066 rr:predicateMap :pm_066. 
-:pm_066 rr:constant dct:publisher. -:pom_066 rr:objectMap :om_067. -:om_067 a rr:ObjectMap; - rml:reference "feed_publisher_name"; - rr:termType rr:Literal. -:pom_067 a rr:PredicateObjectMap. -:map_feed_000 rr:predicateObjectMap :pom_067. -:pm_067 a rr:PredicateMap. -:pom_067 rr:predicateMap :pm_067. -:pm_067 rr:constant foaf:page. -:pom_067 rr:objectMap :om_068. -:om_068 a rr:ObjectMap; - rml:reference "feed_publisher_url"; - rr:termType rr:IRI. -:pom_068 a rr:PredicateObjectMap. -:map_feed_000 rr:predicateObjectMap :pom_068. -:pm_068 a rr:PredicateMap. -:pom_068 rr:predicateMap :pm_068. -:pm_068 rr:constant dct:language. -:pom_068 rr:objectMap :om_069. -:om_069 a rr:ObjectMap; - rml:reference "feed_lang"; - rr:termType rr:Literal. -:pom_069 a rr:PredicateObjectMap. -:map_feed_000 rr:predicateObjectMap :pom_069. -:pm_069 a rr:PredicateMap. -:pom_069 rr:predicateMap :pm_069. -:pm_069 rr:constant schema:startDate. -:pom_069 rr:objectMap :om_070. -:om_070 a rr:ObjectMap; - rml:reference "feed_start_date"; - rr:termType rr:Literal; - rr:datatype xsd:date. -:pom_070 a rr:PredicateObjectMap. -:map_feed_000 rr:predicateObjectMap :pom_070. -:pm_070 a rr:PredicateMap. -:pom_070 rr:predicateMap :pm_070. -:pm_070 rr:constant schema:endDate. -:pom_070 rr:objectMap :om_071. -:om_071 a rr:ObjectMap; - rml:reference "feed_end_date"; - rr:termType rr:Literal; - rr:datatype xsd:date. -:pom_071 a rr:PredicateObjectMap. -:map_feed_000 rr:predicateObjectMap :pom_071. -:pm_071 a rr:PredicateMap. -:pom_071 rr:predicateMap :pm_071. -:pm_071 rr:constant schema:version. -:pom_071 rr:objectMap :om_072. -:om_072 a rr:ObjectMap; - rml:reference "feed_version"; - rr:termType rr:Literal. -:rules_000 void:exampleResource :map_shapes_000. -:map_shapes_000 rml:logicalSource :source_010. -:source_010 a rml:LogicalSource; - rml:source "data/SHAPES.csv"; - rml:referenceFormulation ql:CSV. -:map_shapes_000 a rr:TriplesMap; - rdfs:label "shapes". -:s_010 a rr:SubjectMap. 
-:map_shapes_000 rr:subjectMap :s_010. -:s_010 rr:template "http://transport.linkeddata.es/madrid/metro/shape/{shape_id}". -:pom_072 a rr:PredicateObjectMap. -:map_shapes_000 rr:predicateObjectMap :pom_072. -:pm_072 a rr:PredicateMap. -:pom_072 rr:predicateMap :pm_072. -:pm_072 rr:constant rdf:type. -:pom_072 rr:objectMap :om_073. -:om_073 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Shape"; - rr:termType rr:IRI. -:pom_073 a rr:PredicateObjectMap. -:map_shapes_000 rr:predicateObjectMap :pom_073. -:pm_073 a rr:PredicateMap. -:pom_073 rr:predicateMap :pm_073. -:pm_073 rr:constant gtfs:shapePoint. -:pom_073 rr:objectMap :om_074. -:om_074 a rr:ObjectMap; - rr:template "http://transport.linkeddata.es/madrid/metro/shape_point/{shape_id}-{shape_pt_sequence}"; - rr:termType rr:Literal. -:rules_000 void:exampleResource :map_shapePoints_000. -:map_shapePoints_000 rml:logicalSource :source_011. -:source_011 a rml:LogicalSource; - rml:source "data/SHAPES.csv"; - rml:referenceFormulation ql:CSV. -:map_shapePoints_000 a rr:TriplesMap; - rdfs:label "shapePoints". -:s_011 a rr:SubjectMap. -:map_shapePoints_000 rr:subjectMap :s_011. -:s_011 rr:template "http://transport.linkeddata.es/madrid/metro/shape_point/{shape_id}-{shape_pt_sequence}". -:pom_074 a rr:PredicateObjectMap. -:map_shapePoints_000 rr:predicateObjectMap :pom_074. -:pm_074 a rr:PredicateMap. -:pom_074 rr:predicateMap :pm_074. -:pm_074 rr:constant rdf:type. -:pom_074 rr:objectMap :om_075. -:om_075 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#ShapePoint"; - rr:termType rr:IRI. -:pom_075 a rr:PredicateObjectMap. -:map_shapePoints_000 rr:predicateObjectMap :pom_075. -:pm_075 a rr:PredicateMap. -:pom_075 rr:predicateMap :pm_075. -:pm_075 rr:constant geo:lat. -:pom_075 rr:objectMap :om_076. -:om_076 a rr:ObjectMap; - rml:reference "shape_pt_lat"; - rr:termType rr:Literal; - rr:datatype xsd:double. -:pom_076 a rr:PredicateObjectMap. -:map_shapePoints_000 rr:predicateObjectMap :pom_076. 
-:pm_076 a rr:PredicateMap. -:pom_076 rr:predicateMap :pm_076. -:pm_076 rr:constant geo:long. -:pom_076 rr:objectMap :om_077. -:om_077 a rr:ObjectMap; - rml:reference "shape_pt_lon"; - rr:termType rr:Literal; - rr:datatype xsd:double. -:pom_077 a rr:PredicateObjectMap. -:map_shapePoints_000 rr:predicateObjectMap :pom_077. -:pm_077 a rr:PredicateMap. -:pom_077 rr:predicateMap :pm_077. -:pm_077 rr:constant gtfs:pointSequence. -:pom_077 rr:objectMap :om_078. -:om_078 a rr:ObjectMap; - rml:reference "shape_pt_sequence"; - rr:termType rr:Literal. -:pom_078 a rr:PredicateObjectMap. -:map_shapePoints_000 rr:predicateObjectMap :pom_078. -:pm_078 a rr:PredicateMap. -:pom_078 rr:predicateMap :pm_078. -:pm_078 rr:constant gtfs:distanceTraveled. -:pom_078 rr:objectMap :om_079. -:om_079 a rr:ObjectMap; - rml:reference "shape_dist_traveled"; - rr:termType rr:Literal. -:rules_000 void:exampleResource :map_frequencies_000. -:map_frequencies_000 rml:logicalSource :source_012. -:source_012 a rml:LogicalSource; - rml:source "data/FREQUENCIES.csv"; - rml:referenceFormulation ql:CSV. -:map_frequencies_000 a rr:TriplesMap; - rdfs:label "frequencies". -:s_012 a rr:SubjectMap. -:map_frequencies_000 rr:subjectMap :s_012. -:s_012 rr:template "http://transport.linkeddata.es/madrid/metro/frequency/{trip_id}-{start_time}". -:pom_079 a rr:PredicateObjectMap. -:map_frequencies_000 rr:predicateObjectMap :pom_079. -:pm_079 a rr:PredicateMap. -:pom_079 rr:predicateMap :pm_079. -:pm_079 rr:constant rdf:type. -:pom_079 rr:objectMap :om_080. -:om_080 a rr:ObjectMap; - rr:constant "http://vocab.gtfs.org/terms#Frequency"; - rr:termType rr:IRI. -:pom_080 a rr:PredicateObjectMap. -:map_frequencies_000 rr:predicateObjectMap :pom_080. -:pm_080 a rr:PredicateMap. -:pom_080 rr:predicateMap :pm_080. -:pm_080 rr:constant gtfs:startTime. -:pom_080 rr:objectMap :om_081. -:om_081 a rr:ObjectMap; - rml:reference "start_time"; - rr:termType rr:Literal. -:pom_081 a rr:PredicateObjectMap. 
-:map_frequencies_000 rr:predicateObjectMap :pom_081. -:pm_081 a rr:PredicateMap. -:pom_081 rr:predicateMap :pm_081. -:pm_081 rr:constant gtfs:endTime. -:pom_081 rr:objectMap :om_082. -:om_082 a rr:ObjectMap; - rml:reference "end_time"; - rr:termType rr:Literal. -:pom_082 a rr:PredicateObjectMap. -:map_frequencies_000 rr:predicateObjectMap :pom_082. -:pm_082 a rr:PredicateMap. -:pom_082 rr:predicateMap :pm_082. -:pm_082 rr:constant gtfs:headwaySeconds. -:pom_082 rr:objectMap :om_083. -:om_083 a rr:ObjectMap; - rml:reference "headway_secs"; - rr:termType rr:Literal; - rr:datatype xsd:integer. -:pom_083 a rr:PredicateObjectMap. -:map_frequencies_000 rr:predicateObjectMap :pom_083. -:pm_083 a rr:PredicateMap. -:pom_083 rr:predicateMap :pm_083. -:pm_083 rr:constant gtfs:exactTimes. -:pom_083 rr:objectMap :om_084. -:om_084 a rr:ObjectMap; - rml:reference "exact_times"; - rr:termType rr:Literal; - rr:datatype xsd:boolean. -:pom_084 a rr:PredicateObjectMap. -:map_frequencies_000 rr:predicateObjectMap :pom_084. -:pm_084 a rr:PredicateMap. -:pom_084 rr:predicateMap :pm_084. -:pm_084 rr:constant gtfs:trip. -:pom_084 rr:objectMap :om_085. -:om_008 a rr:ObjectMap; - rr:parentTriplesMap :map_trips_000; - rr:joinCondition :jc_000. -:jc_000 rr:child "trip_id"; - rr:parent "trip_id". -:om_009 a rr:ObjectMap; - rr:parentTriplesMap :map_stops_000; - rr:joinCondition :jc_001. -:jc_001 rr:child "stop_id"; - rr:parent "stop_id". -:om_016 a rr:ObjectMap; - rr:parentTriplesMap :map_services1_000; - rr:joinCondition :jc_002. -:jc_002 rr:child "service_id"; - rr:parent "service_id". -:om_017 a rr:ObjectMap; - rr:parentTriplesMap :map_services2_000; - rr:joinCondition :jc_003. -:jc_003 rr:child "service_id"; - rr:parent "service_id". -:om_018 a rr:ObjectMap; - rr:parentTriplesMap :map_routes_000; - rr:joinCondition :jc_004. -:jc_004 rr:child "route_id"; - rr:parent "route_id". -:om_019 a rr:ObjectMap; - rr:parentTriplesMap :map_shapes_000; - rr:joinCondition :jc_005. 
-:jc_005 rr:child "shape_id"; - rr:parent "shape_id". -:om_028 a rr:ObjectMap; - rr:parentTriplesMap :map_agency_000; - rr:joinCondition :jc_006. -:jc_006 rr:child "agency_id"; - rr:parent "agency_id". -:om_048 a rr:ObjectMap; - rr:parentTriplesMap :map_stops_000; - rr:joinCondition :jc_007. -:jc_007 rr:child "parent_station"; - rr:parent "stop_id". -:om_050 a rr:ObjectMap; - rr:parentTriplesMap :map_calendar_rules_000; - rr:joinCondition :jc_008. -:jc_008 rr:child "service_id"; - rr:parent "service_id". -:om_052 a rr:ObjectMap; - rr:parentTriplesMap :map_calendar_date_rules_000; - rr:joinCondition :jc_009. -:jc_009 rr:child "service_id"; - rr:parent "service_id". -:om_085 a rr:ObjectMap; - rr:parentTriplesMap :map_trips_000; - rr:joinCondition :jc_010. -:jc_010 rr:child "trip_id"; - rr:parent "trip_id". \ No newline at end of file diff --git a/dat/morph-default-config.ini b/dat/morph-default-config.ini deleted file mode 100644 index 490d476..0000000 --- a/dat/morph-default-config.ini +++ /dev/null @@ -1,36 +0,0 @@ -[DEFAULT] -main_dir: . -mappings_dir: . 
- - -[CONFIGURATION] - -# INPUT -na_filter=yes -na_values=,#N/A,N/A,#N/A N/A,n/a,NA,,#NA,NULL,null,NaN,nan,None - -# OUTPUT -output_dir=${main_dir}/morph-output -output_file=result -output_format=N-QUADS -clean_output_dir=no -only_printable_characters=no -safe_percent_encoding= - -# MAPPINGS -mapping_partition=PARTIAL-AGGREGATIONS -infer_sql_datatypes=no - -# MATERIALIZATION -chunksize=100000 - -# MULTIPROCESSING -number_of_processes=2 - -# LOGS -logging_level=INFO -logs_file= - - -[DataSource1] -mappings=${main_dir}/recipes.rml.csv \ No newline at end of file diff --git a/dat/student_sport.db b/dat/student_sport.db new file mode 100644 index 0000000000000000000000000000000000000000..4d43c0797119260913ab2ef8e5937064f7950957 GIT binary patch literal 20480 zcmeI&&ui0Q7zgn8ZPL}H>t5X@%u#p=NLwi45D%WTuBjBes%@R@RNl2w1L>QZro-Eq z{&k*rkg=2CK@krM9=!P`X-7NTgSQR7FNEiPpS(XmIlYkf^M)Hb{FNUJM9A-w>x5GB zkaI#vPCaS$OpB%}`m~`UovAOY=E&^_r#ZGk^xO$yN9<ecQfX>nJE&TA(qC)sGOncTBheYeuu=TGc?UYs3PjvI0BQrW0IZQ1q5&40UNy4!OH zk8ZrzBJW-*W11#CzLh9O)5nC`1+{qHMC)9H&wwbgsx#^Z?l5$HY4Jl5^xlY|c<)Zh zC>_y^xwS=)U(9~0$$nB77D~S300Izz00bZa0SG_<0uX?}WfoXp(}`(T#i85Zb3IRNWOb6?tT}-cvhP . +@prefix foaf: . +@prefix ex: . +@prefix xsd: . +@base . + + + a rr:TriplesMap; + + rr:logicalTable [ rr:tableName "Student"; ] ; + + rr:subjectMap [ + rr:template "http://example.com/student/{ID}"; + rr:class ex:Student; + ]; + + rr:predicateObjectMap + [ + rr:predicate ex:firstName ; + rr:objectMap [ rr:column "FirstName" ] + ]; + + rr:predicateObjectMap + [ + rr:predicate ex:lastName ; + rr:objectMap [ rr:column "LastName" ] + ] + . + + + a rr:TriplesMap; + + rr:logicalTable [ rr:tableName "Sport"; ] ; + + rr:subjectMap [ + rr:template "http://example.com/sport/{ID}"; + rr:class ex:Sport; + ]; + + rr:predicateObjectMap + [ + rr:predicate ex:id ; + rr:objectMap [ rr:column "ID"; ] + ]; + + rr:predicateObjectMap + [ + rr:predicate ex:description ; + rr:objectMap [ rr:column "Description" ] + ] + . 
+ + + a rr:TriplesMap; + + rr:logicalTable [ rr:tableName "Student_Sport" ]; + + rr:subjectMap [ rr:template "http://example.com/student/{ID_Student}" ]; + + rr:predicateObjectMap [ + rr:predicate ex:plays ; + rr:objectMap [ rr:template "http://example.com/sport/{ID_Sport}" ]; + ]. diff --git a/examples/ex2_1.ipynb b/examples/ex2_1.ipynb index 8d074ce..4e224df 100644 --- a/examples/ex2_1.ipynb +++ b/examples/ex2_1.ipynb @@ -7,7 +7,8 @@ "outputs": [], "source": [ "# for use in tutorial and development; do not include this `sys.path` change in production:\n", - "import sys ; sys.path.insert(0, \"../\")" + "import sys ; sys.path.insert(0, \"../\")\n", + "import os" ] }, { @@ -23,7 +24,7 @@ "source": [ "# Load data via Morph-KGC\n", "\n", - "> [`morph-kgc`](https://github.com/oeg-upm/morph-kgc) is an engine that constructs RDF knowledge graphs from heterogeneous data sources with R2RML and RML mapping languages. Morph-KGC is built on top of pandas and it leverages mapping partitions to significantly reduce execution times and memory consumption for large data sources.\n", + "> [`morph-kgc`](https://github.com/oeg-upm/morph-kgc) is an engine that constructs RDF knowledge graphs from heterogeneous data sources with [R2RML](https://www.w3.org/2001/sw/rdb2rdf/r2rml/) and [RML](https://rml.io/specs/rml/) mapping languages. Morph-KGC is built on top of pandas and it leverages mapping partitions to significantly reduce execution times and memory consumption for large data sources.\n", "\n", "For documentation see " ] @@ -32,7 +33,55 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's set up our recipe KG:" + "This example uses a simple SQLite database with students and sports and transforms it to an RDF knowledge graph using an R2RML mapping.\n", + "\n", + "First, let's visualize the sample database." 
+ ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "CREATE TABLE \"Student\" (\n", + " \"ID\" integer PRIMARY KEY,\n", + " \"FirstName\" varchar(50),\n", + " \"LastName\" varchar(50)\n", + ");\n", + "\n", + "CREATE TABLE \"Sport\" (\n", + " \"ID\" integer PRIMARY KEY,\n", + " \"Description\" varchar(50)\n", + ");\n", + "\n", + "CREATE TABLE \"Student_Sport\" (\n", + " \"ID_Student\" integer,\n", + " \"ID_Sport\" integer,\n", + " PRIMARY KEY (\"ID_Student\",\"ID_Sport\"),\n", + " FOREIGN KEY (\"ID_Student\") REFERENCES \"Student\"(\"ID\"),\n", + " FOREIGN KEY (\"ID_Sport\") REFERENCES \"Sport\"(\"ID\")\n", + ");\n", + "\n", + "INSERT INTO \"Student\" (\"ID\",\"FirstName\",\"LastName\") VALUES (10,'Venus', 'Williams');\n", + "INSERT INTO \"Student\" (\"ID\",\"FirstName\",\"LastName\") VALUES (11,'Fernando', 'Alonso');\n", + "INSERT INTO \"Student\" (\"ID\",\"FirstName\",\"LastName\") VALUES (12,'David', 'Villa');\n", + "\n", + "INSERT INTO \"Sport\" (\"ID\", \"Description\") VALUES (110,'Tennis');\n", + "INSERT INTO \"Sport\" (\"ID\", \"Description\") VALUES (111,'Football');\n", + "INSERT INTO \"Sport\" (\"ID\", \"Description\") VALUES (112,'Formula1');\n", + "\n", + "INSERT INTO \"Student_Sport\" (\"ID_Student\", \"ID_Sport\") VALUES (10,110);\n", + "INSERT INTO \"Student_Sport\" (\"ID_Student\", \"ID_Sport\") VALUES (11,111);\n", + "INSERT INTO \"Student_Sport\" (\"ID_Student\", \"ID_Sport\") VALUES (11,112);\n", + "INSERT INTO \"Student_Sport\" (\"ID_Student\", \"ID_Sport\") VALUES (12,111);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that it contains 3 tables, and some data for them.\n", + "\n", + "`Morph-KGC` is configured via a `config.ini` file. Let's create a basic one for our example." 
] }, { @@ -41,100 +90,120 @@ "metadata": {}, "outputs": [], "source": [ - "from icecream import ic\n", - "from os.path import dirname\n", - "import kglab\n", - "import os\n", - "\n", - "namespaces = {\n", - " \"nom\": \"http://example.org/#\",\n", - " \"wtm\": \"http://purl.org/heals/food/\",\n", - " \"ind\": \"http://purl.org/heals/ingredient/\",\n", - " \"skos\": \"http://www.w3.org/2004/02/skos/core#\",\n", - " }\n", + "config = f\"\"\"\n", + " [StudentSportDB]\n", + " mappings={os.path.dirname(os.getcwd())}/dat/student_sport.r2rml.ttl\n", + " db_url=sqlite:///{os.path.dirname(os.getcwd())}/dat/student_sport.db\n", + " \"\"\"\n", "\n", - "kg = kglab.KnowledgeGraph(\n", - " name = \"A recipe KG example based on Food.com\",\n", - " base_uri = \"https://www.food.com/recipe/\",\n", - " namespaces = namespaces,\n", - " )" + "# it is also possible to provide a path to the config file:\n", + "# config = 'path/to/config.ini'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now let's use `morph-kgc` to load a file, based on a given _configuration_ for the RML transform.\n", - "\n", - "Data can be loaded from multiple text formats, and also through different ORMs (e.g., `SQLAlchemy`), via a config file with extension `.ini`" + "You can see how to create this config file in the [docs](https://github.com/oeg-upm/Morph-KGC/wiki/Configuration)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's use `morph-kgc` to load the RDF data from the SQLite, based on the the `config.ini` and an R2RML mapping." 
] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "ic| dirname(os.getcwd()): '/Users/paco/src/kglab'\n", - "ic| config_path: '/Users/paco/src/kglab/dat/morph-default-config.ini'\n" + "INFO | 2022-02-25 16:50:14,340 | 7 mapping rules retrieved.\n", + "INFO | 2022-02-25 16:50:14,358 | Mapping partition with 1 groups generated.\n", + "INFO | 2022-02-25 16:50:14,359 | Maximum number of rules within mapping group: 7.\n", + "INFO | 2022-02-25 16:50:14,361 | Mappings processed in 0.778 seconds.\n", + "INFO | 2022-02-25 16:50:14,455 | Number of triples generated in total: 22.\n" ] }, { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: './dat/recipes.ttl'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/var/folders/dp/q971mmvs2m98ypxb3sb0xmxc0000gn/T/ipykernel_45097/3189966681.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mkg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaterialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/src/kglab/kglab/kglab.py\u001b[0m in \u001b[0;36mmaterialize\u001b[0;34m(self, config)\u001b[0m\n\u001b[1;32m 904\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_g\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 905\u001b[0m \u001b[0;31m# generate the triples and load them to an RDFlib graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 906\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_g\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmorph_kgc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaterialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 907\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 908\u001b[0m \u001b[0;31m# merge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/__init__.py\u001b[0m in \u001b[0;36mmaterialize\u001b[0;34m(config)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0msetup_oracle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mmappings_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mretrieve_mappings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0msubject_maps_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_subject_maps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmappings_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mmapping_partitions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mgroup\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mmappings_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mby\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'mapping_partition'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/engine.py\u001b[0m in \u001b[0;36mretrieve_mappings\u001b[0;34m(config)\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m \u001b[0mmappings\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmappings_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse_mappings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 29\u001b[0m \u001b[0mlogging\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Mappings processed in '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mget_delta_time\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstart_time\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' seconds.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/mapping/mapping_parser.py\u001b[0m in \u001b[0;36mparse_mappings\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mparse_mappings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 295\u001b[0;31m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_from_r2_rml\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 296\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_preprocess_mappings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_infer_datatypes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/mapping/mapping_parser.py\u001b[0m in \u001b[0;36m_get_from_r2_rml\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 323\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msection_name\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_data_sources_sections\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 325\u001b[0;31m \u001b[0mdata_source_mappings_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parse_data_source_mapping_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msection_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 326\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmappings_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmappings_df\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mdata_source_mappings_df\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/mapping/mapping_parser.py\u001b[0m in \u001b[0;36m_parse_data_source_mapping_files\u001b[0;34m(self, section_name)\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0mmapping_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrdflib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 341\u001b[0;31m \u001b[0mmapping_file_paths\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_mappings_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msection_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 342\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[0;31m# load mapping rules to the graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/src/subgraph/venv/lib/python3.7/site-packages/morph_kgc/config.py\u001b[0m in \u001b[0;36mget_mappings_files\u001b[0;34m(self, source_section)\u001b[0m\n\u001b[1;32m 337\u001b[0m \u001b[0mmapping_file_paths\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmapping_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 339\u001b[0;31m \u001b[0;32mraise\u001b[0m 
\u001b[0mFileNotFoundError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrno\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mENOENT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrerror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrno\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mENOENT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmapping_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 340\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmapping_file_paths\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './dat/recipes.ttl'" - ] + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "#data_path = dirname(os.getcwd()) + \"/dat/recipes.ttl\"\n", - "config_path = dirname(os.getcwd()) + \"/dat/morph-default-config.ini\"\n", - "\n", - "ic(dirname(os.getcwd()))\n", + "from icecream import ic\n", + "import icecream\n", + "import kglab\n", "\n", - "# config = f\"\"\"[DataSource1]\n", - "# mappings={datapath}\"\"\"\n", + "namespaces = {\n", + " \"ex\": \"http://example.com/\",\n", + " }\n", "\n", - "ic(config_path)\n", + "kg = kglab.KnowledgeGraph(\n", + " name = \"A KG example with students and sports\",\n", + " namespaces = namespaces,\n", + " )\n", "\n", - "kg.materialize(config_path)" + "kg.materialize(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data can be loaded from multiple text formats (e.g. CSV, JSON, XML, Parquet), and also through different relational DBMS (PostgresSQL, MySQL, Oracle, Microsoft SQL Server, MariaDB)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's try to query." + "Now let's try to query!" 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ic| row.asdict(): {'sport_desc': rdflib.term.Literal('Formula1'),\n", + " 'student_name': rdflib.term.Literal('Fernando')}\n", + "ic| row.asdict(): {'sport_desc': rdflib.term.Literal('Football'),\n", + " 'student_name': rdflib.term.Literal('Fernando')}\n", + "ic| row.asdict(): {'sport_desc': rdflib.term.Literal('Tennis'),\n", + " 'student_name': rdflib.term.Literal('Venus')}\n", + "ic| row.asdict(): {'sport_desc': rdflib.term.Literal('Football'),\n", + " 'student_name': rdflib.term.Literal('David')}\n" + ] + } + ], "source": [ "sparql = \"\"\"\n", - " SELECT ?subject ?object\n", + " PREFIX ex: \n", + "\n", + " SELECT ?student_name ?sport_desc\n", " WHERE {\n", - " ?subject rdf:type wtm:Recipe .\n", - " ?subject wtm:hasIngredient ?object .\n", + " ?student rdf:type ex:Student .\n", + " ?student ex:firstName ?student_name .\n", + " ?student ex:plays ?sport .\n", + " ?sport ex:description ?sport_desc\n", " }\n", " \"\"\"\n", "\n", @@ -159,7 +228,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/kglab/kglab.py b/kglab/kglab.py index 50fc94a..e71893e 100644 --- a/kglab/kglab.py +++ b/kglab/kglab.py @@ -896,7 +896,7 @@ def materialize ( Binding to the morph-kgc `materialize()` method. 
config: -path to a morph-kgc configuration file; see +morph-kgc configuration, it can be the path to the config file, or a string with the config; see returns: this `KnowledgeGraph` object – used for method chaining diff --git a/requirements.txt b/requirements.txt index d9ce5a7..77b0825 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ gcsfs >= 0.7.1 gensim >= 3.8.3 icecream >= 2.1 matplotlib >= 3.3.4 +morph-kgc >= 1.6.0 networkx >= 2.6 numpy >= 1.19.2 owlrl >= 6.0.2 From 704d14755d27f7c55da92bf434d5ca03474fcd63 Mon Sep 17 00:00:00 2001 From: Paco Nathan Date: Sun, 27 Feb 2022 12:44:49 -0800 Subject: [PATCH 5/7] clean to prep for release --- README.md | 1 + changelog.txt | 4 +- docs/ack.md | 7 +- docs/community.md | 11 +- docs/ref.md | 290 +++++++++++++++++++++++-------------------- docs/stub.ipynb | 32 ----- examples/ex2_1.ipynb | 103 +++++++-------- mkdocs.yml | 3 +- requirements-dev.txt | 2 +- 9 files changed, 209 insertions(+), 244 deletions(-) delete mode 100644 docs/stub.ipynb diff --git a/README.md b/README.md index 9445f77..366e099 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,7 @@ and to our contributors: [@louisguitton](https://github.com/louisguitton), [@tomaarsen](https://github.com/tomaarsen), [@Mec-iS](https://github.com/Mec-iS), +[@ArenasGuerreroJulian](https://github.com/ArenasGuerreroJulian), [@fils](https://github.com/fils), [@gauravjaglan](https://github.com/gauravjaglan), [@pebbie](https://github.com/pebbie), diff --git a/changelog.txt b/changelog.txt index b475625..3e2ae27 100644 --- a/changelog.txt +++ b/changelog.txt @@ -2,9 +2,9 @@ ## 0.4.4 -2022-02-24 +2022-02-27 - * integration of `morph-kgc`; kudos @Mec-iS and many thanks to @ArenasGuerreroJulian + * integration of `morph-kgc`; kudos @Mec-iS and @ArenasGuerreroJulian ## 0.4.3 diff --git a/docs/ack.md b/docs/ack.md index bf21e72..819fab9 100644 --- a/docs/ack.md +++ b/docs/ack.md @@ -11,6 +11,7 @@ and to our contributors: 
[@louisguitton](https://github.com/louisguitton), [@tomaarsen](https://github.com/tomaarsen), [@Mec-iS](https://github.com/Mec-iS), +[@ArenasGuerreroJulian](https://github.com/ArenasGuerreroJulian), [@fils](https://github.com/fils), [@gauravjaglan](https://github.com/gauravjaglan), [@pebbie](https://github.com/pebbie), @@ -105,7 +106,7 @@ Source code for **kglab** plus its logo, documentation, and examples have an [MIT license](https://spdx.org/licenses/MIT.html) which is succinct and simplifies use in commercial applications. -All materials herein are Copyright © 2020-2021 Derwen, Inc. +All materials herein are Copyright © 2020-2022 Derwen, Inc. [![logo for Derwen, Inc.](https://derwen.ai/static/block_logo.png)](https://derwen.ai/) @@ -134,3 +135,7 @@ See also: * [zincbase](https://github.com/complexdb/zincbase) * *pro:* probabilistic graph measures, complex simulation suite, leverages GPUs * *con:* lacks interchange with RDF or other standard formats + +In general, check + for excellent +curated listings of open source semantic technologies in Python. diff --git a/docs/community.md b/docs/community.md index de5caba..d0c5bc9 100644 --- a/docs/community.md +++ b/docs/community.md @@ -13,8 +13,9 @@ source project. Links for open source community resources: * [Issue Tracker](https://github.com/DerwenAI/kglab/issues) for discussions and open source governance - * ["Graph-Based Data Science"](https://derwen.ai/s/kcgh) talk which gets updated along with the code - * [*Graph-Based Data Science*](https://www.linkedin.com/groups/6725785/) group on LinkedIn – join to receive related updates, news, conference coupons, etc. + * ["Graph Data Science"](https://derwen.ai/s/kcgh) talk which gets updated along with the code + * [*Graph Data Science*](https://www.linkedin.com/groups/6725785/) group on LinkedIn – join to receive related updates, news, conference coupons, etc. 
+ * ["Graph Thinking"](https://medium.com/knowledge-technologies/graph-thinking-5e9c85134ab0) with Jürgen Müller The [Knowledge Graph Conference](glossary/#knowledge-graph-conference) hosts several community resources where you can post questions and @@ -25,12 +26,6 @@ topics. * [community Slack](https://knowledgegraphconf.slack.com/ssb/redirect) – specifically on the `#ask` channel * [Knowledge Tech Q&A site](https://answers.knowledgegraph.tech/) for extended discussions -[KGC](glossary/#knowledge-graph-conference) -also hosts -[monthly office hours](https://www.notion.so/KG-Community-Events-Calendar-8aacbe22efa94d9b8b39b7288e22c2d3) -with [Paco Nathan](ack/#project-lead) -and others involved in this open source project. - ## Project Feedback and Roadmap diff --git a/docs/ref.md b/docs/ref.md index 7f38161..b737948 100644 --- a/docs/ref.md +++ b/docs/ref.md @@ -16,7 +16,7 @@ Core feature areas include: --- #### [`__init__` method](#kglab.KnowledgeGraph.__init__) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L70) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L81) ```python __init__(name="generic", base_uri=None, language="en", use_gpus=True, import_graph=None, namespaces=None) @@ -45,7 +45,7 @@ a dictionary of [*namespace*](https://rdflib.readthedocs.io/en/stable/apidocs/rd --- #### [`rdf_graph` method](#kglab.KnowledgeGraph.rdf_graph) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L129) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L140) ```python rdf_graph() @@ -59,7 +59,7 @@ the [`rdflib.Graph`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html --- #### [`add_ns` method](#kglab.KnowledgeGraph.add_ns) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L154) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L165) ```python add_ns(prefix, iri, override=True, replace=False) @@ -84,7 +84,7 @@ 
replace any existing prefix with the new namespace --- #### [`get_ns` method](#kglab.KnowledgeGraph.get_ns) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L198) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L209) ```python get_ns(prefix) @@ -101,7 +101,7 @@ the RDFlib [`Namespace`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib. --- #### [`get_ns_dict` method](#kglab.KnowledgeGraph.get_ns_dict) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L214) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L225) ```python get_ns_dict() @@ -115,7 +115,7 @@ a `dict` describing the namespaces in this RDF graph --- #### [`describe_ns` method](#kglab.KnowledgeGraph.describe_ns) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L236) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L247) ```python describe_ns() @@ -129,7 +129,7 @@ a [`pandas.DataFrame`](https://pandas.pydata.org/pandas-docs/stable/reference/ap --- #### [`get_context` method](#kglab.KnowledgeGraph.get_context) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L263) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L274) ```python get_context() @@ -144,7 +144,7 @@ context needed for JSON-LD serialization --- #### [`encode_date` method](#kglab.KnowledgeGraph.encode_date) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L282) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L293) ```python encode_date(dt, tzinfos) @@ -164,7 +164,7 @@ timezones as a dict, used by --- #### [`add` method](#kglab.KnowledgeGraph.add) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L304) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L315) ```python add(s, p, o) @@ -174,20 +174,20 @@ Uses the RDF Graph as its context. 
To prepare for upcoming **kglab** features, **this is the preferred method for adding relations to an RDF graph.** - * `s` : `typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` + * `s` : `typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` *subject* node; - * `p` : `typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` + * `p` : `typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` *predicate* relation; - * `o` : `typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` + * `o` : `typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` *object* node; --- #### [`remove` method](#kglab.KnowledgeGraph.remove) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L338) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L349) ```python remove(s, p, o) @@ -197,20 +197,20 @@ Uses the RDF Graph as its context. 
To prepare for upcoming **kglab** features, **this is the preferred method for removing relations from an RDF graph.** - * `s` : `typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` + * `s` : `typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` *subject* node; - * `p` : `typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` + * `p` : `typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` *predicate* relation; - * `o` : `typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` + * `o` : `typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` *object* node; --- #### [`load_rdf` method](#kglab.KnowledgeGraph.load_rdf) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/decorators.py#L57) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/decorators.py#L61) ```python load_rdf(path, format="ttl", base=None, **args) @@ -237,7 +237,7 @@ this `KnowledgeGraph` object – used for method chaining --- #### [`load_rdf_text` method](#kglab.KnowledgeGraph.load_rdf_text) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L530) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L541) ```python load_rdf_text(data, format="ttl", base=None, **args) @@ -263,7 +263,7 @@ this `KnowledgeGraph` object – used for method chaining --- #### [`save_rdf` method](#kglab.KnowledgeGraph.save_rdf) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L573) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L584) ```python save_rdf(path, format="ttl", base=None, encoding="utf-8", **args) @@ -287,7 +287,7 @@ optional text encoding value, defaults to `"utf-8"`, must be in the [Python code --- #### [`save_rdf_text` method](#kglab.KnowledgeGraph.save_rdf_text) 
-[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L638) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L649) ```python save_rdf_text(format="ttl", base=None, encoding="utf-8", **args) @@ -310,7 +310,7 @@ text representing the RDF graph --- #### [`load_jsonld` method](#kglab.KnowledgeGraph.load_jsonld) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/decorators.py#L57) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/decorators.py#L61) ```python load_jsonld(path, encoding="utf-8", **args) @@ -333,7 +333,7 @@ this `KnowledgeGraph` object – used for method chaining --- #### [`save_jsonld` method](#kglab.KnowledgeGraph.save_jsonld) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L725) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L736) ```python save_jsonld(path, encoding="utf-8", **args) @@ -351,7 +351,7 @@ optional text encoding value, which defaults to `"utf-8"`; must be in the [Pytho --- #### [`load_parquet` method](#kglab.KnowledgeGraph.load_parquet) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/decorators.py#L57) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/decorators.py#L61) ```python load_parquet(path, **kwargs) @@ -373,7 +373,7 @@ this `KnowledgeGraph` object – used for method chaining --- #### [`save_parquet` method](#kglab.KnowledgeGraph.save_parquet) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L811) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L825) ```python save_parquet(path, compression="snappy", storage_options=None, **kwargs) @@ -396,7 +396,7 @@ extra options parsed by [`fsspec`](https://github.com/intake/filesystem_spec) fo --- #### [`load_csv` method](#kglab.KnowledgeGraph.load_csv) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L856) 
+[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L870) ```python load_csv(url) @@ -411,9 +411,26 @@ this `KnowledgeGraph` object – used for method chaining +--- +#### [`materialize` method](#kglab.KnowledgeGraph.materialize) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L891) + +```python +materialize(config) +``` +Binding to the morph-kgc `materialize()` method. + + * `config` : `str` +morph-kgc configuration, it can be the path to the config file, or a string with the config; see + + * *returns* : `KnowledgeGraph` +this `KnowledgeGraph` object – used for method chaining + + + --- #### [`import_roam` method](#kglab.KnowledgeGraph.import_roam) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/decorators.py#L57) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/decorators.py#L61) ```python import_roam(path, encoding="utf-8") @@ -439,49 +456,9 @@ a list of identifiers for the top-level nodes added from the Roam Research graph ---- -#### [`n3fy` method](#kglab.KnowledgeGraph.n3fy) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L976) - -```python -n3fy(node, pythonify=True) -``` -Wrapper for RDFlib [`n3()`](https://rdflib.readthedocs.io/en/stable/utilities.html?highlight=n3#serializing-a-single-term-to-n3) and [`toPython()`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=toPython#rdflib.Variable.toPython) to serialize a node into a human-readable representation using N3 format. 
- - * `node` : `typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` -must be a [`rdflib.term.Node`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Node#rdflib.term.Node) - - * `pythonify` : `bool` -flag to force instances of [`rdflib.term.Literal`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Literal#rdflib.term.Identifier) to their Python literal representation - - * *returns* : `typing.Any` -text (or Python objects) for the serialized node - - - ---- -#### [`n3fy_row` method](#kglab.KnowledgeGraph.n3fy_row) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1002) - -```python -n3fy_row(row_dict, pythonify=True) -``` -Wrapper for RDFlib [`n3()`](https://rdflib.readthedocs.io/en/stable/utilities.html?highlight=n3#serializing-a-single-term-to-n3) and [`toPython()`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=toPython#rdflib.Variable.toPython) to serialize one row of a result set from a SPARQL query into a human-readable representation for each term using N3 format. 
- - * `row_dict` : `dict` -one row of a SPARQL query results, as a `dict` - - * `pythonify` : `bool` -flag to force instances of [`rdflib.term.Literal`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Literal#rdflib.term.Identifier) to their Python literal representation - - * *returns* : `dict` -a dictionary of serialized row bindings - - - --- #### [`query` method](#kglab.KnowledgeGraph.query) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1031) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1021) ```python query(sparql, bindings=None) @@ -501,7 +478,7 @@ initial variable bindings --- #### [`query_as_df` method](#kglab.KnowledgeGraph.query_as_df) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1059) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1049) ```python query_as_df(sparql, bindings=None, simplify=True, pythonify=True) @@ -527,7 +504,7 @@ the query result set represented as a [`pandas.DataFrame`](https://pandas.pydata --- #### [`visualize_query` method](#kglab.KnowledgeGraph.visualize_query) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1103) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1093) ```python visualize_query(sparql, notebook=False) @@ -545,9 +522,49 @@ PyVis network object, to be rendered +--- +#### [`n3fy` method](#kglab.KnowledgeGraph.n3fy) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1114) + +```python +n3fy(node, pythonify=True) +``` +Wrapper for RDFlib [`n3()`](https://rdflib.readthedocs.io/en/stable/utilities.html?highlight=n3#serializing-a-single-term-to-n3) and [`toPython()`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=toPython#rdflib.Variable.toPython) to serialize a node into a human-readable representation using N3 format. 
+ + * `node` : `typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]` +must be a [`rdflib.term.Node`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Node#rdflib.term.Node) + + * `pythonify` : `bool` +flag to force instances of [`rdflib.term.Literal`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Literal#rdflib.term.Identifier) to their Python literal representation + + * *returns* : `typing.Any` +text (or Python objects) for the serialized node + + + +--- +#### [`n3fy_row` method](#kglab.KnowledgeGraph.n3fy_row) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1140) + +```python +n3fy_row(row_dict, pythonify=True) +``` +Wrapper for RDFlib [`n3()`](https://rdflib.readthedocs.io/en/stable/utilities.html?highlight=n3#serializing-a-single-term-to-n3) and [`toPython()`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=toPython#rdflib.Variable.toPython) to serialize one row of a result set from a SPARQL query into a human-readable representation for each term using N3 format. 
+ + * `row_dict` : `dict` +one row of a SPARQL query results, as a `dict` + + * `pythonify` : `bool` +flag to force instances of [`rdflib.term.Literal`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=Literal#rdflib.term.Identifier) to their Python literal representation + + * *returns* : `dict` +a dictionary of serialized row bindings + + + --- #### [`validate` method](#kglab.KnowledgeGraph.validate) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1127) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1169) ```python validate(shacl_graph=None, shacl_graph_format=None, ont_graph=None, ont_graph_format=None, advanced=False, inference=None, inplace=True, abort_on_first=None, **kwargs) @@ -584,7 +601,7 @@ a tuple of `conforms` (RDF graph passes the validation rules) + `report_graph` ( --- #### [`infer_owlrl_closure` method](#kglab.KnowledgeGraph.infer_owlrl_closure) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1205) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/kglab.py#L1247) ```python infer_owlrl_closure() @@ -597,7 +614,7 @@ See --- #### [`__init__` method](#kglab.GPViz.__init__) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/gpviz.py#L56) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/gpviz.py#L61) ```python __init__(sparql, namespaces) @@ -1496,7 +1513,7 @@ the namespaces for the corresponding RDF graph --- #### [`visualize_query` method](#kglab.GPViz.visualize_query) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/gpviz.py#L355) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/gpviz.py#L360) ```python visualize_query(notebook=False) @@ -1512,7 +1529,7 @@ PyVis graph to be rendered ## [module functions](#kglab) --- #### [`calc_quantile_bins` function](#kglab.calc_quantile_bins) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/util.py#L47) 
+[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/util.py#L51) ```python calc_quantile_bins(num_rows) @@ -1529,26 +1546,23 @@ the calculated bins, as a [`numpy.ndarray`](https://numpy.org/doc/stable/referen --- #### [`get_gpu_count` function](#kglab.get_gpu_count) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/util.py#L13) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/util.py#L17) ```python get_gpu_count() ``` Special handling for detecting GPU availability: an approach -recommended by the NVIDIA RAPIDS engineering team, since `nvml` +recommended by the NVidia RAPIDS engineering team, since `nvml` bindings are difficult for Python libraries to keep updated. -This has the side-effect of importing the `cuDF` library, when -GPUs are available. - * *returns* : `int` -count of available GPUs +count of available GPUs, where `0` means none or disabled. --- #### [`import_from_neo4j` function](#kglab.import_from_neo4j) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/external_import.py#L21) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/external_import.py#L19) ```python import_from_neo4j(username, password, dbname, host="localhost", port="7474") @@ -1582,7 +1596,7 @@ an [`rdflib.Graph`](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html? 
--- #### [`root_mean_square` function](#kglab.root_mean_square) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/util.py#L100) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/util.py#L104) ```python root_mean_square(values) @@ -1599,7 +1613,7 @@ RMS metric as a float --- #### [`stripe_column` function](#kglab.stripe_column) -[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/util.py#L63) +[*\[source\]*](https://github.com/DerwenAI/kglab/blob/main/kglab/util.py#L67) ```python stripe_column(values, bins, use_gpus=False) @@ -1629,7 +1643,7 @@ Census_Dyad_Tally = typing.Tuple[pandas.core.frame.DataFrame, dict] #### [`Census_Item` type](#kglab.Census_Item) ```python -Census_Item = typing.Union[str, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode] +Census_Item = typing.Union[str, rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode] ``` #### [`EvoShapeBoard` type](#kglab.EvoShapeBoard) @@ -1654,7 +1668,7 @@ IOPathLike = typing.Union[str, pathlib.Path, urlpath.URL, typing.IO] #### [`NodeLike` type](#kglab.NodeLike) ```python -NodeLike = typing.Union[str, NoneType, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode] +NodeLike = typing.Union[str, NoneType, rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode] ``` #### [`PathLike` type](#kglab.PathLike) @@ -1664,12 +1678,12 @@ PathLike = typing.Union[str, pathlib.Path, urlpath.URL] #### [`RDF_Node` type](#kglab.RDF_Node) ```python -RDF_Node = typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode] +RDF_Node = typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode] ``` #### [`RDF_Triple` type](#kglab.RDF_Triple) ```python -RDF_Triple = typing.Tuple[typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode], typing.Union[rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode], typing.Union[rdflib.term.URIRef, rdflib.term.Literal, 
rdflib.term.BNode]] +RDF_Triple = typing.Tuple[typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode], typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode], typing.Union[rdflib.term.Node, rdflib.term.URIRef, rdflib.term.Literal, rdflib.term.BNode]] ``` #### [`SPARQL_Bindings` type](#kglab.SPARQL_Bindings) diff --git a/docs/stub.ipynb b/docs/stub.ipynb deleted file mode 100644 index ea0c05b..0000000 --- a/docs/stub.ipynb +++ /dev/null @@ -1,32 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "this is a placeholder" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/ex2_1.ipynb b/examples/ex2_1.ipynb index 4e224df..ecda3a9 100644 --- a/examples/ex2_1.ipynb +++ b/examples/ex2_1.ipynb @@ -7,15 +7,7 @@ "outputs": [], "source": [ "# for use in tutorial and development; do not include this `sys.path` change in production:\n", - "import sys ; sys.path.insert(0, \"../\")\n", - "import os" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**WIP** during integration" + "import sys ; sys.path.insert(0, \"../\")" ] }, { @@ -33,9 +25,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This example uses a simple SQLite database with students and sports and transforms it to an RDF knowledge graph using an R2RML mapping.\n", + "This example uses a simple SQLite database as input, transforming it into an RDF knowledge graph based on an R2RML mapping for relations between \"students\" and \"sports\".\n", "\n", - "First, let's visualize the sample 
database." + "First, let's visualize the sample database:" ] }, { @@ -79,9 +71,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can see that it contains 3 tables, and some data for them.\n", + "This has three tables plus the data to populate them.\n", "\n", - "`Morph-KGC` is configured via a `config.ini` file. Let's create a basic one for our example." + "`Morph-KGC` needs a configuration to describe the mapping, so let's create a basic one for our example:" ] }, { @@ -90,28 +82,32 @@ "metadata": {}, "outputs": [], "source": [ - "config = f\"\"\"\n", - " [StudentSportDB]\n", - " mappings={os.path.dirname(os.getcwd())}/dat/student_sport.r2rml.ttl\n", - " db_url=sqlite:///{os.path.dirname(os.getcwd())}/dat/student_sport.db\n", - " \"\"\"\n", + "import os\n", "\n", - "# it is also possible to provide a path to the config file:\n", - "# config = 'path/to/config.ini'" + "config = f\"\"\"\n", + "[StudentSportDB]\n", + "mappings={os.path.dirname(os.getcwd())}/dat/student_sport.r2rml.ttl\n", + "db_url=sqlite:///{os.path.dirname(os.getcwd())}/dat/student_sport.db\n", + " \"\"\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "You can see how to create this config file in the [docs](https://github.com/oeg-upm/Morph-KGC/wiki/Configuration)." + "You can see how to create this config file in the [docs](https://github.com/oeg-upm/Morph-KGC/wiki/Configuration).\n", + "\n", + "Alternatively, you can provide a path to a config file, for example:\n", + "```\n", + "config = \"path/to/config.ini\"\n", + "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now let's use `morph-kgc` to load the RDF data from the SQLite, based on the the `config.ini` and an R2RML mapping."
+ "Next we'll use `morph-kgc` to load the RDF data from the SQLite database based on an R2RML mapping:" ] }, { @@ -123,27 +119,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO | 2022-02-25 16:50:14,340 | 7 mapping rules retrieved.\n", - "INFO | 2022-02-25 16:50:14,358 | Mapping partition with 1 groups generated.\n", - "INFO | 2022-02-25 16:50:14,359 | Maximum number of rules within mapping group: 7.\n", - "INFO | 2022-02-25 16:50:14,361 | Mappings processed in 0.778 seconds.\n", - "INFO | 2022-02-25 16:50:14,455 | Number of triples generated in total: 22.\n" + "INFO | 2022-02-27 12:15:21,403 | 7 mapping rules retrieved.\n", + "INFO | 2022-02-27 12:15:21,418 | Mapping partition with 1 groups generated.\n", + "INFO | 2022-02-27 12:15:21,419 | Maximum number of rules within mapping group: 7.\n", + "INFO | 2022-02-27 12:15:21,420 | Mappings processed in 1.739 seconds.\n", + "INFO | 2022-02-27 12:15:21,523 | Number of triples generated in total: 22.\n" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "from icecream import ic\n", - "import icecream\n", "import kglab\n", "\n", "namespaces = {\n", @@ -155,14 +140,14 @@ " namespaces = namespaces,\n", " )\n", "\n", - "kg.materialize(config)" + "kg.materialize(config);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Data can be loaded from multiple text formats (e.g. CSV, JSON, XML, Parquet), and also through different relational DBMS (PostgresSQL, MySQL, Oracle, Microsoft SQL Server, MariaDB)." + "Data can be loaded from multiple text formats, e.g. CSV, JSON, XML, Parquet, and also through different relational DBMS such as PostgreSQL, MySQL, Oracle, Microsoft SQL Server, MariaDB, and so on."
] }, { @@ -183,38 +168,36 @@ "name": "stderr", "output_type": "stream", "text": [ - "ic| row.asdict(): {'sport_desc': rdflib.term.Literal('Formula1'),\n", - " 'student_name': rdflib.term.Literal('Fernando')}\n", - "ic| row.asdict(): {'sport_desc': rdflib.term.Literal('Football'),\n", - " 'student_name': rdflib.term.Literal('Fernando')}\n", - "ic| row.asdict(): {'sport_desc': rdflib.term.Literal('Tennis'),\n", - " 'student_name': rdflib.term.Literal('Venus')}\n", - "ic| row.asdict(): {'sport_desc': rdflib.term.Literal('Football'),\n", - " 'student_name': rdflib.term.Literal('David')}\n" + "ic| student_name: 'Venus', sport_desc: 'Tennis'\n", + "ic| student_name: 'David', sport_desc: 'Football'\n", + "ic| student_name: 'Fernando', sport_desc: 'Football'\n", + "ic| student_name: 'Fernando', sport_desc: 'Formula1'\n" ] } ], "source": [ "sparql = \"\"\"\n", - " PREFIX ex: \n", - "\n", - " SELECT ?student_name ?sport_desc\n", - " WHERE {\n", - " ?student rdf:type ex:Student .\n", - " ?student ex:firstName ?student_name .\n", - " ?student ex:plays ?sport .\n", - " ?sport ex:description ?sport_desc\n", - " }\n", + "PREFIX ex: \n", + "\n", + "SELECT ?student_name ?sport_desc\n", + "WHERE {\n", + " ?student rdf:type ex:Student .\n", + " ?student ex:firstName ?student_name .\n", + " ?student ex:plays ?sport .\n", + " ?sport ex:description ?sport_desc\n", + "}\n", " \"\"\"\n", "\n", "for row in kg._g.query(sparql):\n", - " ic(row.asdict())" + " student_name = kg.n3fy(row.student_name)\n", + " sport_desc = kg.n3fy(row.sport_desc)\n", + " ic(student_name, sport_desc)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -228,7 +211,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/mkdocs.yml b/mkdocs.yml index baacef9..8b3307c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ 
-23,6 +23,7 @@ nav: - Build an RDF graph using `RDFlib`: ex1_0.md - Leverage the `kglab` abstraction layer: ex1_1.md - Build a medium size KG from a CSV dataset: ex2_0.md + - Using `morph-kgc` to input from relational databases, CSV, etc: ex2_1.md - Querying: - Run SPARQL queries: ex4_0.md - Validation: @@ -37,8 +38,6 @@ nav: - Statistical relational learning with `pslpython`: ex7_1.md - Embedding: - Vector embedding with `gensim`: ex8_0.md - - Topology: - - (TBD) Evolved Shape Prediction: stub.ipynb - Concepts: - What's a Knowledge Graph?: what.md diff --git a/requirements-dev.txt b/requirements-dev.txt index b805163..0e46fd5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,7 +9,7 @@ mkdocs-git-revision-date-plugin mkdocs-material mknotebooks mypy -nbconvert +nbconvert >= 6.4 nbmake >= 1.0 notebook >= 6.1.5 pipdeptree From 21f42086c6e1bd7b9ef6d2535392c7ed9ccd60d8 Mon Sep 17 00:00:00 2001 From: Paco Nathan Date: Sun, 27 Feb 2022 12:52:07 -0800 Subject: [PATCH 6/7] notebooks are primarily used as tutorial examples, not unit tests per se --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29d5c69..1e27e7a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,4 +26,4 @@ jobs: - uses: pre-commit/action@v2.0.2 - name: Test all notebooks in the examples/ folder can run run: | - docker exec -i kglab-notebooks bash -c 'pip install pytest nbmake && pytest --nbmake work/examples/*.ipynb' + docker exec -i kglab-notebooks bash -c 'python3 test.py' From 884a5897303417f10823e093393336859e3a74c4 Mon Sep 17 00:00:00 2001 From: Paco Nathan Date: Sun, 27 Feb 2022 13:04:55 -0800 Subject: [PATCH 7/7] no longer use notebooks in CI tests --- .github/workflows/ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1e27e7a..99a1041 100644 --- a/.github/workflows/ci.yml +++ 
b/.github/workflows/ci.yml @@ -24,6 +24,4 @@ jobs: run: docker logs kglab-notebooks - uses: pre-commit/action@v2.0.2 - - name: Test all notebooks in the examples/ folder can run - run: | - docker exec -i kglab-notebooks bash -c 'python3 test.py' + - name: TODO we no longer want to test notebooks, run unit test suite instead