From ff6d2e830f52110c850ffe3c986cb35589a23f20 Mon Sep 17 00:00:00 2001 From: Emerson Rocha Date: Sun, 12 Jun 2022 20:18:19 -0300 Subject: [PATCH] rdf+bcp47+hxl (#41), admin-l (#39), pcodes (#2): added draft of 999999999_54872.py --objectivum-formato=_temp_hxl_meta_in_json --- officina/999999999/0/999999999_54872.py | 53 +++++++++++++++++++++++-- officina/999999999/0/L999999999_0.py | 49 ++++++++++++++++++++++- 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/officina/999999999/0/999999999_54872.py b/officina/999999999/0/999999999_54872.py index 96a5bbb..05f2c96 100755 --- a/officina/999999999/0/999999999_54872.py +++ b/officina/999999999/0/999999999_54872.py @@ -45,6 +45,7 @@ from L999999999_0 import ( BCP47_AD_HXL, RDF_SPATIA_NOMINALIBUS_EXTRAS, + HXLHashtagSimplici, SetEncoder, bcp47_langtag, bcp47_rdf_extension_poc, @@ -274,6 +275,7 @@ def make_args(self, hxl_output=True): # 'application/x-ndjson', '_temp_bcp47', '_temp_bcp47_meta_in_json', + '_temp_hxl_meta_in_json', '_temp_header_hxl_to_bcp47', '_temp_header_bcp47_to_hxl', ], @@ -345,6 +347,26 @@ def make_args(self, hxl_output=True): default=None ) + parser.add_argument( + '--punctum-separato-de-fontem', + help='Character(s) used as separator from input file ' + + 'Used only for tabular results. ' + + 'Defaults to comma ","', + dest='fontem_separato', + default=",", + nargs='?' + ) + + parser.add_argument( + '--punctum-separato-de-resultatum', + help='Character(s) used as separator for generate output. ' + + 'Used only for tabular results. ' + + 'Defaults to tab "\t"', + dest='resultatum_separato', + default="\t", + nargs='?' + ) + parser.add_argument( # '--venandum-insectum-est, --debug', '--venandum-insectum-est', '--debug', @@ -375,6 +397,9 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout, _infile = None _stdin = True + resultatum_separato = pyargs.resultatum_separato + fontem_separato = pyargs.fontem_separato + # rdf_namespace_archivo if pyargs.rdf_namespace_archivo: rdf_namespaces_extras(pyargs.rdf_namespace_archivo) @@ -382,13 +407,31 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout, # pass # @TODO maybe refactor this temporary part - if pyargs.objectivum_formato == '_temp_bcp47_meta_in_json': + # if pyargs.objectivum_formato == '_temp_bcp47_meta_in_json': + if pyargs.objectivum_formato in [ + '_temp_bcp47_meta_in_json', '_temp_hxl_meta_in_json']: caput, data = hxltm_carricato_brevibus( - _infile, _stdin, punctum_separato="\t") + _infile, _stdin, punctum_separato=fontem_separato) + + if pyargs.objectivum_formato == '_temp_hxl_meta_in_json': + caput_novo = [] + for _item in caput: + # print('hxl item > ', _item) + _hxl = HXLHashtagSimplici(_item).praeparatio() + _item_bcp47 = _hxl.quod_bcp47(strictum=False) + # print('_item_bcp47 > ', _item_bcp47) + caput_novo.append(_item_bcp47) + caput = caput_novo + # print('caput', caput) + + rdf_sine_spatia_nominalibus = pyargs.rdf_sine_spatia_nominalibus + if not rdf_sine_spatia_nominalibus: + rdf_sine_spatia_nominalibus = [] + rdf_sine_spatia_nominalibus.append('devnull') meta = bcp47_rdf_extension_poc( caput, data, objective_bag=pyargs.rdf_bag, - rdf_sine_spatia_nominalibus=pyargs.rdf_sine_spatia_nominalibus, + rdf_sine_spatia_nominalibus=rdf_sine_spatia_nominalibus, est_meta=True) print(json.dumps( meta, sort_keys=False, ensure_ascii=False, cls=SetEncoder)) @@ -397,7 +440,7 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout, # @TODO remove thsi temporary part if pyargs.objectivum_formato == '_temp_bcp47': caput, data = hxltm_carricato( - _infile, _stdin, punctum_separato="\t") + _infile, _stdin, punctum_separato=fontem_separato) # print(caput, data) # print('') meta = bcp47_rdf_extension_poc( @@ -420,6 +463,7 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout, return self.EXIT_OK if pyargs.objectivum_formato == '_temp_header_bcp47_to_hxl': + # delimiter = "\t" delimiter = "\t" hxl_base = '#item+rem' if _stdin is True: @@ -434,6 +478,7 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout, caput_novo = [] errors = [] + # TODO: rework this funcion for item in caput: if item in BCP47_AD_HXL: # print(BCP47_AD_HXL[item]) diff --git a/officina/999999999/0/L999999999_0.py b/officina/999999999/0/L999999999_0.py index fb6eeef..dc306fb 100644 --- a/officina/999999999/0/L999999999_0.py +++ b/officina/999999999/0/L999999999_0.py @@ -522,6 +522,7 @@ def _expand_hxl_ad_rdf(): # 'p': 'http://www.wikidata.org/prop/', 'wdt': 'http://www.wikidata.org/prop/direct/', 'wdv': 'http://www.wikidata.org/value/', + 'p': 'http://www.wikidata.org/prop/', } # For "Base OWL" of Wikidata, download link: http://wikiba.se/ontology @@ -1817,6 +1818,8 @@ def _aux_recalc_containers(result: dict) -> dict: trivium_aliis[index_ex_tabula] = set() trivium_aliis[int(index_ex_tabula)].add(aliud) + # print(trivium_aliis, result['rdfs:Container']) + # Second pass _trivium_aliis = [] for trivium_alii, _item in result['rdfs:Container'].items(): @@ -1824,6 +1827,10 @@ def _aux_recalc_containers(result: dict) -> dict: _trivium_indici = _item['trivium']['index'] _cum_aliis = [] + if _trivium_indici == -1: + # Item is referenced by others, but does not explicitly exist + continue + for _item in trivium_aliis[_trivium_indici]: _cum_aliis.extend( result['rdfs:Container'][_item]['indices_columnis']) @@ -2184,6 +2191,10 @@ def bcp47_rdf_extension_poc( for caput_originali_asa in result['caput_asa']['caput_originali_asa']: # print(caput_originali_asa) # print(caput_originali_asa['extension']['r']['xsl:transform']) + + if 'r' not in caput_originali_asa['extension']: + continue + xsl_items = caput_originali_asa['extension']['r']['xsl:transform'] if not xsl_items or len(xsl_items) == 0: continue @@ -3947,6 +3958,38 @@ def quod_attributa(self, praefixa: str) -> list: return resultatum + def quod_bcp47(self, caput_contextui: List[str] = None, + strictum=True) -> str: + + if self.hashtag in BCP47_EX_HXL: + # Already '#item+conceptum+codicem'/'#item+conceptum+numerordinatio' + return BCP47_EX_HXL[self.hashtag]['bcp47'] + + if self.hashtag in HXL_HASH_ET_ATTRIBUTA_AD_RDF: + return HXL_HASH_ET_ATTRIBUTA_AD_RDF[self.hashtag]['__no1bpc47__'] + + + hxl_base = '#item+rem' + numerordinatio = self.quod_numerordinatio(caput_contextui) + item_meta = hxl_hashtag_to_bcp47(numerordinatio) + + if len(item_meta['_error']) == 0 and \ + item_meta['Language-Tag_normalized']: + return item_meta['Language-Tag_normalized'] + # print('item_meta ', item_meta) + # bcp47 = '{0}{1}'.format( + # hxl_base, + # item_meta['_callbacks']['hxl_attrs'] + # ) + return bcp47 + else: + # print('item_meta ', item_meta) + if strictum: + raise SyntaxError('{0} <{1}> <{2}>'.format( + self.hashtag, numerordinatio, item_meta) + ) + return 'qcc-Zxxx-r-aDEVNULL-abnop-anop-x-error' + def quod_numerordinatio(self, caput_contextui: List[str] = None): if self.hashtag in BCP47_EX_HXL: # Already '#item+conceptum+codicem'/'#item+conceptum+numerordinatio' @@ -4211,7 +4254,10 @@ def hxl_hashtag_to_bcp47( )) elif item.startswith('p_'): - _item_parts = item.replace('p_', '').replace('_', ':') + # print('item', item) + # _item_parts = item.replace('p_', '').replace('_', ':') + # _item_parts = item.lstrip('p_').replace('_', ':') + _item_parts = item[2:].replace('_', ':') _item_parts = _item_parts + ':NOP' result['extension']['r']['rdf:predicate'].append(_item_parts) _index_p = result['extension']['r']['rdf:predicate'].index( @@ -4219,6 +4265,7 @@ def hxl_hashtag_to_bcp47( ) # _subject_nop = 'NOP' reserved for potential future use + # print('_item_parts', _item_parts) _predicate_ns, _predicate_item, _subject, _subject_nop = \ _item_parts.split(':')