Skip to content

Commit

Permalink
rdf+bcp47+hxl (#41): current refactoring done; starting metadata insp…
Browse files Browse the repository at this point in the history
…ection features
  • Loading branch information
fititnt committed Jun 6, 2022
1 parent 8fd5296 commit 39bbaa8
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 8 deletions.
20 changes: 19 additions & 1 deletion officina/999999999/0/999999999_54872.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
bcp47_rdf_extension_poc,
hxltm_carricato,
HXLTMAdRDFSimplicis,
hxltm_carricato_brevibus,
rdf_namespaces_extras
)

Expand Down Expand Up @@ -98,6 +99,13 @@
Temporary tests . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
(Debug information in JSON)
{0} --objectivum-formato=_temp_bcp47_meta_in_json \
--rdf-namespaces-archivo=\
999999999/1568346/data/hxlstandard-rdf-namespaces-example.hxl.csv \
999999999/1568346/data/unesco-thesaurus.bcp47g.tsv
(Data operations)
{0} --objectivum-formato=_temp_bcp47 --rdf-namespaces-archivo=\
999999999/1568346/data/hxlstandard-rdf-namespaces-example.hxl.csv \
999999999/1568346/data/unesco-thesaurus.bcp47g.tsv
Expand Down Expand Up @@ -237,6 +245,7 @@ def make_args(self, hxl_output=True):
# # - Uses '.ndjson' as extension
# 'application/x-ndjson',
'_temp_bcp47',
'_temp_bcp47_meta_in_json',
],
# required=True
default='application/x-turtle'
Expand Down Expand Up @@ -280,7 +289,6 @@ def make_args(self, hxl_output=True):
default=None
)


# praefīxum , n, s, nominativus,
# https://en.wiktionary.org/wiki/praefixus#Latin
# cōnfigūrātiōnī, f, s, dativus,
Expand Down Expand Up @@ -332,6 +340,16 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
# print(RDF_NAMESPACES_EXTRAS)
# pass

# @TODO maybe refactor this temporary part
if pyargs.objectivum_formato == '_temp_bcp47_meta_in_json':
caput, data = hxltm_carricato_brevibus(
_infile, _stdin, punctum_separato="\t")

meta = bcp47_rdf_extension_poc(
caput, data, objective_bag=pyargs.rdf_bag, est_meta=True)
print(json.dumps(meta, sort_keys=False, ensure_ascii=False))
return self.EXIT_OK

# @TODO remove this temporary part
if pyargs.objectivum_formato == '_temp_bcp47':
caput, data = hxltm_carricato(
Expand Down
82 changes: 76 additions & 6 deletions officina/999999999/0/L999999999_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -1356,6 +1356,10 @@ def bcp47_rdf_extension_relationship(
for subject in item_meta['extension']['r']['rdf:subject']:
# is_pivot_key = False
subject_key, subject_value = subject.split(':')
_temp1, _temp2 = subject.split('||')
subject_key = _temp1
subject_value = _temp2.replace(':NOP', '')
# raise ValueError(subject)
# if subject.startswith('∀'):
# is_pivot_key = True
# subject = subject.replace('∀', '')
Expand Down Expand Up @@ -1434,6 +1438,7 @@ def bcp47_rdf_extension_poc(
objective_bag: str = '1',
_auxiliary_bags: List[str] = None,
namespaces: List[dict] = None,
est_meta: bool = False,
strictum: bool = True
) -> dict:
"""bcp47_rdf_extension_poc _summary_
Expand Down Expand Up @@ -1472,7 +1477,6 @@ def bcp47_rdf_extension_poc(
result = {
'header': header,
'header_result': [],
'data': data,
# 'rdf:subject': None,
# 'rdf:predicate': [],
# 'rdf:object': None,
Expand All @@ -1481,6 +1485,7 @@ def bcp47_rdf_extension_poc(
'triples': [],
# We always start with default prefixes
'prefixes': RDF_NAMESPACES,
'data': data,
'_error': [],
}
# return {}
Expand Down Expand Up @@ -1568,10 +1573,13 @@ def _helper_aux(
# len(bag_meta['prefix']) > 0:
# value_prefixes = bag_meta['prefix']

for predicate in bag_meta['rdf:predicate']:
for predicate_and_subject in bag_meta['rdf:predicate']:
if not object_literal:
continue

_temp1, _temp2 = predicate_and_subject.split('||')
predicate = _temp1

if value_separator is not None and \
object_literal.find(value_separator) > -1 and \
object_literal.find('\\' + value_separator) == -1:
Expand Down Expand Up @@ -1641,10 +1649,9 @@ def _helper_aux(
if len(aux_triples) > 0:
result['triples'].extend(aux_triples)

# raise ValueError(meta)

# result['prefixes'] = RDF_NAMESPACES
# result['prefixes'] = meta['prefixes']
if est_meta:
# return bag_meta
return result

return result
# return result['triples']
Expand Down Expand Up @@ -3863,6 +3870,69 @@ def hxltm_carricato(
return caput, data


def hxltm_carricato_brevibus(
    archivum_trivio: str = None,
    est_stdin: bool = False,
    punctum_separato: str = ",",
    data_lineis: int = 3,
    est_hxl: bool = False
) -> Tuple[list, list]:
    """hxltm_carricato_brevibus read only header and part of the data

    Reads the first non-empty row as the header (caput) and at most
    ``data_lineis`` following rows as data, either from a file path or from
    standard input.

    Note: this helper is not as efficient as reading line by line. But some
    operations already require such a task.

    Trivia:
    - carricātō, n, s, dativus, https://en.wiktionary.org/wiki/carricatus#Latin
    - verbum: https://en.wiktionary.org/wiki/carricatus#Latin
    - capitī, s, n, https://en.wiktionary.org/wiki/caput#Latin
    - brevibus, pl, m/f/n, https://en.wiktionary.org/wiki/brevis#Latin

    Args:
        archivum_trivio (str, optional): Path to file. Only used when
            est_stdin is False. Defaults to None.
        est_stdin (bool, optional): Is the file stdin?. Defaults to False.
        punctum_separato (str, optional): Field delimiter used for both the
            header and the data rows. Defaults to ",".
        data_lineis (int, optional): Maximum number of data rows (after the
            header) to return. Defaults to 3.
        est_hxl (bool, optional): Currently not used by this function.
            Defaults to False.

    Returns:
        Tuple[list, list]: (caput, data), where caput is the list of header
        cells and data is a list of rows (each row a list of cells).
    """
    caput = []
    # Local countdown, so the parameter itself is left untouched.
    lineae_reliquae = data_lineis

    if est_stdin:
        _data = []
        for linea in sys.stdin:
            if len(caput) == 0:
                # Parse the single raw line with the csv module so quoting
                # rules are respected; an empty line yields [] and the next
                # line is then tried as header.
                caput = next(
                    csv.reader([linea], delimiter=punctum_separato), [])
            elif lineae_reliquae > 0:
                lineae_reliquae -= 1
                _data.append(linea)
            # NOTE: once the limit is reached the loop does not break, so the
            # rest of stdin is still consumed (behavior kept from the
            # original implementation).

        # The data rows must be split with the same delimiter as the header;
        # otherwise TSV input from stdin would be split on commas.
        return caput, list(csv.reader(_data, delimiter=punctum_separato))

    data = []
    # newline='' lets the csv module handle line endings itself, as
    # recommended by the csv module documentation.
    with open(archivum_trivio, 'r', newline='') as _fons:
        for linea in csv.reader(_fons, delimiter=punctum_separato):
            if len(caput) == 0:
                caput = linea
            elif lineae_reliquae <= 0:
                break
            else:
                lineae_reliquae -= 1
                data.append(linea)

    return caput, data


def hxltm_cum_ordinibus_ex_columnis(
caput: list, data: list, quaestio: list, data_referentibus: dict = None
) -> Tuple[list, list]:
Expand Down
2 changes: 1 addition & 1 deletion officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ bcp47_and_hxlrdf_roundtrip__drill() {

# echo "test"

# bcp47_to_hxl_to_rdf__tests
bcp47_to_hxl_to_rdf__tests
# test_unesco_thesaurus

bcp47_and_hxlrdf_roundtrip__drill

0 comments on commit 39bbaa8

Please sign in to comment.