Skip to content

Commit

Permalink
rdf+bcp47+hxl (#41), admin-l (#39), pcodes (#2): bash bootstrap_1603_…
Browse files Browse the repository at this point in the history
…45_16__item_rdf() draft
  • Loading branch information
fititnt committed Jun 13, 2022
1 parent ff6d2e8 commit 1877474
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 24 deletions.
1 change: 1 addition & 0 deletions officina/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ node_modules
!999999/0/.gitkeep
!999999999/1568346/*
catalog-v001.xml
MDCIII.devel.owl

# *.xml
# *.tmx
Expand Down
19 changes: 18 additions & 1 deletion officina/999999999/0/999999999_54872.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def make_args(self, hxl_output=True):
# # - Uses '.ndjson' as extension
# 'application/x-ndjson',
'_temp_bcp47',
'_temp_no1',
'_temp_bcp47_meta_in_json',
'_temp_hxl_meta_in_json',
'_temp_header_hxl_to_bcp47',
Expand Down Expand Up @@ -438,9 +439,23 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
return self.EXIT_OK

# @TODO remove this temporary part
if pyargs.objectivum_formato == '_temp_bcp47':
# if pyargs.objectivum_formato == '_temp_bcp47':
if pyargs.objectivum_formato in ['_temp_bcp47', '_temp_no1']:

caput, data = hxltm_carricato(
_infile, _stdin, punctum_separato=fontem_separato)

if pyargs.objectivum_formato == '_temp_no1':
caput_novo = []
for _item in caput:
# print('hxl item > ', _item)
_hxl = HXLHashtagSimplici(_item).praeparatio()
_item_bcp47 = _hxl.quod_bcp47(strictum=False)
# print('_item_bcp47 > ', _item_bcp47)
caput_novo.append(_item_bcp47)
caput = caput_novo
# print('caput', caput)

# print(caput, data)
# print('')
meta = bcp47_rdf_extension_poc(
Expand Down Expand Up @@ -755,5 +770,7 @@ def numerordinatio_progenitori(

est_cli = Cli()
args = est_cli.make_args()
# print(' >>>> args', args)
# raise ValueError(args)

est_cli.execute_cli(args)
187 changes: 164 additions & 23 deletions officina/999999999/1603_45_16.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,10 @@ bootstrap_1603_45_16__all() {
continue
fi

# echo "numerordinatio_praefixo $numerordinatio_praefixo"
# bootstrap_1603_45_16__item "1603_45_16_24" "24" "AGO" "AO" "3" "1" "0"
bootstrap_1603_45_16__item "$numerordinatio_praefixo" "$unm49" "$v_iso3" "$v_iso2" "$cod_ab_level_max" "1" "0"
# bootstrap_1603_45_16__item "$numerordinatio_praefixo" "$unm49" "$v_iso3" "$v_iso2" "1" "0"
sleep 5
# bootstrap_1603_45_16__item_no1 "$numerordinatio_praefixo" "$unm49" "$v_iso3" "$v_iso2" "$cod_ab_level_max" "1" "0"
bootstrap_1603_45_16__item_rdf "$numerordinatio_praefixo" "$unm49" "$v_iso3" "$v_iso2" "$cod_ab_level_max" "1" "0"

sleep 3
done
} <"${opus_temporibus_temporarium}"

Expand Down Expand Up @@ -262,7 +261,7 @@ bootstrap_1603_45_16__apothecae() {
# Outputs:
# Convert files
#######################################
bootstrap_1603_45_16__item() {
bootstrap_1603_45_16__item_no1() {
numerordinatio_praefixo="$1"
unm49="${2}"
iso3661p1a3="${3}"
Expand Down Expand Up @@ -291,8 +290,8 @@ bootstrap_1603_45_16__item() {
echo "${FUNCNAME[0]} ... [$numerordinatio_praefixo] [$unm49] [$iso3661p1a3] [$pcode_praefixo]"

# for file_path in "${ROOTDIR}"/999999/1603/45/16/xlsx/*.xlsx; do
ISO3166p1a3_original=$(basename --suffix=.xlsx "$file_path")
ISO3166p1a3=$(echo "$ISO3166p1a3_original" | tr '[:lower:]' '[:upper:]')
# ISO3166p1a3_original=$(basename --suffix=.xlsx "$file_path")
# ISO3166p1a3=$(echo "$ISO3166p1a3_original" | tr '[:lower:]' '[:upper:]')
# UNm49=$(numerordinatio_codicem_locali__1603_45_49 "$ISO3166p1a3")

if [ ! -d "$objectivum_archivum_basi" ]; then
Expand Down Expand Up @@ -342,6 +341,140 @@ bootstrap_1603_45_16__item() {
# done
}

#######################################
# Convert the per-level `.no1.tm.hxl.csv` tables of one country into
# OWL/Turtle files (`.no1.owl.ttl`) written next to each table.
#
# For every level from 0 up to cod_ab_level_max the table
#   {base}/1603/45/16/{unm49}/{level}/{praefixo}_{unm49}_{level}.no1.tm.hxl.csv
# is passed to 999999999_54872.py (--objectivum-formato=_temp_no1,
# --rdf-trivio=5000+level) and the result is re-serialized by rapper
# (turtle in, turtle out). Level 2 of "bra" is always skipped.
#
# A level whose input table does not exist is skipped with a warning.
# The final file is only replaced after both tools succeeded, so a failed
# conversion never clobbers a previously generated result.
#
# @TODO: potentially use more than one source (such as IGBE data for BRA)
#        instead of directly from OCHA
#
# Globals:
#   ROOTDIR (read)
#
# Arguments:
#   numerordinatio_praefixo     file name prefix (e.g. "1603_45_16")
#   unm49                       numeric country code; selects the directory
#   iso3661p1a3                 3-letter country code (any case)
#   pcode_praefixo              only shown in the log line
#   cod_ab_level_max            highest level to process (inclusive)
#   est_temporarium_fontem      accepted for signature parity with the sibling
#                               functions; not used by this function
#   est_temporarium_objectivum  "1": work under ${ROOTDIR}/999999,
#                               otherwise under ${ROOTDIR} (default "0")
#
# Outputs:
#   Progress on stdout, diagnostics on stderr, one `.no1.owl.ttl` per level.
#
# Returns:
#   0 when every attempted level was converted, 1 if any conversion failed.
#######################################
bootstrap_1603_45_16__item_rdf() {
  # Everything is local: the caller uses variables with these same names
  # (including opus_temporibus_temporarium) and must not see them overwritten.
  local numerordinatio_praefixo="$1"
  local unm49="${2}"
  local iso3661p1a3="${3}"
  local pcode_praefixo="${4}"
  local cod_ab_level_max="${5}"
  local est_temporarium_objectivum="${7:-"0"}"

  local _basim_objectivum="${ROOTDIR}"
  if [ "$est_temporarium_objectivum" -eq "1" ]; then
    _basim_objectivum="${ROOTDIR}/999999"
  fi

  local _iso3661p1a3_lower
  _iso3661p1a3_lower=$(printf '%s' "$iso3661p1a3" | tr '[:upper:]' '[:lower:]')

  local objectivum_archivum_basi="${_basim_objectivum}/1603/45/16/${unm49}"
  # Scratch files: raw converter output, and rapper output before promotion.
  local opus_temporibus_temporarium="${ROOTDIR}/999999/0/${unm49}.ttl"
  local opus_temporibus_rapper="${opus_temporibus_temporarium}.rapper"

  local cod_level rdf_trivio
  local objectivum_archivum_basi_lvl
  local objectivum_archivum_no1
  local objectivum_archivum_no1_owl_ttl
  local status=0

  echo "${FUNCNAME[0]} ... [$numerordinatio_praefixo] [$unm49] [$iso3661p1a3] [$pcode_praefixo]"
  echo "cod_ab_levels $cod_ab_level_max"

  for ((cod_level = 0; cod_level <= cod_ab_level_max; cod_level++)); do
    if [[ "$_iso3661p1a3_lower" == "bra" && "$cod_level" == "2" ]]; then
      echo ""
      echo "Skiping COD-AB-BR lvl 2"
      echo ""
      continue
    fi

    objectivum_archivum_basi_lvl="${objectivum_archivum_basi}/${cod_level}"
    objectivum_archivum_no1="${objectivum_archivum_basi_lvl}/${numerordinatio_praefixo}_${unm49}_${cod_level}.no1.tm.hxl.csv"
    objectivum_archivum_no1_owl_ttl="${objectivum_archivum_basi_lvl}/${numerordinatio_praefixo}_${unm49}_${cod_level}.no1.owl.ttl"

    echo " cod-ab-$_iso3661p1a3_lower-$cod_level [$objectivum_archivum_no1] ..."

    # Without the input table there is nothing to convert; bail out before
    # any redirection could create or truncate the output file.
    if [[ ! -f "$objectivum_archivum_no1" ]]; then
      echo "${FUNCNAME[0]}: input not found, skipping [$objectivum_archivum_no1]" >&2
      continue
    fi

    rdf_trivio=$((5000 + cod_level))

    # The subshell keeps `set -x` scoped to these two commands, so tracing
    # enabled (or not) by the caller is left exactly as it was.
    if ! (
      set -x
      "${ROOTDIR}/999999999/0/999999999_54872.py" \
        --objectivum-formato=_temp_no1 \
        --rdf-trivio="${rdf_trivio}" \
        "${objectivum_archivum_no1}" \
        >"${opus_temporibus_temporarium}" &&
        rapper --quiet --input=turtle --output=turtle \
          "${opus_temporibus_temporarium}" \
          >"${opus_temporibus_rapper}"
    ); then
      echo "${FUNCNAME[0]}: conversion failed for [$objectivum_archivum_no1]" >&2
      rm -f -- "$opus_temporibus_temporarium" "$opus_temporibus_rapper"
      status=1
      continue
    fi

    # Promote the finished file only now that both steps succeeded.
    if ! mv -f -- "$opus_temporibus_rapper" "$objectivum_archivum_no1_owl_ttl"; then
      echo "${FUNCNAME[0]}: cannot write [$objectivum_archivum_no1_owl_ttl]" >&2
      rm -f -- "$opus_temporibus_temporarium" "$opus_temporibus_rapper"
      status=1
      continue
    fi

    echo "OWL TTL: [${objectivum_archivum_no1_owl_ttl}]"

    # NOTE(review): pause kept from the original draft; its purpose is not
    # visible from this function -- confirm whether it is still needed.
    sleep 10

    rm -f -- "$opus_temporibus_temporarium"
  done

  return "$status"
}

#######################################
# Convert the XLSXs to intermediate formats on 999999/1603/45/16
# DEPRECATED use bootstrap_999999_1603_45_16_neo
Expand Down Expand Up @@ -434,12 +567,10 @@ bootstrap_999999_1603_45_16_neo() {
objectivum_iso3661p1a3="${1:-""}"
# objectivum_unm49="${1:-""}"



echo "${FUNCNAME[0]} ... [$objectivum_iso3661p1a3]"

echo "NOTE: this entire function is deprecated." Use bootstrap_1603_45_16__item
echo " Use bootstrap_1603_45_16__item (called by bootstrap_1603_45_16__all)"
echo "NOTE: this entire function is deprecated." Use bootstrap_1603_45_16__item_no1
echo " Use bootstrap_1603_45_16__item_no1 (called by bootstrap_1603_45_16__all)"
echo " or at least call 999999999_7200235.py with correct --pcode-praefixo="
sleep 3
echo "running anyway..."
Expand Down Expand Up @@ -488,7 +619,6 @@ bootstrap_999999_1603_45_16_neo() {
--methodus=xlsx_ad_hxl \
--ordines="$cod_level" "$file_path" >"${objectivum_archivum_hxl}"


"${ROOTDIR}/999999999/0/999999999_7200235.py" \
--methodus=xlsx_ad_hxltm \
--ordines="$cod_level" "$file_path" >"${objectivum_archivum_hxltm}"
Expand Down Expand Up @@ -890,7 +1020,6 @@ __temp_preproces_quicktest_1603_16_24() {
ISO3166p1a3_original=$(basename --suffix=.xlsx "$file_path")
ISO3166p1a3=$(echo "$ISO3166p1a3_original" | tr '[:lower:]' '[:upper:]')


for ((i = 0; i <= cod_ab_level_max; i++)); do
cod_level="$i"
if [ "$_iso3661p1a3_lower" == "bra" ] && [ "$cod_level" == "2" ]; then
Expand All @@ -902,6 +1031,7 @@ __temp_preproces_quicktest_1603_16_24() {

objectivum_archivum_basi_lvl="${objectivum_archivum_basi}/${cod_level}"
objectivum_archivum_no1="${objectivum_archivum_basi_lvl}/${numerordinatio_praefixo}_${unm49}_${cod_level}.no1.tm.hxl.csv"
objectivum_archivum_no1_owl_ttl="${objectivum_archivum_basi_lvl}/${numerordinatio_praefixo}_${unm49}_${cod_level}.no1.owl.ttl"
objectivum_archivum_no1bcp47="${objectivum_archivum_basi_lvl}/${numerordinatio_praefixo}_${unm49}_${cod_level}.no1.tm.hxl.csv"

# set -x
Expand All @@ -926,6 +1056,18 @@ __temp_preproces_quicktest_1603_16_24() {

frictionless validate "${objectivum_archivum_no1}" || true

rdf_trivio=$((5000 + cod_level))

"${ROOTDIR}/999999999/0/999999999_54872.py" \
--objectivum-formato=_temp_no1 \
--punctum-separato-de-fontem=',' \
--rdf-trivio="${rdf_trivio}" \
"${objectivum_archivum_no1}" |
rapper --quiet --input=turtle --output=turtle /dev/fd/0 \
>"${objectivum_archivum_no1_owl_ttl}"

echo "OWL TTL: [${objectivum_archivum_no1_owl_ttl}]"

# set -x
# "${ROOTDIR}/999999999/0/999999999_7200235.py" \
# --methodus=xlsx_ad_no1bcp47 \
Expand Down Expand Up @@ -1026,10 +1168,10 @@ __temp_download_external_cod_data() {
# __temp_download_external_cod_data
# exit 1
# echo "all"
# bootstrap_1603_45_16__all
# bootstrap_1603_45_16__item "1603_45_16" "24" "AGO" "AO" "1" "1" "0"
# bootstrap_1603_45_16__item "1603_45_16" "24" "AGO" "AO" "3" "1" "0"
__temp_preproces_quicktest_1603_16_24
bootstrap_1603_45_16__all
# bootstrap_1603_45_16__item_no1 "1603_45_16" "24" "AGO" "AO" "1" "1" "0"
# bootstrap_1603_45_16__item_no1 "1603_45_16" "24" "AGO" "AO" "3" "1" "0"
# __temp_preproces_quicktest_1603_16_24
exit 0

# bootstrap_1603_45_16__all
Expand All @@ -1042,14 +1184,14 @@ exit 0
"${ROOTDIR}/999999999/0/999999999_7200235.py" \
--methodus='cod_ab_index_levels_ttl' \
--punctum-separato-ad-tab \
> "${ROOTDIR}/999999/1603/45/16/1603_45_16.index.skos.ttl"
>"${ROOTDIR}/999999/1603/45/16/1603_45_16.index.skos.ttl"

rapper --quiet --input=turtle --output=dot \
"${ROOTDIR}/999999/1603/45/16/1603_45_16.index.skos.ttl" \
> "${ROOTDIR}/999999/1603/45/16/1603_45_16.index.dot"
>"${ROOTDIR}/999999/1603/45/16/1603_45_16.index.dot"
rapper --quiet --input=turtle --output=dot \
"${ROOTDIR}/999999/1603/45/16/1603_45_16.index.skos.ttl" \
> "${ROOTDIR}/999999/1603/45/16/1603_45_16.index.dot"
>"${ROOTDIR}/999999/1603/45/16/1603_45_16.index.dot"

# ./999999999/0/1603_1.py --methodus='ontologia-simplici' --ontologia-radici=1603_1_7 --ontologia-ex-archivo=1603/1/7/1603_1_7.no1.tm.hxl.csv | rapper --quiet --input=turtle --output=turtle /dev/fd/0 > /home/fititnt/Downloads/test.ttl
# ./999999999/0/1603_1.py --methodus='ontologia-simplici' --ontologia-radici=1603_1_7 --ontologia-ex-archivo=1603/1/7/1603_1_7.no1.tm.hxl.csv | rapper --quiet --input=turtle --output=dot /dev/fd/0 > /home/fititnt/Downloads/test.dot
Expand All @@ -1060,7 +1202,7 @@ rapper --quiet --input=turtle --output=dot \

# dot -Tpng \
# "${ROOTDIR}/999999/1603/45/16/1603_45_16.index.dot" \
# > "${ROOTDIR}/999999/1603/45/16/1603_45_16.index.png"
# > "${ROOTDIR}/999999/1603/45/16/1603_45_16.index.png"

exit 1

Expand Down Expand Up @@ -1219,7 +1361,6 @@ set +x
# - Exemplo de população
# - wdt:P1082 "+6747815"^^xsd:decimal ;


#### TEMP / Other tests ________________________________________________________
# ./999999999/0/999999999_7200235.py --methodus=xlsx_ad_no1 --numerordinatio-praefixo=1603_45_16_24 --ordines=0 --pcode-praefix=AO --unm49=24 999999/1603/45/16/xlsx/ago.xlsx
# ./999999999/0/999999999_7200235.py --methodus=xlsx_ad_no1 --numerordinatio-praefixo=1603_45_16_24 --ordines=0 --pcode-praefix=AO --unm49=24 999999/1603/45/16/xlsx/ago.xlsx
Expand Down

0 comments on commit 1877474

Please sign in to comment.