From 713814be4073feb3f2aa2bc712c7f97fba3d5a18 Mon Sep 17 00:00:00 2001
From: Richard Marston
Date: Tue, 14 Dec 2021 10:59:54 +0100
Subject: [PATCH] Convert pdfs to pngs

Signed-off-by: Richard Marston
---
Review notes (text between the three-dash line and the first file diff is
not part of the commit and is ignored by `git am`):

* What the patch does: installs poppler-utils in the image, makes
  process_images.sh skip *.pdf in the plain copy pass and convert each PDF
  with `pdftoppm -png -singlefile` into <name>.pdf.png, and makes xlsx2xml.py
  append '.png' to any drawing URI that ends in '.pdf' so the generated XML
  points at the converted file.
* The added lines of process_images.sh were revised during review and no
  longer match commit 713814b byte for byte (the blob id on its index line
  is therefore stale):
    - "$INPUT_DIR" and "${OUTPUT_DIR}" are quoted so paths with spaces are
      not word-split;
    - the tr arguments in the new block use double quotes. Single quotes
      there terminate the surrounding sh -c '...' string, leaving
      [:blank:] unquoted and open to pathname expansion in the inner shell;
    - the "Converting" message is printed before pdftoppm runs, so it
      precedes any error output and the inner shell exits with pdftoppm's
      status.
  The unchanged context lines of the copy block still carry the old
  quoting; they must match the pre-image and need a follow-up change.
* This text was recovered from a whitespace-collapsed copy. Blank context
  lines were placed where the hunk line counts require them; leading
  indentation (notably in the xlsx2xml.py hunk) and the whitespace-only
  difference between the removed and added `uri = ...` lines could not be
  recovered. Verify against the tree, or apply with
  `git apply --ignore-whitespace`.

 Dockerfile        |  2 +-
 process_images.sh | 10 +++++++++-
 xlsx2xml.py       |  6 +++++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0a74585..27b54be 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 from alpine:latest
 
 run apk update
-run apk add python3 py3-pip
+run apk add python3 py3-pip poppler-utils
 run /usr/bin/pip3 install pyxb openpyxl
 copy . /excel2xml
 cmd sh /excel2xml/process_all_xlsx.sh
diff --git a/process_images.sh b/process_images.sh
index 03e1246..635df7c 100644
--- a/process_images.sh
+++ b/process_images.sh
@@ -2,9 +2,17 @@
 INPUT_DIR=$1
 OUTPUT_DIR=$2
 
-find $INPUT_DIR -type f -not -name '*.xlsx' -exec sh -c '
+find "$INPUT_DIR" -type f -not \( -name '*.xlsx' -o -name '*.pdf' \) -exec sh -c '
     FILE="$0"
     OUTPUT_DIR="$1"
     FILE_NO_SPACES=$(basename "${FILE}" | tr '[:blank:]' '_')
     cp "$FILE" "$OUTPUT_DIR/$FILE_NO_SPACES"
 ' {} ${OUTPUT_DIR} ';'
+
+find "$INPUT_DIR" -type f -name '*.pdf' -exec sh -c '
+    FILE="$0"
+    OUTPUT_DIR="$1"
+    FILE_NO_SPACES=$(basename "${FILE}" | tr "[:blank:]" "_")
+    echo "Converting $FILE to $OUTPUT_DIR/${FILE_NO_SPACES}.png"
+    pdftoppm -png -singlefile "$FILE" "$OUTPUT_DIR/$FILE_NO_SPACES"
+' {} "${OUTPUT_DIR}" ';'
diff --git a/xlsx2xml.py b/xlsx2xml.py
index da840b2..94737bb 100644
--- a/xlsx2xml.py
+++ b/xlsx2xml.py
@@ -160,7 +160,11 @@ def switch_uriType(argument):
     name = cell_str(sheet_ranges, 38, 3+index)
     drawingType = cell_str(sheet_ranges, 39, 3+index)
     uriType = cell_str(sheet_ranges, 40, 3+index)
-    uri = cell_str(sheet_ranges, 41, 3+index).strip().replace(' ', '_')
+    uri = cell_str(sheet_ranges, 41, 3+index).strip().replace(' ', '_')
+
+    if uri.endswith('.pdf'):
+        uri = uri + '.png'
+
     if name != 'None' and uri!='None' :
         drawing = IEC62559.Drawing()
         resourcestr = IEC62559.Resource_String(uri)