From d5078cdbfdcbc97de316e757f6e886f4215f1a5d Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Fri, 15 Mar 2024 11:02:59 +1000 Subject: [PATCH 1/7] Merge pull request #208 from ckan/github-206-empty-lines Skip empty lines instead of erroring # Conflicts: # ckanext/xloader/loader.py ### RESOLVED. --- ckanext/xloader/loader.py | 10 ++++++++-- .../tests/samples/sample_with_empty_lines.csv | 10 ++++++++++ ckanext/xloader/tests/test_loader.py | 12 ++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 ckanext/xloader/tests/samples/sample_with_empty_lines.csv diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index b7a38472..9c7a91b8 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -194,6 +194,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', dialect=None, encod # Get the list of rows to skip. The rows in the tabulator stream are # numbered starting with 1. skip_rows = list(range(1, header_offset + 1)) + skip_rows.append({'type': 'preset', 'value': 'blank'}) # Get the delimiter used in the file delimiter = stream.dialect.get('delimiter') @@ -426,8 +427,10 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', dialect=None, e try: file_format = os.path.splitext(table_filepath)[1].strip('.') with UnknownEncodingStream(table_filepath, file_format, decoding_result, - post_parse=[TypeConverter().convert_types], dialect=dialect, + dialect=dialect, force_encoding=bool(encoding), + skip_rows=[{'type': 'preset', 'value': 'blank'}], + post_parse=[TypeConverter().convert_types], logger=(logger if not has_logged_dialect else None)) as stream: header_offset, headers = headers_guess(stream.sample) has_logged_dialect = True @@ -435,8 +438,10 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', dialect=None, e try: file_format = mimetype.lower().split('/')[-1] with UnknownEncodingStream(table_filepath, file_format, decoding_result, - post_parse=[TypeConverter().convert_types], dialect=dialect, + dialect=dialect, force_encoding=bool(encoding), + skip_rows=[{'type': 'preset', 'value': 'blank'}], + post_parse=[TypeConverter().convert_types], logger=(logger if not has_logged_dialect else None)) as stream: header_offset, headers = headers_guess(stream.sample) has_logged_dialect = True @@ -459,6 +464,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', dialect=None, e # Get the list of rows to skip. The rows in the tabulator stream are # numbered starting with 1. We also want to skip the header row. skip_rows = list(range(1, header_offset + 2)) + skip_rows.append({'type': 'preset', 'value': 'blank'}) TYPES, TYPE_MAPPING = get_types() # (canada fork only): add config option for strict guessing diff --git a/ckanext/xloader/tests/samples/sample_with_empty_lines.csv b/ckanext/xloader/tests/samples/sample_with_empty_lines.csv new file mode 100644 index 00000000..abc8a0dc --- /dev/null +++ b/ckanext/xloader/tests/samples/sample_with_empty_lines.csv @@ -0,0 +1,10 @@ +date,temperature,place +2011-01-01,1,Galway +2011-01-02,-1,Galway +2011-01-03,0,Galway +2011-01-01,6,Berkeley + +,,Berkeley +2011-01-03,5, + + diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index e024b315..2bc686cb 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -632,6 +632,18 @@ def test_with_blanks(self, Session): ) assert len(self._get_records(Session, resource_id)) == 3 + def test_with_empty_lines(self, Session): + csv_filepath = get_sample_filepath("sample_with_empty_lines.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 6 + def test_with_quoted_commas(self, Session): csv_filepath = get_sample_filepath("sample_with_quoted_commas.csv") resource = factories.Resource() From 3dd059dfe1b9c8aeccd80b5a3a2c71b03ecdbf7e Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Wed, 8 May 2024 19:29:16 +0000 Subject: [PATCH 2/7] feat(workflow): change log file; - Added change log file workflow. --- .github/workflows/change_log.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/change_log.yml diff --git a/.github/workflows/change_log.yml b/.github/workflows/change_log.yml new file mode 100644 index 00000000..f255187a --- /dev/null +++ b/.github/workflows/change_log.yml @@ -0,0 +1,30 @@ +name: Changelog Entry +on: [pull_request] + +permissions: + contents: read + +jobs: + check_file: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Check Chanelog Exists + run: | + ls ./changes/${{ github.event.number }}.* || exit 1 + + - name: Check Changelog Extension + run: | + fullfile=$(ls ./changes/${{ github.event.number }}.*) + filename=$(basename -- "$fullfile") + extension="${filename##*.}" + allowed_types='[ "fix", "bugfix", "hotfix", "feature", "misc", "changes", "migration", "removal" ]' + if [[ $allowed_types =~ "\"$extension\"" ]]; then + exit 0 + else + echo "\n" + echo "ERROR: changelog file ending in ${extension} not supported." + echo "\n" + exit 1 + fi From 38362dd68a18a9e434ae77a6fed608373d8fc445 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Wed, 8 May 2024 19:31:32 +0000 Subject: [PATCH 3/7] feat(workflow): change log file; - Added change log file workflow. --- .github/workflows/change_log.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/change_log.yml b/.github/workflows/change_log.yml index f255187a..641c878b 100644 --- a/.github/workflows/change_log.yml +++ b/.github/workflows/change_log.yml @@ -12,7 +12,13 @@ jobs: - name: Check Chanelog Exists run: | - ls ./changes/${{ github.event.number }}.* || exit 1 + ls ./changes/${{ github.event.number }}.* + if [[ $? -ne 0 ]]; then + echo "\n" + echo "ERROR: changelog for PR ${{ github.event.number }} does not exist." + echo "\n" + exit 1 + fi - name: Check Changelog Extension run: | From d92770283387b109f9f3742d8828324e2c72967d Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Wed, 8 May 2024 19:34:26 +0000 Subject: [PATCH 4/7] feat(workflow): change log file; - Added change log file workflow. --- .github/workflows/change_log.yml | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/workflows/change_log.yml b/.github/workflows/change_log.yml index 641c878b..06bb65d1 100644 --- a/.github/workflows/change_log.yml +++ b/.github/workflows/change_log.yml @@ -12,12 +12,13 @@ jobs: - name: Check Chanelog Exists run: | - ls ./changes/${{ github.event.number }}.* - if [[ $? -ne 0 ]]; then - echo "\n" - echo "ERROR: changelog for PR ${{ github.event.number }} does not exist." - echo "\n" - exit 1 + if [[ $(ls ./changes/${{ github.event.number }}.*) ]]; then + exit 0 + else + echo "\n" + echo "ERROR: changelog for PR ${{ github.event.number }} does not exist." + echo "\n" + exit 1 fi - name: Check Changelog Extension @@ -27,10 +28,10 @@ jobs: extension="${filename##*.}" allowed_types='[ "fix", "bugfix", "hotfix", "feature", "misc", "changes", "migration", "removal" ]' if [[ $allowed_types =~ "\"$extension\"" ]]; then - exit 0 + exit 0 else - echo "\n" - echo "ERROR: changelog file ending in ${extension} not supported." - echo "\n" - exit 1 + echo "\n" + echo "ERROR: changelog file ending in ${extension} not supported." + echo "\n" + exit 1 fi From eba50beb1d582e7f0d97bcdd97cdd91237c8e833 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Wed, 8 May 2024 19:35:53 +0000 Subject: [PATCH 5/7] feat(workflow): change log file; - Added change log file workflow. --- .github/workflows/change_log.yml | 8 ++++---- changes/27.backport.feature | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changes/27.backport.feature diff --git a/.github/workflows/change_log.yml b/.github/workflows/change_log.yml index 06bb65d1..a7f17212 100644 --- a/.github/workflows/change_log.yml +++ b/.github/workflows/change_log.yml @@ -15,9 +15,9 @@ jobs: if [[ $(ls ./changes/${{ github.event.number }}.*) ]]; then exit 0 else - echo "\n" + echo -e "\n" echo "ERROR: changelog for PR ${{ github.event.number }} does not exist." - echo "\n" + echo -e "\n" exit 1 fi @@ -30,8 +30,8 @@ jobs: if [[ $allowed_types =~ "\"$extension\"" ]]; then exit 0 else - echo "\n" + echo -e "\n" echo "ERROR: changelog file ending in ${extension} not supported." - echo "\n" + echo -e "\n" exit 1 fi diff --git a/changes/27.backport.feature b/changes/27.backport.feature new file mode 100644 index 00000000..e77b2ac2 --- /dev/null +++ b/changes/27.backport.feature @@ -0,0 +1 @@ +Ignore empty rows when loading into the DataStore. From 44657a0b42749a85e22688a7fa8d02efcdcb907e Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Wed, 8 May 2024 19:38:31 +0000 Subject: [PATCH 6/7] feat(workflow): change log file; - Added change log file workflow. --- .github/workflows/change_log.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/change_log.yml b/.github/workflows/change_log.yml index a7f17212..851a6520 100644 --- a/.github/workflows/change_log.yml +++ b/.github/workflows/change_log.yml @@ -13,10 +13,13 @@ jobs: - name: Check Chanelog Exists run: | if [[ $(ls ./changes/${{ github.event.number }}.*) ]]; then + echo -e "\n" + echo "\033[0;36mINFO: changelog for PR ${{ github.event.number }} exists.\033[0;0m" + echo -e "\n" exit 0 else echo -e "\n" - echo "ERROR: changelog for PR ${{ github.event.number }} does not exist." + echo "\033[0;31mERROR: changelog for PR ${{ github.event.number }} does not exist.\033[0;0m" echo -e "\n" exit 1 fi @@ -28,10 +31,13 @@ jobs: extension="${filename##*.}" allowed_types='[ "fix", "bugfix", "hotfix", "feature", "misc", "changes", "migration", "removal" ]' if [[ $allowed_types =~ "\"$extension\"" ]]; then + echo -e "\n" + echo "\033[0;36mINFO: extension ${extension} accepted.\033[0;0m" + echo -e "\n" exit 0 else echo -e "\n" - echo "ERROR: changelog file ending in ${extension} not supported." + echo "\033[0;31mERROR: changelog file ending in ${extension} not supported.\033[0;0m" echo -e "\n" exit 1 fi From c513ee6f112a3d486ce3f57c8b775a1cbd1b00b7 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Wed, 8 May 2024 19:39:30 +0000 Subject: [PATCH 7/7] feat(workflow): change log file; - Added change log file workflow. --- .github/workflows/change_log.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/change_log.yml b/.github/workflows/change_log.yml index 851a6520..1700c9f2 100644 --- a/.github/workflows/change_log.yml +++ b/.github/workflows/change_log.yml @@ -14,12 +14,12 @@ jobs: run: | if [[ $(ls ./changes/${{ github.event.number }}.*) ]]; then echo -e "\n" - echo "\033[0;36mINFO: changelog for PR ${{ github.event.number }} exists.\033[0;0m" + echo -e "\033[0;36mINFO: changelog for PR ${{ github.event.number }} exists.\033[0;0m" echo -e "\n" exit 0 else echo -e "\n" - echo "\033[0;31mERROR: changelog for PR ${{ github.event.number }} does not exist.\033[0;0m" + echo -e "\033[0;31mERROR: changelog for PR ${{ github.event.number }} does not exist.\033[0;0m" echo -e "\n" exit 1 fi @@ -32,12 +32,12 @@ jobs: allowed_types='[ "fix", "bugfix", "hotfix", "feature", "misc", "changes", "migration", "removal" ]' if [[ $allowed_types =~ "\"$extension\"" ]]; then echo -e "\n" - echo "\033[0;36mINFO: extension ${extension} accepted.\033[0;0m" + echo -e "\033[0;36mINFO: extension ${extension} accepted.\033[0;0m" echo -e "\n" exit 0 else echo -e "\n" - echo "\033[0;31mERROR: changelog file ending in ${extension} not supported.\033[0;0m" + echo -e "\033[0;31mERROR: changelog file ending in ${extension} not supported.\033[0;0m" echo -e "\n" exit 1 fi