Skip to content

Commit

Permalink
Merge pull request #27 from open-data/cherry-pick/8ad1ac9f368acb04cdd…
Browse files Browse the repository at this point in the history
…58953d3f7cc19f7df2b40

Merge pull request ckan#208 from ckan/github-206-empty-lines
  • Loading branch information
JVickery-TBS authored May 8, 2024
2 parents e82890b + c513ee6 commit 778aeeb
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 2 deletions.
43 changes: 43 additions & 0 deletions .github/workflows/change_log.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Checks that every pull request ships a changelog fragment named
# ./changes/<PR number>.<type>, where <type> is one of the allowed types.
name: Changelog Entry
on: [pull_request]

permissions:
  contents: read

jobs:
  check_file:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      # Fail when no file in ./changes/ is named after the PR number.
      - name: Check Changelog Exists
        run: |
          if [[ $(ls ./changes/${{ github.event.number }}.*) ]]; then
            echo -e "\n"
            echo -e "\033[0;36mINFO: changelog for PR ${{ github.event.number }} exists.\033[0;0m"
            echo -e "\n"
            exit 0
          else
            echo -e "\n"
            echo -e "\033[0;31mERROR: changelog for PR ${{ github.event.number }} does not exist.\033[0;0m"
            echo -e "\n"
            exit 1
          fi

      # The type is the text after the LAST dot of the file name, so a file
      # such as 27.backport.feature is validated as "feature".
      - name: Check Changelog Extension
        run: |
          fullfile=$(ls ./changes/${{ github.event.number }}.*)
          filename=$(basename -- "$fullfile")
          extension="${filename##*.}"
          allowed_types='[ "fix", "bugfix", "hotfix", "feature", "misc", "changes", "migration", "removal" ]'
          # The quoted right-hand side is matched literally, and the embedded
          # double quotes mean only a whole list entry can match.
          if [[ $allowed_types =~ "\"$extension\"" ]]; then
            echo -e "\n"
            echo -e "\033[0;36mINFO: extension ${extension} accepted.\033[0;0m"
            echo -e "\n"
            exit 0
          else
            echo -e "\n"
            echo -e "\033[0;31mERROR: changelog file ending in ${extension} not supported.\033[0;0m"
            echo -e "\n"
            exit 1
          fi
1 change: 1 addition & 0 deletions changes/27.backport.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Ignore empty rows when loading into the DataStore.
10 changes: 8 additions & 2 deletions ckanext/xloader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', dialect=None, encod
# Get the list of rows to skip. The rows in the tabulator stream are
# numbered starting with 1.
skip_rows = list(range(1, header_offset + 1))
skip_rows.append({'type': 'preset', 'value': 'blank'})

# Get the delimiter used in the file
delimiter = stream.dialect.get('delimiter')
Expand Down Expand Up @@ -426,17 +427,21 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', dialect=None, e
try:
file_format = os.path.splitext(table_filepath)[1].strip('.')
with UnknownEncodingStream(table_filepath, file_format, decoding_result,
post_parse=[TypeConverter().convert_types], dialect=dialect,
dialect=dialect,
force_encoding=bool(encoding),
skip_rows=[{'type': 'preset', 'value': 'blank'}],
post_parse=[TypeConverter().convert_types],
logger=(logger if not has_logged_dialect else None)) as stream:
header_offset, headers = headers_guess(stream.sample)
has_logged_dialect = True
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
with UnknownEncodingStream(table_filepath, file_format, decoding_result,
post_parse=[TypeConverter().convert_types], dialect=dialect,
dialect=dialect,
force_encoding=bool(encoding),
skip_rows=[{'type': 'preset', 'value': 'blank'}],
post_parse=[TypeConverter().convert_types],
logger=(logger if not has_logged_dialect else None)) as stream:
header_offset, headers = headers_guess(stream.sample)
has_logged_dialect = True
Expand All @@ -459,6 +464,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', dialect=None, e
# Get the list of rows to skip. The rows in the tabulator stream are
# numbered starting with 1. We also want to skip the header row.
skip_rows = list(range(1, header_offset + 2))
skip_rows.append({'type': 'preset', 'value': 'blank'})

TYPES, TYPE_MAPPING = get_types()
# (canada fork only): add config option for strict guessing
Expand Down
10 changes: 10 additions & 0 deletions ckanext/xloader/tests/samples/sample_with_empty_lines.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
date,temperature,place
2011-01-01,1,Galway
2011-01-02,-1,Galway
2011-01-03,0,Galway
2011-01-01,6,Berkeley

,,Berkeley
2011-01-03,5,


12 changes: 12 additions & 0 deletions ckanext/xloader/tests/test_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,18 @@ def test_with_blanks(self, Session):
)
assert len(self._get_records(Session, resource_id)) == 3

def test_with_empty_lines(self, Session):
    """Load the empty-lines sample CSV and expect 6 stored records.

    The sample holds six rows with at least one value, interleaved with
    completely blank lines.
    """
    sample_path = get_sample_filepath("sample_with_empty_lines.csv")
    resource_id = factories.Resource()['id']
    loader.load_csv(
        sample_path,
        resource_id=resource_id,
        mimetype="text/csv",
        logger=logger,
    )
    stored_records = self._get_records(Session, resource_id)
    assert len(stored_records) == 6

def test_with_quoted_commas(self, Session):
csv_filepath = get_sample_filepath("sample_with_quoted_commas.csv")
resource = factories.Resource()
Expand Down

0 comments on commit 778aeeb

Please sign in to comment.