diff --git a/.gitignore b/.gitignore index a14506e7..9d43e5ff 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,8 @@ examples/flatten/rollup/direct/actual examples/flatten/rollup/direct/actual.* examples/flatten/rollup/file/actual examples/flatten/rollup/file/actual.* +examples/flatten/line-terminator/actual +examples/flatten/line-terminator/actual.* examples/receipt/source-map/actual examples/receipt/source-map/actual.* examples/bods/unflatten/actual diff --git a/CHANGELOG.md b/CHANGELOG.md index 3baa5c52..e8c0d95c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [0.20.0] - 2022-12-07 + +### Changed + +- Add `--line-terminator` option to `flatten` and `create-template` + ## [0.19.0] - 2022-11-16 ### Fixed diff --git a/examples/flatten/line-terminator/cmd.txt b/examples/flatten/line-terminator/cmd.txt new file mode 100644 index 00000000..357dea1a --- /dev/null +++ b/examples/flatten/line-terminator/cmd.txt @@ -0,0 +1 @@ +$ flatten-tool flatten --line-terminator=LF --root-list-path=cafe --main-sheet-name=cafe --schema=examples/receipt/cafe.schema examples/receipt/normalised/expected.json -o examples/flatten/line-terminator/actual diff --git a/examples/flatten/line-terminator/expected/cafe.csv b/examples/flatten/line-terminator/expected/cafe.csv new file mode 100644 index 00000000..038cb8da --- /dev/null +++ b/examples/flatten/line-terminator/expected/cafe.csv @@ -0,0 +1,3 @@ +id,name,address +CAFE-HEALTH,Healthy Cafe, +CAFE-VEG,Vegetarian Cafe, diff --git a/examples/flatten/line-terminator/expected/tab_dish.csv b/examples/flatten/line-terminator/expected/tab_dish.csv new file mode 100644 index 00000000..d821e7a1 --- /dev/null +++ b/examples/flatten/line-terminator/expected/tab_dish.csv @@ -0,0 +1,5 @@ +id,table/0/id,table/0/dish/0/id,table/0/dish/0/name,table/0/dish/0/cost +CAFE-HEALTH,TABLE-1,,Fish and Chips,9.95 +CAFE-HEALTH,TABLE-1,,Pesto Pasta Salad,6.95 +CAFE-HEALTH,TABLE-3,,Fish and Chips,9.95 +CAFE-VEG,TABLE-16,,Large Glass Sauvignon,5.95 diff --git a/examples/flatten/line-terminator/expected/table.csv b/examples/flatten/line-terminator/expected/table.csv new file mode 100644 index 00000000..faf31060 --- /dev/null +++ b/examples/flatten/line-terminator/expected/table.csv @@ -0,0 +1,6 @@ +id,table/0/id,table/0/number +CAFE-HEALTH,TABLE-1,1 +CAFE-HEALTH,TABLE-2,2 +CAFE-HEALTH,TABLE-3,3 +CAFE-VEG,TABLE-16,16 +CAFE-VEG,TABLE-17,17 diff --git a/examples/help/create-template/expected.txt b/examples/help/create-template/expected.txt index 2becd7aa..d050a743 100644 --- a/examples/help/create-template/expected.txt +++ b/examples/help/create-template/expected.txt @@ -4,6 +4,7 @@ usage: flatten-tool create-template [-h] -s SCHEMA [-f {csv,ods,xlsx,all}] [--disable-local-refs] [--no-deprecated-fields] [--truncation-length TRUNCATION_LENGTH] + [--line-terminator LINE_TERMINATOR] optional arguments: -h, --help show this help message and exit @@ -32,3 +33,6 @@ optional arguments: --truncation-length TRUNCATION_LENGTH The length of components of sub-sheet names (default 3). + --line-terminator LINE_TERMINATOR + The line terminator to use when writing CSV files: + CRLF or LF diff --git a/examples/help/flatten/expected.txt b/examples/help/flatten/expected.txt index 84a2435f..b618f561 100644 --- a/examples/help/flatten/expected.txt +++ b/examples/help/flatten/expected.txt @@ -9,6 +9,7 @@ usage: flatten-tool flatten [-h] [-s SCHEMA] [-f {csv,ods,xlsx,all}] [--xml] [--preserve-fields PRESERVE_FIELDS] [--disable-local-refs] [--remove-empty-schema-columns] + [--line-terminator LINE_TERMINATOR] input_name positional arguments: @@ -61,3 +62,6 @@ optional arguments: --remove-empty-schema-columns When using flatten with a schema, remove columns and sheets from the output that contain no data. + --line-terminator LINE_TERMINATOR + The line terminator to use when writing CSV files: + CRLF or LF diff --git a/flattentool/__init__.py b/flattentool/__init__.py index 55e526a7..fc034217 100644 --- a/flattentool/__init__.py +++ b/flattentool/__init__.py @@ -8,7 +8,7 @@ from flattentool.json_input import JSONParser from flattentool.lib import parse_sheet_configuration from flattentool.output import FORMATS as OUTPUT_FORMATS -from flattentool.output import FORMATS_SUFFIX +from flattentool.output import FORMATS_SUFFIX, LINE_TERMINATORS from flattentool.schema import SchemaParser from flattentool.xml_output import toxml @@ -24,7 +24,8 @@ def create_template( disable_local_refs=False, truncation_length=3, no_deprecated_fields=False, - **_ + line_terminator="CRLF", + **_, ): """ Creates template file(s) from given inputs @@ -33,6 +34,9 @@ def create_template( """ + if line_terminator not in LINE_TERMINATORS.keys(): + raise Exception(f"{line_terminator} is not a valid line terminator") + parser = SchemaParser( schema_filename=schema, rollup=rollup, @@ -46,7 +50,10 @@ def create_template( def spreadsheet_output(spreadsheet_output_class, name): spreadsheet_output = spreadsheet_output_class( - parser=parser, main_sheet_name=main_sheet_name, output_name=name + parser=parser, + main_sheet_name=main_sheet_name, + output_name=name, + line_terminator=LINE_TERMINATORS[line_terminator], ) spreadsheet_output.write_sheets() @@ -87,7 +94,8 @@ def flatten( disable_local_refs=False, remove_empty_schema_columns=False, truncation_length=3, - **_ + line_terminator="CRLF", + **_, ): """ Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx). @@ -99,6 +107,9 @@ def flatten( ): raise Exception("You must use filter_field and filter_value together") + if line_terminator not in LINE_TERMINATORS.keys(): + raise Exception(f"{line_terminator} is not a valid line terminator") + if schema: schema_parser = SchemaParser( schema_filename=schema, @@ -136,6 +147,7 @@ def spreadsheet_output(spreadsheet_output_class, name): main_sheet_name=main_sheet_name, output_name=name, sheet_prefix=sheet_prefix, + line_terminator=LINE_TERMINATORS[line_terminator], ) spreadsheet_output.write_sheets() @@ -206,7 +218,7 @@ def unflatten( disable_local_refs=False, xml_comment=None, truncation_length=3, - **_ + **_, ): """ Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested structure (JSON). diff --git a/flattentool/cli.py b/flattentool/cli.py index b3b52f80..b732de61 100644 --- a/flattentool/cli.py +++ b/flattentool/cli.py @@ -96,6 +96,10 @@ def create_parser(): default=3, help="The length of components of sub-sheet names (default 3).", ) + parser_create_template.add_argument( + "--line-terminator", + help="The line terminator to use when writing CSV files: CRLF or LF", + ) parser_flatten = subparsers.add_parser("flatten", help="Flatten a JSON file") parser_flatten.add_argument("input_name", help="Name of the input JSON file.") @@ -177,7 +181,10 @@ def create_parser(): action="store_true", help="When using flatten with a schema, remove columns and sheets from the output that contain no data.", ) - + parser_flatten.add_argument( + "--line-terminator", + help="The line terminator to use when writing CSV files: CRLF or LF", + ) parser_unflatten = subparsers.add_parser( "unflatten", help="Unflatten a spreadsheet" ) diff --git a/flattentool/output.py b/flattentool/output.py index 3ba6561c..8b1298f1 100644 --- a/flattentool/output.py +++ b/flattentool/output.py @@ -22,12 +22,18 @@ class SpreadsheetOutput(object): # output_name is given a default here, partly to help with tests, # but should have been defined by the time we get here. def __init__( - self, parser, main_sheet_name="main", output_name="unflattened", sheet_prefix="" + self, + parser, + main_sheet_name="main", + output_name="unflattened", + sheet_prefix="", + line_terminator="\r\n", ): self.parser = parser self.main_sheet_name = main_sheet_name self.output_name = output_name self.sheet_prefix = sheet_prefix + self.line_terminator = line_terminator def open(self): pass @@ -94,7 +100,9 @@ def write_sheet(self, sheet_name, sheet): newline="", encoding="utf-8", ) as csv_file: - dictwriter = csv.DictWriter(csv_file, sheet_header) + dictwriter = csv.DictWriter( + csv_file, sheet_header, lineterminator=self.line_terminator + ) dictwriter.writeheader() for sheet_line in sheet.lines: dictwriter.writerow(sheet_line) @@ -163,3 +171,5 @@ def close(self): "ods": ".ods", "csv": "", # This is the suffix for the directory } + +LINE_TERMINATORS = {"LF": "\n", "CRLF": "\r\n"} diff --git a/flattentool/tests/test_docs.py b/flattentool/tests/test_docs.py index e65ab552..f37dbbfa 100644 --- a/flattentool/tests/test_docs.py +++ b/flattentool/tests/test_docs.py @@ -136,7 +136,7 @@ def test_example_in_doc(root, filename): def test_expected_number_of_examples_in_docs_data(): - expected = 61 + expected = 62 # See _get_examples_in_docs_data() if sys.version_info[:2] != (3, 8): expected -= 3 diff --git a/setup.py b/setup.py index bf080755..f8196805 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ def run(self): setup( name="flattentool", - version="0.19.0", + version="0.20.0", author="Open Data Services", author_email="code@opendataservices.coop", packages=["flattentool"],