From 2700771d13385dbd27807c691c318c9b25f03410 Mon Sep 17 00:00:00 2001 From: Craig Astill Date: Tue, 16 May 2023 16:01:12 +0100 Subject: [PATCH] Fixed up the `skip_initial` bad `.xlsx` failing test after changes. Fixed up the expectation in the previously failing `skip_initial` testcase, now that Production code has been changed to skip over the rows without parsing. Issue: #52. --- tap_spreadsheets_anywhere/test/test_format.py | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/tap_spreadsheets_anywhere/test/test_format.py b/tap_spreadsheets_anywhere/test/test_format.py index ed16241..6bc3311 100644 --- a/tap_spreadsheets_anywhere/test/test_format.py +++ b/tap_spreadsheets_anywhere/test/test_format.py @@ -2,6 +2,7 @@ import json import logging import unittest +from datetime import datetime from pathlib import Path from unittest.mock import patch @@ -239,35 +240,31 @@ def test_bad_blank_line_above_headings_raises(self): continue def test_bad_blank_line_above_headings_skip_initial_over_bad_row(self): - """Test to verify a sample file that raises #52. - Iteratting through this bad sample file will currently fail - when parsing the blank line during sampling time, even when - supplied with the: `skip_interval` argument with a row - beyond the bad row. + """Test to verify a sample file that raises #52, does not fail when + using: `skip_interval`, to avoid the bad row. """ - exp = [ - "Date", - "Contact", - "Description", - "Invoice Number", - "Reference", - "Debit (GBP)", - "Credit (GBP)", - "Gross (GBP)", - "Net (GBP)", - "VAT (GBP)", - "Account Code", - "Account", - "Account Type", - "Revenue Type", - ] - table_spec = {"format": "excel", "skip_initial": 5} - # NOTE: `get_row_iterator` fails with Issue #52. This is because the - # current code parses each line against the header generated from the - # first row of the file. With the above bad file, this throws an - # `IndexError` during the sampling discovery phase. 
Skipping rows - # should be done in: `excel_handler.generator_wrapper`, before the rows - # are parsed. + # NOTE: that `get_row_iterator` will compress the header row and each + # subsequent data row together, so count one less row than in the file + # + expect a dict. + exp = { + 'account': 'Sales - Commission Fees', + 'account_code': 9999.0, + 'account_type': 'Revenue', + 'contact': 'Company A Limited', + 'credit_gbp': 123.45, + 'date': datetime(2023, 1, 31, 0, 0), + 'debit_gbp': 0.0, + 'description': 'Description for Company A', + 'gross_gbp': 123.45, + 'invoice_number': 'INV-1234', + 'net_gbp': 1234.45, + 'reference': 'REF-1234', + 'revenue_type': 'Commission Fees', + 'vat_gbp': 0.0, + } + table_spec = {"format": "excel", "skip_initial": 4} + # NOTE: `get_row_iterator` should no longer fail with Issue #52, now + # that: `excel_handler.generator_wrapper` is not parsing skipped rows. iterator = get_row_iterator(table_spec, self.uri) # Assert that the expected row, after skipping, is next. assert next(iterator) == exp