Skip to content

Commit

Permalink
Merge pull request #10 from Valerich/1-add-import-encoding-parametrization
Browse files Browse the repository at this point in the history

Add import encoding parametrization. Issue #1
  • Loading branch information
Melevir authored May 11, 2020
2 parents 5f2a031 + faa9940 commit b4e539e
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 6 deletions.
20 changes: 17 additions & 3 deletions import_me/parsers/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import io
from contextlib import contextmanager

from typing import Optional, Iterator, Tuple, List, Any
from typing import Optional, Iterator, Tuple, List, Any, Dict

from import_me.exceptions import StopParsing
from import_me.parsers.base import BaseParser
Expand All @@ -24,11 +24,25 @@ def header_row_offset(self) -> Optional[int]:
raise StopParsing('Invalid row index.')
return index

@property
def _open_file_params(self) -> Dict[str, Any]:
    """Return the parser params that are forwarded to ``open()``.

    Only ``encoding``, ``buffering``, ``newline`` and ``errors`` are
    considered, and each is included only when present in ``self._params``.
    """
    supported_names = ('encoding', 'buffering', 'newline', 'errors')
    open_kwargs: Dict[str, Any] = {}
    for name in supported_names:
        if name in self._params:
            open_kwargs[name] = self._params[name]
    return open_kwargs

@property
def _reader_params(self) -> Dict[str, Any]:
    """Return the parser params that are forwarded to ``csv.reader``.

    The accepted names are the public attributes of ``csv.Dialect`` plus
    ``dialect`` and ``strict``. Any other entry of ``self._params`` (for
    example the ``open()`` arguments) is left out.
    """
    reader_params = [name for name in dir(csv.Dialect) if not name.startswith('_')]
    # csv.reader also accepts `strict`, but the pure-Python csv.Dialect class
    # declares no placeholder attribute for it, so dir() alone would silently
    # drop it. `dialect` is the reader's own keyword, not a Dialect attribute.
    reader_params.extend(('dialect', 'strict'))
    return {key: self._params[key] for key in reader_params if key in self._params}

@contextmanager
def open_file(self) -> Iterator:
if self.file_path:
try:
file_obj = open(self.file_path, 'r')
file_obj = open(self.file_path, 'r', **self._open_file_params)
yield file_obj
finally:
file_obj.close()
Expand All @@ -41,7 +55,7 @@ def open_file(self) -> Iterator:

def iterate_file_rows(self) -> Iterator[Tuple[int, List[Any]]]:
with self.open_file() as csv_file:
reader = csv.reader(csv_file, **self._params)
reader = csv.reader(csv_file, **self._reader_params)

self.validate_headers(reader)
csv_file.seek(0)
Expand Down
10 changes: 7 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,14 @@ def _xlsx_file_factory(header=None, data=None, header_row_index=0, data_row_inde

@pytest.fixture
def csv_file_factory():
def _csv_file_factory(header=None, data=None, header_row_index=0, data_row_index=1):
def _csv_file_factory(
header=None, data=None, header_row_index=0, data_row_index=1, file_kwargs=None, writer_kwargs=None,
):
file_kwargs = file_kwargs or {}
writer_kwargs = writer_kwargs or {}
csv_file = tempfile.NamedTemporaryFile(suffix='.csv')
with open(csv_file.name, 'w') as file:
writer = csv.writer(file)
with open(csv_file.name, 'w', **file_kwargs) as file:
writer = csv.writer(file, **writer_kwargs)

if header is not None:
for _row_index in range(header_row_index):
Expand Down
33 changes: 33 additions & 0 deletions tests/test_parsers/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,36 @@ class CSVParser(BaseCSVParser):
'row_index': 2,
},
]


def test_base_csv_parser_additional_params(csv_file_factory):
    """Check that extra parser params reach both ``open()`` and ``csv.reader``.

    The fixture writes a cp1251-encoded, semicolon-delimited file; the parser
    is given the matching ``encoding`` and ``delimiter`` and must yield the
    two data rows.
    """
    encoding = 'cp1251'
    delimiter = ';'
    people = [
        ['Ivan', 'Ivanov'],
        ['Petr', 'Petrov'],
    ]

    class CSVParser(BaseCSVParser):
        columns = [
            Column('first_name', index=0, header='First Name'),
            Column('last_name', index=1, header='Last Name'),
        ]

    csv_file = csv_file_factory(
        header=['First Name', 'Last Name'],
        data=people,
        file_kwargs={'encoding': encoding},
        writer_kwargs={'delimiter': delimiter},
    )
    parser = CSVParser(file_path=csv_file.name, encoding=encoding, delimiter=delimiter)
    parser()

    # Data rows are numbered from 1 because the header occupies row 0.
    expected_rows = [
        {'first_name': first_name, 'last_name': last_name, 'row_index': row_index}
        for row_index, (first_name, last_name) in enumerate(people, start=1)
    ]
    assert parser.cleaned_data == expected_rows

0 comments on commit b4e539e

Please sign in to comment.