Skip to content

Commit

Permalink
Make NameExtractor work correctly when the locations list is empty
Browse files Browse the repository at this point in the history
Previously, NameExtractor raised a hard to understand error in this
case. This was then improved to a clearer error message. This commit now
prevents the error message completely: if the list of locations passed
to Pipeline is empty, then NameExtractor.extract simply yields no
matches.

This reverts commit 18916c1, reversing
changes made to 38b7526.
  • Loading branch information
torfsen committed Apr 17, 2018
1 parent 18916c1 commit 0d184e2
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 28 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@ The format of this file is based on [Keep a Changelog], and this
project uses [Semantic Versioning].


## [Unreleased]

### Fixed

- `NameExtractor` previously raised a hard to understand error message when the
location list passed to the `Pipeline` was empty. It now handles this edge
case correctly without raising an exception.
([#5], with contributions by [@konstin])


## [0.3.0] (2017-07-07)

### Added
Expand Down Expand Up @@ -88,3 +98,6 @@ First release.
[0.2.0]: https://github.com/stadt-karlsruhe/geoextract/compare/v0.1.0...v0.2.0
[0.1.0]: https://github.com/stadt-karlsruhe/geoextract/commits/v0.1.0

[#5]: https://github.com/stadt-karlsruhe/geoextract/pull/5

[@konstin]: https://github.com/konstin
12 changes: 6 additions & 6 deletions geoextract/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,6 @@ def setup(self, pipeline): # noqa: D102
# avoid finding parts of words. That of course assumes that other
# word delimiters have been converted to spaces during
# normalization.

if not pipeline.normalized_names:
raise ValueError("You must provide locations for extraction")

self._automaton = ahocorasick.Automaton()
for name in pipeline.normalized_names:
name = name.strip()
Expand All @@ -195,8 +191,12 @@ def setup(self, pipeline): # noqa: D102

def extract(self, text): # noqa: D102
text = self._pad(text)

for end_index, name in self._automaton.iter(text):
try:
it = self._automaton.iter(text)
except AttributeError:
# Occurs when the list of locations was empty
return
for end_index, name in it:
end_index -= 1 # Trailing padding space
if PY2:
# UTF-8 encoded text might require more characters than the
Expand Down
7 changes: 0 additions & 7 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,6 @@

import requests

DUMMY_LOCATIONS = [
{
'name': 'FooBar',
'type': 'street'
}
]


def wait_for_server(url, max_tries=10, delay=1):
'''
Expand Down
5 changes: 3 additions & 2 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
import requests

from geoextract import __version__ as geoextract_version, Pipeline
from . import DUMMY_LOCATIONS, stop_process, wait_for_server
from . import stop_process, wait_for_server


SERVER_URL = 'http://localhost:5000'
EXTRACT_URL = SERVER_URL + '/api/v1/extract'
Expand Down Expand Up @@ -73,7 +74,7 @@ def stop(self):


@contextlib.contextmanager
def app(locations=DUMMY_LOCATIONS, *args, **kwargs):
def app(locations=(), *args, **kwargs):
'''
Context manager that provides a geoextract app.
Expand Down
23 changes: 10 additions & 13 deletions tests/test_geoextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import mock

import geoextract
from . import DUMMY_LOCATIONS, sort_as_json
from . import sort_as_json


class TestNameExtractor(object):
Expand Down Expand Up @@ -85,6 +85,14 @@ def test_word_only_matches(self):
'''
self._check('xfoo xfooy foox', [])

def test_no_locations(self):
'''
Test that the extractor works with an empty list of locations.
'''
extractor = geoextract.NameExtractor()
pipeline = geoextract.Pipeline([], extractors=[extractor])
assert pipeline.extract('foobar') == []


class TestWindowExtractor(object):
'''
Expand Down Expand Up @@ -864,17 +872,6 @@ def test_app_creation(self):
'''
Test creating a web app from a pipeline.
'''
pipeline = geoextract.Pipeline(DUMMY_LOCATIONS)
pipeline = geoextract.Pipeline([])
app = pipeline.create_app()
assert hasattr(app, 'run')

def test_empty_locations(self):
'''
Tests that an empty locations list throws an error.
'''
try:
geoextract.Pipeline([])
assert False
except ValueError as e:
error_message = "You must provide locations for extraction"
assert str(e) == str(ValueError(error_message))

0 comments on commit 0d184e2

Please sign in to comment.