Skip to content

Commit

Permalink
rename DeduplicationObserver -> DuplicateHandler
Browse files (browse the repository at this point in the history)
  • Loading branch information
KhoomeiK committed Mar 27, 2024
1 parent b99c781 commit e6b37e0
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions harambe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
LoggingObserver,
OutputObserver,
DownloadMeta,
DeduplicationObserver,
DuplicateHandler,
ObservationTrigger,
)
from harambe.tracker import FileDataTracker
Expand Down Expand Up @@ -75,7 +75,7 @@ def __init__(
observer = [observer]

self._observers = observer
self._deduper = DeduplicationObserver()
self._deduper = DuplicateHandler()

async def save_data(self, *data: ScrapeResult) -> None:
"""
Expand Down
2 changes: 1 addition & 1 deletion harambe/observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def files(self) -> List[Tuple[str, bytes]]:
return self._files


class DeduplicationObserver(OutputObserver):
class DuplicateHandler:
def __init__(self):
self._saved_data: set[bytes] = set()
self.rows_on_page = 0
Expand Down
14 changes: 7 additions & 7 deletions tests/test_observers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from harambe.observer import InMemoryObserver, DeduplicationObserver
from harambe.observer import InMemoryObserver, DuplicateHandler


@pytest.mark.asyncio
Expand Down Expand Up @@ -28,7 +28,7 @@ async def in_memory_on_queue_url():

@pytest.mark.asyncio
async def test_stop_pagination_observer_duplicate_data_error():
observer = DeduplicationObserver()
observer = DuplicateHandler()

unduplicated = await observer.on_save_data({"foo": "bar"})
await observer.on_paginate("https://example.com/page2")
Expand All @@ -42,7 +42,7 @@ async def test_stop_pagination_observer_duplicate_data_error():

@pytest.mark.asyncio
async def test_stop_pagination_observer_duplicate_url_error():
observer = DeduplicationObserver()
observer = DuplicateHandler()

unduplicated = await observer.on_queue_url("https://example.com", {"foo": "bar"})
await observer.on_paginate("https://example.com/page2")
Expand All @@ -56,7 +56,7 @@ async def test_stop_pagination_observer_duplicate_url_error():

@pytest.mark.asyncio
async def test_stop_pagination_observer_duplicate_download_error():
observer = DeduplicationObserver()
observer = DuplicateHandler()

unduplicated = await observer.on_download("https://example.com", "foo.txt", b"foo")
await observer.on_paginate("https://example.com/page2")
Expand All @@ -70,7 +70,7 @@ async def test_stop_pagination_observer_duplicate_download_error():

@pytest.mark.asyncio
async def test_stop_pagination_observer_no_duplicate_data():
observer = DeduplicationObserver()
observer = DuplicateHandler()
unduplicated1 = await observer.on_save_data({"foo": "bar"})
await observer.on_paginate("https://example.com/page2")
unduplicated2 = await observer.on_save_data({"baz": "qux"})
Expand All @@ -88,7 +88,7 @@ async def test_stop_pagination_observer_no_duplicate_data():

@pytest.mark.asyncio
async def test_ignore_underscore_attributes():
observer = DeduplicationObserver()
observer = DuplicateHandler()

unduplicated1 = await observer.on_save_data({"foo": "bar", "__url": "qux"})
unduplicated2 = await observer.on_save_data({"qux": "bar", "__url": "qux"})
Expand All @@ -105,7 +105,7 @@ async def test_ignore_underscore_attributes():

@pytest.mark.asyncio
async def test_duplicate_data_without_pagination():
observer = DeduplicationObserver()
observer = DuplicateHandler()
unduplicated = await observer.on_save_data({"foo": "bar"})
duplicated = await observer.on_save_data({"foo": "bar"})
assert not unduplicated and duplicated
Expand Down

0 comments on commit e6b37e0

Please sign in to comment.