Skip to content

Commit

Permalink
[BUILD-248] feat: Download updates from deck csv if url is provided (#923)
Browse files Browse the repository at this point in the history

* Download updates from deck csv if url is provided

* Use correct progress callbacks for downloading decks and updates

* Add type hints

* Update vcr for test_get_deck_updates

* Fix error if notes are empty and no external_notes_url provided

* Add test

* Add type hint

* Use presigned url from deck updates response to download deck

* Make get_deck_updates return all updates at once, discard CSV note if JSON note exists

* Fix tests

* Add docstring

* Modify test

* Edit comment

* Fix tests

* Remove not needed code

* Add test

* ref: Rename methods

* Fix client tests
  • Loading branch information
RisingOrange authored Mar 22, 2024
1 parent f73c8f2 commit 1371a4e
Show file tree
Hide file tree
Showing 12 changed files with 423 additions and 215 deletions.
2 changes: 1 addition & 1 deletion ankihub/addon_ankihub_client.py
Original file line number Diff line number Diff line change
def upload_logs(self, file: Path, key: str) -> None:
    """Upload a local log file to S3 under the given key.

    Args:
        file: Path of the local log file to upload.
        key: S3 object key used to request a presigned upload URL.

    Raises:
        AnkiHubHTTPError: If the S3 upload does not return HTTP 200.
    """
    # Path.read_bytes opens and closes the file for us.
    log_data = file.read_bytes()

    s3_url_suffix = self._presigned_url_suffix_from_key(key=key, action="upload")
    s3_response = self._send_request("PUT", API.S3, s3_url_suffix, data=log_data)
    if s3_response.status_code != 200:
        raise AnkiHubHTTPError(s3_response)
2 changes: 1 addition & 1 deletion ankihub/ankihub_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
DeckExtensionUpdateChunk,
DeckMedia,
DeckMediaUpdateChunk,
DeckUpdateChunk,
DeckUpdatesChunk,
Field,
NewNoteSuggestion,
NoteCustomization,
Expand Down
126 changes: 109 additions & 17 deletions ankihub/ankihub_client/ankihub_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@
DeckExtension,
DeckExtensionUpdateChunk,
DeckMediaUpdateChunk,
DeckUpdateChunk,
DeckUpdates,
DeckUpdatesChunk,
NewNoteSuggestion,
NoteInfo,
NoteSuggestion,
Expand Down Expand Up @@ -298,7 +299,7 @@ def upload_deck(
deck_name_normalized = re.sub('[\\\\/?<>:*|"^]', "_", deck_name)
deck_file_name = f"{deck_name_normalized}-{uuid.uuid4()}.json.gz"

s3_url_suffix = self._get_presigned_url_suffix(
s3_url_suffix = self._presigned_url_suffix_from_key(
key=deck_file_name, action="upload"
)

Expand Down Expand Up @@ -622,12 +623,15 @@ def download_deck(
self,
ah_did: uuid.UUID,
download_progress_cb: Optional[Callable[[int], None]] = None,
s3_presigned_url: Optional[str] = None,
) -> List[NoteInfo]:
deck_info = self.get_deck_by_id(ah_did)

s3_url_suffix = self._get_presigned_url_suffix(
key=deck_info.csv_notes_filename, action="download"
)
if not s3_presigned_url:
deck_info = self.get_deck_by_id(ah_did)
s3_url_suffix = self._presigned_url_suffix_from_key(
key=deck_info.csv_notes_filename, action="download"
)
else:
s3_url_suffix = self._presigned_url_suffix_from_url(s3_presigned_url)

if download_progress_cb:
s3_response_content = self._download_with_progress_cb(
Expand All @@ -639,7 +643,8 @@ def download_deck(
raise AnkiHubHTTPError(s3_response)
s3_response_content = s3_response.content

if deck_info.csv_notes_filename.endswith(".gz"):
csv_filename = s3_url_suffix[1:].split("?", maxsplit=1)[0]
if csv_filename.endswith(".gz"):
deck_csv_content = gzip.decompress(s3_response_content).decode("utf-8")
else:
deck_csv_content = s3_response_content.decode("utf-8")
def get_deck_updates(
    self,
    ah_did: uuid.UUID,
    since: datetime,
    updates_download_progress_cb: Optional[Callable[[int], None]] = None,
    deck_download_progress_cb: Optional[Callable[[int], None]] = None,
    should_cancel: Optional[Callable[[], bool]] = None,
) -> Optional[DeckUpdates]:
    """Fetch updates for a specific deck from the AnkiHub server.

    Args:
        ah_did: The UUID of the deck to fetch updates for.
        since: The time from which to fetch updates.
        updates_download_progress_cb: An optional callback function to report the
            progress of the updates download. Called with the number of notes
            downloaded so far.
        deck_download_progress_cb: An optional callback function to report the
            progress of the deck download. Called with the download percentage.
        should_cancel: An optional callback function that should return True if
            the operation should be cancelled.

    Returns:
        A DeckUpdates object containing the fetched updates and the latest update
        timestamp, or None if the operation was cancelled.
    """
    notes_data_from_csv: List[NoteInfo] = []
    notes_data_from_json: List[NoteInfo] = []
    latest_update: Optional[datetime] = None
    protected_fields = None
    protected_tags = None
    for chunk in self._get_deck_updates_inner(
        ah_did,
        since,
        updates_download_progress_cb,
        deck_download_progress_cb,
    ):
        if should_cancel and should_cancel():
            return None

        if chunk.from_csv:
            # The CSV contains all of the deck's notes, so we assign instead
            # of extending.
            notes_data_from_csv = chunk.notes
        else:
            notes_data_from_json.extend(chunk.notes)

        # Each chunk contains the latest update timestamp of the notes in it;
        # keep the maximum. Guard against None so we never compare None with
        # a datetime (chunk.latest_update is Optional).
        if chunk.latest_update is not None:
            latest_update = (
                chunk.latest_update
                if latest_update is None
                else max(latest_update, chunk.latest_update)
            )

        # Capture these inside the loop instead of reading the loop variable
        # after it ends, which raised a NameError when no chunks were yielded.
        protected_fields = chunk.protected_fields
        protected_tags = chunk.protected_tags

    # When a note is both in the CSV and JSON, the JSON version is the more
    # recent one and the CSV version is discarded.
    ah_nids_from_json = {note.ah_nid for note in notes_data_from_json}
    filtered_notes_data_from_csv = [
        note for note in notes_data_from_csv if note.ah_nid not in ah_nids_from_json
    ]
    notes_data = notes_data_from_json + filtered_notes_data_from_csv

    return DeckUpdates(
        notes=notes_data,
        latest_update=latest_update,
        protected_fields=protected_fields,
        protected_tags=protected_tags,
    )

def _get_deck_updates_inner(
self,
ah_did: uuid.UUID,
since: datetime,
updates_download_progress_cb: Optional[Callable[[int], None]] = None,
deck_download_progress_cb: Optional[Callable[[int], None]] = None,
) -> Iterator[DeckUpdatesChunk]:
# updates_download_progress_cb gets passed the number of notes downloaded until now
# deck_download_progress_cb gets passed the percentage of the download progress

class Params(TypedDict, total=False):
since: str
Expand Down Expand Up @@ -710,19 +779,40 @@ class Params(TypedDict, total=False):
data["next"].split("/api", maxsplit=1)[1] if data["next"] else None
)

if data["external_notes_url"]:
notes_data_deck = self.download_deck(
ah_did,
deck_download_progress_cb,
s3_presigned_url=data["external_notes_url"],
)
chunk = DeckUpdatesChunk.from_dict({**data, "from_csv": True})
chunk.notes = notes_data_deck
yield chunk

# Get the rest of the updates, because the CSV is most likely not completely up to date
yield from self._get_deck_updates_inner(
ah_did=ah_did,
since=chunk.latest_update,
updates_download_progress_cb=updates_download_progress_cb,
deck_download_progress_cb=deck_download_progress_cb,
)
return
elif data["notes"] is None:
raise ValueError("No notes in the response") # pragma: no cover

# decompress and transform notes data
notes_data_base85 = data["notes"]
notes_data_gzipped = base64.b85decode(notes_data_base85)
notes_data = json.loads(self._gzip_decompress_string(notes_data_gzipped))
data["notes"] = _transform_notes_data(notes_data)

note_updates = DeckUpdateChunk.from_dict(data)
note_updates = DeckUpdatesChunk.from_dict({**data, "from_csv": False})
yield note_updates

notes_count += len(note_updates.notes)

if download_progress_cb:
download_progress_cb(notes_count)
if updates_download_progress_cb:
updates_download_progress_cb(notes_count)

first_request = False

Expand Down Expand Up @@ -866,7 +956,7 @@ def _create_suggestion_in_bulk_inner(
}
return errors_by_anki_nid

def _get_presigned_url_suffix(self, key: str, action: str) -> str:
def _presigned_url_suffix_from_key(self, key: str, action: str) -> str:
"""
Get presigned URL suffix for S3 to upload a single file.
The suffix is the part of the URL after the base url.
Expand All @@ -884,8 +974,10 @@ def _get_presigned_url_suffix(self, key: str, action: str) -> str:
raise AnkiHubHTTPError(response)

url = response.json()["pre_signed_url"]
result = url.split(self.s3_bucket_url)[1]
return result
return self._presigned_url_suffix_from_url(url)

def _presigned_url_suffix_from_url(self, url: str) -> str:
return url.split(self.s3_bucket_url)[1]

def _get_presigned_url_for_multiple_uploads(self, prefix: str) -> dict:
"""
Expand Down
7 changes: 6 additions & 1 deletion ankihub/ankihub_client/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def is_user_relation_owner(self):


@dataclass
class DeckUpdateChunk(DataClassJSONMixinWithConfig):
class DeckUpdates(DataClassJSONMixinWithConfig):
latest_update: Optional[datetime] = dataclasses.field(
metadata=field_options(
deserialize=lambda x: datetime.strptime(x, ANKIHUB_DATETIME_FORMAT_STR)
Expand All @@ -151,6 +151,11 @@ class DeckUpdateChunk(DataClassJSONMixinWithConfig):
notes: List[NoteInfo]


@dataclass
class DeckUpdatesChunk(DeckUpdates):
    # True if the notes in this chunk were parsed from the downloaded deck CSV,
    # False if they came from the JSON updates endpoint. CSV chunks contain the
    # full set of the deck's notes, so consumers assign instead of extending.
    from_csv: bool


@dataclass
class DeckMedia(DataClassJSONMixinWithConfig):
name: str
Expand Down
50 changes: 22 additions & 28 deletions ankihub/gui/deck_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from ..main.utils import create_backup
from ..settings import config
from .media_sync import media_sync
from .utils import show_error_dialog
from .utils import deck_download_progress_cb, show_error_dialog


class NotLoggedInError(Exception):
Expand Down Expand Up @@ -102,49 +102,39 @@ def _download_updates_for_deck(self, ankihub_did) -> bool:
"""Downloads note updates from AnkiHub and imports them into Anki.
Returns True if the action was successful, False if the user cancelled it."""

notes_data = []
latest_update: Optional[datetime] = None
deck_config = config.deck_config(ankihub_did)
for chunk in self._client.get_deck_updates(
deck_updates = self._client.get_deck_updates(
ankihub_did,
since=deck_config.latest_update,
download_progress_cb=lambda notes_count: _update_deck_download_progress_cb(
updates_download_progress_cb=lambda notes_count: _update_deck_updates_download_progress_cb(
notes_count, ankihub_did=ankihub_did
),
):
if aqt.mw.progress.want_cancel():
LOGGER.info("User cancelled deck update.")
return False

if not chunk.notes:
continue

notes_data += chunk.notes

# each chunk contains the latest update timestamp of the notes in it, we need the latest one
latest_update = max(
chunk.latest_update, latest_update or chunk.latest_update
)
deck_download_progress_cb=deck_download_progress_cb,
should_cancel=lambda: aqt.mw.progress.want_cancel(),
)
if deck_updates is None:
LOGGER.info("User cancelled deck update.")
return False

if notes_data:
note_types = fetch_note_types_based_on_notes(notes_data=notes_data)
if deck_updates.notes:
note_types = fetch_note_types_based_on_notes(notes_data=deck_updates.notes)
import_result = self._importer.import_ankihub_deck(
ankihub_did=ankihub_did,
notes=notes_data,
notes=deck_updates.notes,
note_types=note_types,
deck_name=deck_config.name,
is_first_import_of_deck=False,
behavior_on_remote_note_deleted=deck_config.behavior_on_remote_note_deleted,
anki_did=deck_config.anki_id,
protected_fields=chunk.protected_fields,
protected_tags=chunk.protected_tags,
protected_fields=deck_updates.protected_fields,
protected_tags=deck_updates.protected_tags,
subdecks=deck_config.subdecks_enabled,
suspend_new_cards_of_new_notes=deck_config.suspend_new_cards_of_new_notes,
suspend_new_cards_of_existing_notes=deck_config.suspend_new_cards_of_existing_notes,
)
self._import_results.append(import_result)

config.save_latest_deck_update(ankihub_did, latest_update)
config.save_latest_deck_update(ankihub_did, deck_updates.latest_update)
else:
LOGGER.info(f"No new updates for {ankihub_did=}")
return True
Expand Down Expand Up @@ -272,15 +262,19 @@ def show_tooltip_about_last_deck_updates_results() -> None:
)


def _update_deck_updates_download_progress_cb(
    notes_count: int, ankihub_did: uuid.UUID
) -> None:
    """Schedule a progress-dialog update for the updates download on the main thread.

    Args:
        notes_count: Number of notes downloaded so far.
        ankihub_did: UUID of the deck being updated.
    """
    # Progress-dialog updates must happen on Anki's main thread, so hand off
    # via taskman instead of calling the inner function directly.
    aqt.mw.taskman.run_on_main(
        lambda: _update_deck_updates_download_progress_cb_inner(
            notes_count=notes_count, ankihub_did=ankihub_did
        )
    )


def _update_deck_download_progress_cb_inner(notes_count: int, ankihub_did: uuid.UUID):
def _update_deck_updates_download_progress_cb_inner(
notes_count: int, ankihub_did: uuid.UUID
) -> None:
try:
aqt.mw.progress.update(
"Downloading updates\n"
Expand Down
16 changes: 2 additions & 14 deletions ankihub/gui/operations/deck_installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from ..exceptions import DeckDownloadAndInstallError, RemoteDeckNotFoundError
from ..media_sync import media_sync
from ..messages import messages
from ..utils import show_dialog, tooltip_icon
from ..utils import deck_download_progress_cb, show_dialog, tooltip_icon
from .subdecks import confirm_and_toggle_subdecks
from .utils import future_with_result, pass_exceptions_to_on_done

Expand Down Expand Up @@ -181,7 +181,7 @@ def _download_and_install_single_deck(
deck: Deck, behavior_on_remote_note_deleted: BehaviorOnRemoteNoteDeleted
) -> AnkiHubImportResult:
notes_data: List[NoteInfo] = AnkiHubClient().download_deck(
deck.ah_did, download_progress_cb=_download_progress_cb
deck.ah_did, download_progress_cb=deck_download_progress_cb
)

aqt.mw.taskman.run_on_main(
Expand Down Expand Up @@ -250,18 +250,6 @@ def _install_deck(
return import_result


def _download_progress_cb(percent: int) -> None:
    """Show deck download progress (*percent*) in the progress dialog, on the main thread."""
    # adding +1 to avoid progress increasing while at 0% progress
    # (the aqt.mw.progress.update function does that)
    aqt.mw.taskman.run_on_main(
        lambda: aqt.mw.progress.update(
            label="Downloading deck...",
            value=percent + 1,
            max=101,
        )
    )


def _cleanup_after_deck_install() -> None:
"""Clears unused tags and empty cards. We do this because importing a deck which the user
already has in their collection can result in many unused tags and empty cards."""
Expand Down
12 changes: 12 additions & 0 deletions ankihub/gui/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,15 @@ def extract_argument(
new_kwargs[param.name] = bound_args.arguments[param.name]

return tuple(new_args), new_kwargs, arg_value


def deck_download_progress_cb(percent: int) -> None:
    """Show deck download progress (*percent*, 0-100) in the progress dialog."""

    def _update_progress() -> None:
        # Offset the value by +1 so the dialog doesn't animate an increasing
        # value while progress is still at 0% (aqt.mw.progress.update does that).
        aqt.mw.progress.update(
            label="Downloading deck...",
            value=percent + 1,
            max=101,
        )

    # Progress-dialog updates must run on Anki's main thread.
    aqt.mw.taskman.run_on_main(_update_progress)
Loading

0 comments on commit 1371a4e

Please sign in to comment.