Skip to content

Commit

Permalink
feat: modify_refresh_interval flag in opensearch index_documents (#2980)
Browse files Browse the repository at this point in the history
* feature/opensearch: modify_refresh_interval flag

* CR change: rename modify_refresh_interval to enable_refresh_interval

---------

Co-authored-by: jaidisido <[email protected]>
  • Loading branch information
AvihaiSam and jaidisido authored Oct 7, 2024
1 parent cc77561 commit 4074f25
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions awswrangler/opensearch/_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ def index_documents(
initial_backoff: int | None = None,
max_backoff: int | None = None,
use_threads: bool | int = False,
enable_refresh_interval: bool = True,
**kwargs: Any,
) -> dict[str, Any]:
"""
Expand Down Expand Up @@ -559,6 +560,8 @@ def index_documents(
True to enable concurrent requests, False to disable multiple threads.
If enabled os.cpu_count() will be used as the max number of threads.
If integer is provided, specified number is used.
enable_refresh_interval
True (default) to temporarily set the index ``refresh_interval`` to ``-1`` (disabled) while indexing documents and restore the original value afterwards; False to leave the setting unchanged
**kwargs
KEYWORD arguments forwarded to bulk operation
elasticsearch >= 7.10.2 / opensearch: \
Expand Down Expand Up @@ -614,7 +617,7 @@ def index_documents(
widgets=widgets, max_value=total_documents, prefix="Indexing: "
).start()
for i, bulk_chunk_documents in enumerate(actions):
- if i == 1: # second bulk iteration, in case the index didn't exist before
+ if i == 1 and enable_refresh_interval: # second bulk iteration, in case the index didn't exist before
refresh_interval = _get_refresh_interval(client, index)
_disable_refresh_interval(client, index)
_logger.debug("running bulk index of %s documents", len(bulk_chunk_documents))
Expand Down Expand Up @@ -655,6 +658,7 @@ def index_documents(
raise e

finally:
- _set_refresh_interval(client, index, refresh_interval)
+ if enable_refresh_interval:
+ _set_refresh_interval(client, index, refresh_interval)

return {"success": success, "errors": errors}

0 comments on commit 4074f25

Please sign in to comment.