Skip to content

Commit

Permalink
issue addressed
Browse files Browse the repository at this point in the history
Signed-off-by: Miguel Brandão <[email protected]>
  • Loading branch information
HolyMichael committed Sep 8, 2023
1 parent 0d9781c commit 9407a66
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
6 changes: 6 additions & 0 deletions deepsearch/cps/cli/cli_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@
containing coordinates of COS.""",
)

# CLI option for `--conv-settings`: conversion settings applied when a local
# file is uploaded. The default is None, i.e. the flag was not supplied.
CONV_SETTINGS = typer.Option(
    None,
    "--conv-settings",
    help="Provide conversion settings to be used on local file upload",
)

SOURCE_PATH = typer.Option(
None,
"--input-file",
Expand Down
3 changes: 3 additions & 0 deletions deepsearch/cps/cli/data_indices_typer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from deepsearch.cps.cli.cli_options import (
ATTACHMENT_KEY,
ATTACHMENT_PATH,
CONV_SETTINGS,
COORDINATES_PATH,
INDEX_ITEM_ID,
INDEX_KEY,
Expand Down Expand Up @@ -135,6 +136,7 @@ def upload_files(
local_file: Path = SOURCE_PATH,
index_key: str = INDEX_KEY,
s3_coordinates: Path = COORDINATES_PATH,
conv_settings=CONV_SETTINGS,
):
"""
Upload pdfs, zips, or online documents to a data index in a project
Expand Down Expand Up @@ -163,6 +165,7 @@ def upload_files(
url=urls,
local_file=local_file,
s3_coordinates=cos_coordinates,
conv_settings=conv_settings,
)

typer.echo("Tasks have been queued successfully")
Expand Down
8 changes: 7 additions & 1 deletion deepsearch/cps/data_indices/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def upload_files(
url: Optional[Union[str, List[str]]] = None,
local_file: Optional[Union[str, Path]] = None,
s3_coordinates: Optional[S3Coordinates] = None,
conv_settings: Optional[Any] = None,
):
"""
Orchestrate document conversion and upload to an index in a project
Expand All @@ -47,6 +48,7 @@ def upload_files(
api=api,
coords=coords,
local_file=Path(local_file),
conv_settings=conv_settings,
)
elif url is None and local_file is None and s3_coordinates is not None:
return process_external_cos(
Expand Down Expand Up @@ -101,12 +103,15 @@ def process_local_file(
coords: ElasticProjectDataCollectionSource,
local_file: Path,
progress_bar: bool = False,
conv_settings=None,
):
"""
Individual files are uploaded for conversion and storage in data index.
"""

# process multiple files from local directory
if conv_settings is None:
conv_settings = {}
root_dir = create_root_dir()
# batch individual pdfs into zips and add them to root_dir
batched_files = input_process.batch_single_files(
Expand Down Expand Up @@ -147,7 +152,8 @@ def process_local_file(
api=api, cps_proj_key=coords.proj_key, source_path=Path(single_zip)
)
file_url_array = [private_download_url]
payload = {"file_url": file_url_array}
payload = {"file_url": file_url_array, "conversion_settings": conv_settings}
print(payload)
task_id = api.data_indices.upload_file(coords=coords, body=payload)
task_ids.append(task_id)
progress.update(1)
Expand Down

0 comments on commit 9407a66

Please sign in to comment.