diff --git a/deepsearch/cps/cli/cli_options.py b/deepsearch/cps/cli/cli_options.py
index 55a68ea1..bb70512b 100644
--- a/deepsearch/cps/cli/cli_options.py
+++ b/deepsearch/cps/cli/cli_options.py
@@ -32,6 +32,12 @@
 containing coordinates of COS.""",
 )
 
+CONV_SETTINGS = typer.Option(
+    None,
+    "--conv-settings",
+    help="""Provide conversion settings to be used on local file upload""",
+)
+
 SOURCE_PATH = typer.Option(
     None,
     "--input-file",
diff --git a/deepsearch/cps/cli/data_indices_typer.py b/deepsearch/cps/cli/data_indices_typer.py
index 91716825..80d89ddc 100644
--- a/deepsearch/cps/cli/data_indices_typer.py
+++ b/deepsearch/cps/cli/data_indices_typer.py
@@ -11,6 +11,7 @@
 from deepsearch.cps.cli.cli_options import (
     ATTACHMENT_KEY,
     ATTACHMENT_PATH,
+    CONV_SETTINGS,
     COORDINATES_PATH,
     INDEX_ITEM_ID,
     INDEX_KEY,
@@ -135,6 +136,7 @@ def upload_files(
     local_file: Path = SOURCE_PATH,
     index_key: str = INDEX_KEY,
     s3_coordinates: Path = COORDINATES_PATH,
+    conv_settings=CONV_SETTINGS,
 ):
     """
     Upload pdfs, zips, or online documents to a data index in a project
@@ -163,6 +165,7 @@ def upload_files(
         url=urls,
         local_file=local_file,
         s3_coordinates=cos_coordinates,
+        conv_settings=conv_settings,
     )
 
     typer.echo("Tasks have been queued successfully")
diff --git a/deepsearch/cps/data_indices/utils.py b/deepsearch/cps/data_indices/utils.py
index 69f263c1..24757b56 100644
--- a/deepsearch/cps/data_indices/utils.py
+++ b/deepsearch/cps/data_indices/utils.py
@@ -25,6 +25,7 @@ def upload_files(
     url: Optional[Union[str, List[str]]] = None,
     local_file: Optional[Union[str, Path]] = None,
     s3_coordinates: Optional[S3Coordinates] = None,
+    conv_settings: Optional[Any] = None,
 ):
     """
     Orchestrate document conversion and upload to an index in a project
@@ -47,6 +48,7 @@
             api=api,
             coords=coords,
             local_file=Path(local_file),
+            conv_settings=conv_settings,
         )
     elif url is None and local_file is None and s3_coordinates is not None:
         return process_external_cos(
@@ -101,12 +103,15 @@ def process_local_file(
     coords: ElasticProjectDataCollectionSource,
     local_file: Path,
     progress_bar: bool = False,
+    conv_settings=None,
 ):
     """
     Individual files are uploaded for conversion and storage in data index.
     """
 
     # process multiple files from local directory
+    if conv_settings is None:
+        conv_settings = {}
     root_dir = create_root_dir()
     # batch individual pdfs into zips and add them to root_dir
     batched_files = input_process.batch_single_files(
@@ -147,7 +152,8 @@
             api=api, cps_proj_key=coords.proj_key, source_path=Path(single_zip)
         )
         file_url_array = [private_download_url]
-        payload = {"file_url": file_url_array}
+        payload = {"file_url": file_url_array, "conversion_settings": conv_settings}
+        print(payload)
         task_id = api.data_indices.upload_file(coords=coords, body=payload)
         task_ids.append(task_id)
         progress.update(1)
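
Usage sketch (illustrative, not part of the patch): the snippet below shows how the new keyword could be exercised from Python once this change is in place. The `CpsApi.from_env()` helper, the import paths, the `ElasticProjectDataCollectionSource(proj_key=..., index_key=...)` constructor, and the `{"ocr": False}` settings dict are assumptions for illustration; the diff itself only defines that `conv_settings` is forwarded into the upload payload under `conversion_settings`.

```python
from pathlib import Path

from deepsearch.cps.client.api import CpsApi
from deepsearch.cps.client.components.elastic import ElasticProjectDataCollectionSource
from deepsearch.cps.data_indices import utils

# Assumption: credentials and host are taken from the configured profile/environment.
api = CpsApi.from_env()

# Assumption: coordinates of the target project data index.
coords = ElasticProjectDataCollectionSource(proj_key="<proj-key>", index_key="<index-key>")

# Placeholder settings dict; the accepted keys are defined by the conversion
# backend, not by this diff.
conv_settings = {"ocr": False}

utils.upload_files(
    api=api,
    coords=coords,
    local_file=Path("report.pdf"),
    conv_settings=conv_settings,  # ends up in the payload as "conversion_settings"
)
```

If `--conv-settings` is not supplied (or `conv_settings` is left as `None`), `process_local_file` falls back to an empty dict, so every local-file upload payload still carries a `conversion_settings` key alongside `file_url`.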