From 4bea6c880c90e19541744e3b5b3fb906df69c953 Mon Sep 17 00:00:00 2001 From: Paul-Cornell Date: Wed, 31 Jul 2024 07:51:21 -0700 Subject: [PATCH] Clarify ingest tool/library names for Ingest CLI and Python Ingest library (#131) --- api-reference/api-services/free-api.mdx | 12 +-- .../saas-api-development-guide.mdx | 10 +- .../ingest/destination-connector/azure.mdx | 2 +- .../ingest/destination-connector/local.mdx | 2 +- .../ingest/destination-connector/s3.mdx | 2 +- .../destination-connector/singlestore.mdx | 2 +- .../ingest/source-connectors/azure.mdx | 2 +- .../ingest/source-connectors/google-drive.mdx | 2 +- .../ingest/source-connectors/local.mdx | 2 +- api-reference/ingest/source-connectors/s3.mdx | 2 +- faq/faq.mdx | 4 +- ingestion/overview.mdx | 24 ++--- .../ingest/destination-connectors/azure.mdx | 2 +- .../ingest/destination-connectors/local.mdx | 2 +- .../ingest/destination-connectors/s3.mdx | 2 +- .../destination-connectors/singlestore.mdx | 2 +- open-source/ingest/overview.mdx | 12 +-- .../ingest/source-connectors/airtable.mdx | 2 +- .../ingest/source-connectors/azure.mdx | 2 +- .../ingest/source-connectors/biomed.mdx | 2 +- open-source/ingest/source-connectors/box.mdx | 2 +- .../ingest/source-connectors/confluence.mdx | 2 +- .../ingest/source-connectors/delta-table.mdx | 2 +- .../ingest/source-connectors/discord.mdx | 2 +- .../ingest/source-connectors/dropbox.mdx | 2 +- .../source-connectors/elastic-search.mdx | 2 +- .../ingest/source-connectors/github.mdx | 2 +- .../ingest/source-connectors/gitlab.mdx | 2 +- .../google-cloud-storage.mdx | 2 +- open-source/ingest/source-connectors/jira.mdx | 2 +- .../ingest/source-connectors/local.mdx | 2 +- .../ingest/source-connectors/mongodb.mdx | 2 +- .../ingest/source-connectors/notion.mdx | 2 +- .../ingest/source-connectors/one-drive.mdx | 2 +- .../ingest/source-connectors/opensearch.mdx | 2 +- .../ingest/source-connectors/outlook.mdx | 2 +- .../ingest/source-connectors/reddit.mdx | 2 +- 
open-source/ingest/source-connectors/s3.mdx | 2 +- .../ingest/source-connectors/salesforce.mdx | 2 +- open-source/ingest/source-connectors/sftp.mdx | 2 +- .../ingest/source-connectors/sharepoint.mdx | 2 +- .../ingest/source-connectors/slack.mdx | 2 +- .../ingest/source-connectors/wikipedia.mdx | 2 +- .../dc-shared-text/azure-cognitive-search.mdx | 2 +- snippets/dc-shared-text/box.mdx | 2 +- snippets/dc-shared-text/chroma.mdx | 2 +- snippets/dc-shared-text/clarifai.mdx | 2 +- .../dc-shared-text/databricks-volumes.mdx | 2 +- snippets/dc-shared-text/delta-table.mdx | 2 +- snippets/dc-shared-text/dropbox.mdx | 2 +- snippets/dc-shared-text/elasticsearch.mdx | 2 +- .../dc-shared-text/google-cloud-service.mdx | 2 +- snippets/dc-shared-text/mongodb.mdx | 2 +- snippets/dc-shared-text/opensearch.mdx | 2 +- snippets/dc-shared-text/pinecone.mdx | 2 +- snippets/dc-shared-text/qdrant.mdx | 2 +- snippets/dc-shared-text/sql.mdx | 2 +- snippets/dc-shared-text/vectara.mdx | 2 +- snippets/dc-shared-text/weaviate.mdx | 2 +- .../connector-availability-cli-sdk.mdx | 2 +- .../multi-file-api-use-connectors.mdx | 2 +- .../multi-file-oss-use-connectors.mdx | 2 +- .../multi-file-partition-via-api.mdx | 2 +- .../post-api-single-file.mdx | 2 +- .../partition-by-api-oss.mdx | 6 +- snippets/sc-shared-text/astradb.mdx | 2 +- snippets/sc-shared-text/azure.mdx | 2 +- snippets/sc-shared-text/google-drive.mdx | 2 +- snippets/sc-shared-text/local.mdx | 2 +- snippets/sc-shared-text/s3.mdx | 2 +- welcome.mdx | 100 +++++++++++++----- 71 files changed, 170 insertions(+), 126 deletions(-) diff --git a/api-reference/api-services/free-api.mdx b/api-reference/api-services/free-api.mdx index cd1f7146..a52b12fa 100644 --- a/api-reference/api-services/free-api.mdx +++ b/api-reference/api-services/free-api.mdx @@ -36,7 +36,7 @@ import SharedPagesBilling from '/snippets/general-shared-text/pages-billing.mdx' ## Quickstart Let's say you want to preprocess an `*.eml` file using the free Unstructured API. 
There are several ways -you can do this, which all lead to the same result, so pick your preferred method: [POST](#post-request), [CLI](#unstructured-cli), [SDK](#unstructured-python-sdk-and-javascript-typescript-sdk), or [open source](#calling-the-unstructured-api-from-the-unstructured-open-source-library). +you can do this, which all lead to the same result, so pick your preferred method: [POST](#post-request), [CLI](#unstructured-ingest-cli), [SDK](#unstructured-python-sdk-and-javascript-typescript-sdk), or [open source](#calling-the-unstructured-api-from-the-unstructured-open-source-library). ### POST request @@ -63,7 +63,7 @@ After the command successfully runs, see the results in the specified output pat If you do not have any files available, you can download some from the [example-docs](https://github.com/Unstructured-IO/unstructured/tree/main/example-docs) folder in the Unstructured repo on GitHub. -`POST` requests support using only local machine paths as the source (input) for the file to preprocess and as the destination (output) that Unstructured sends the processed data to. To specify a source or destination other than a local machine, use the [CLI](#unstructured-cli), the [Python SDK](#unstructured-python-sdk-and-javascript-typescript-sdk), or the [open source library](#calling-the-unstructured-api-from-the-unstructured-open-source-library) instead. +`POST` requests support using only local machine paths as the source (input) for the file to preprocess and as the destination (output) that Unstructured sends the processed data to. To specify a source or destination other than a local machine, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) instead. 
import SharedPOSTSingleFile from '/snippets/general-shared-text/post-api-single-file.mdx'; @@ -71,9 +71,9 @@ import SharedPOSTSingleFile from '/snippets/general-shared-text/post-api-single- [Learn more about how to use POST requests](/api-reference/api-services/post-requests). -### Unstructured CLI +### Unstructured Ingest CLI -To work with the Free Unstructured API by using the Unstructured CLI, you will need to: +To work with the Free Unstructured API by using the Unstructured Ingest CLI, you will need to: - Install Python, and then install the CLI package: @@ -113,9 +113,7 @@ To work with the Free Unstructured API in Python or JavaScript, use the Unstructured [Python SDK](https://github.com/Unstructured-IO/unstructured-python-client), or [JavaScript SDK](https://github.com/Unstructured-IO/unstructured-js-client). -The JavaScript/TypeScript SDK supports using only local machine paths as the source (input) for the files to preprocess and as the destination (output) that Unstructured sends the processed data to. To specify a source or destination other than a local machine, use the [CLI](#unstructured-cli), the Python SDK, or the [open source library](#calling-the-unstructured-api-from-the-unstructured-open-source-library) instead. - -Install your preferred SDK: +The JavaScript/TypeScript SDK supports using only local machine paths as the source (input) for the files to preprocess and as the destination (output) that Unstructured sends the processed data to. To specify a source or destination other than a local machine, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) instead. 
```bash Python diff --git a/api-reference/api-services/saas-api-development-guide.mdx b/api-reference/api-services/saas-api-development-guide.mdx index 7a8e33e4..0da1b4da 100644 --- a/api-reference/api-services/saas-api-development-guide.mdx +++ b/api-reference/api-services/saas-api-development-guide.mdx @@ -43,7 +43,7 @@ import SharedPagesBilling from '/snippets/general-shared-text/pages-billing.mdx' The following example illustrates how to preprocess an `*.eml` file using the Unstructured Serverless API. -There are several ways to use the Unstructured Serverless API, which all lead to the same result, so pick your preferred method: [POST](#post-request), [CLI](#unstructured-cli), [SDK](#unstructured-python-sdk-and-javascript-typescript-sdk), or [open source](#calling-the-unstructured-api-from-the-unstructured-open-source-library). +There are several ways to use the Unstructured Serverless API, which all lead to the same result, so pick your preferred method: [POST](#post-request), [CLI](#unstructured-ingest-cli), [SDK](#unstructured-python-sdk-and-javascript-typescript-sdk), or [open source](#calling-the-unstructured-api-from-the-unstructured-open-source-library). ### POST request @@ -72,7 +72,7 @@ After the command successfully runs, see the results in the specified output pat If you do not have any files available, you can download some from the [example-docs](https://github.com/Unstructured-IO/unstructured/tree/main/example-docs) folder in the Unstructured repo on GitHub. -`POST` requests support using only local machine paths as the source (input) for the files to preprocess and as the destination (output) that Unstructured sends the processed data to. To specify a source or destination other than a local machine, use the CLI, the Python SDK, or the open source library instead. +`POST` requests support using only local machine paths as the source (input) for the files to preprocess and as the destination (output) that Unstructured sends the processed data to. 
To specify a source or destination other than a local machine, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) instead. import SharedPOSTSingleFile from '/snippets/general-shared-text/post-api-single-file.mdx'; @@ -80,9 +80,9 @@ import SharedPOSTSingleFile from '/snippets/general-shared-text/post-api-single- [Learn more about how to use POST requests](/api-reference/api-services/post-requests). -### Unstructured CLI +### Unstructured Ingest CLI -To work with the Unstructured Serverless API by using the Unstructured CLI, you will need to: +To work with the Unstructured Serverless API by using the Unstructured Ingest CLI, you will need to: - Install Python, and then install the CLI package: @@ -126,7 +126,7 @@ To work with the Unstructured Serverless API in Python, JavaScript, or TypeScrip Unstructured [Python SDK](https://github.com/Unstructured-IO/unstructured-python-client) or [JavaScript/TypeScript SDK](https://github.com/Unstructured-IO/unstructured-js-client). -The JavaScript/TypeScript SDK supports using only local machine paths as the source (input) for the files to preprocess and as the destination (output) that Unstructured sends the processed data to. To specify a source or destination other than a local machine, use the [CLI](#unstructured-cli), the Python SDK, or the [open source library](#calling-the-unstructured-api-from-the-unstructured-open-source-library) instead. +The JavaScript/TypeScript SDK supports using only local machine paths as the source (input) for the files to preprocess and as the destination (output) that Unstructured sends the processed data to. To specify a source or destination other than a local machine, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) instead. 
First, install your preferred SDK: diff --git a/api-reference/ingest/destination-connector/azure.mdx b/api-reference/ingest/destination-connector/azure.mdx index 38bcb278..5d3537ba 100644 --- a/api-reference/ingest/destination-connector/azure.mdx +++ b/api-reference/ingest/destination-connector/azure.mdx @@ -12,7 +12,7 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; -Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. This example uses the local source connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The source connector can be any of the ones supported. This example uses the local source connector: import AzureAPISh from '/snippets/destination_connectors/azure.sh.mdx'; import AzureAPIPyV2 from '/snippets/destination_connectors/azure.v2.py.mdx'; diff --git a/api-reference/ingest/destination-connector/local.mdx b/api-reference/ingest/destination-connector/local.mdx index b97e9085..746ac743 100644 --- a/api-reference/ingest/destination-connector/local.mdx +++ b/api-reference/ingest/destination-connector/local.mdx @@ -12,7 +12,7 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; -Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. This example uses the local source connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The source connector can be any of the ones supported. 
This example uses the local source connector: import AzureAPISh from '/snippets/destination_connectors/azure.sh.mdx'; import AzureAPIPyV2 from '/snippets/destination_connectors/azure.v2.py.mdx'; diff --git a/api-reference/ingest/destination-connector/s3.mdx b/api-reference/ingest/destination-connector/s3.mdx index 858f84d6..5d0858d0 100644 --- a/api-reference/ingest/destination-connector/s3.mdx +++ b/api-reference/ingest/destination-connector/s3.mdx @@ -12,7 +12,7 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; -Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. This example uses the local source connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The source connector can be any of the ones supported. This example uses the local source connector: import S3APISh from '/snippets/destination_connectors/s3.sh.mdx'; import S3APIPyV2 from '/snippets/destination_connectors/s3.v2.py.mdx'; diff --git a/api-reference/ingest/destination-connector/singlestore.mdx b/api-reference/ingest/destination-connector/singlestore.mdx index 1660743b..e0baf66d 100644 --- a/api-reference/ingest/destination-connector/singlestore.mdx +++ b/api-reference/ingest/destination-connector/singlestore.mdx @@ -12,7 +12,7 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; -Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. This example uses the local source connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The source connector can be any of the ones supported. 
This example uses the local source connector: import SingleStoreAPISh from '/snippets/destination_connectors/singlestore.sh.mdx'; import SingleStoreAPIPyV2 from '/snippets/destination_connectors/singlestore.v2.py.mdx'; diff --git a/api-reference/ingest/source-connectors/azure.mdx b/api-reference/ingest/source-connectors/azure.mdx index b8a1b739..d6fb8499 100644 --- a/api-reference/ingest/source-connectors/azure.mdx +++ b/api-reference/ingest/source-connectors/azure.mdx @@ -12,7 +12,7 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; -Now call the Unstructured CLI or Python SDK. The destination connector can be any of the ones supported. This example uses the local destination connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The destination connector can be any of the ones supported. This example uses the local destination connector: import AzureAPISh from '/snippets/source_connectors/azure.sh.mdx'; import AzureAPIPyV2 from '/snippets/source_connectors/azure.v2.py.mdx'; diff --git a/api-reference/ingest/source-connectors/google-drive.mdx b/api-reference/ingest/source-connectors/google-drive.mdx index c341d782..ed779ed3 100644 --- a/api-reference/ingest/source-connectors/google-drive.mdx +++ b/api-reference/ingest/source-connectors/google-drive.mdx @@ -12,7 +12,7 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; -Now call the Unstructured CLI or Python SDK. The destination connector can be any of the ones supported. This example uses the local destination connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The destination connector can be any of the ones supported. 
This example uses the local destination connector: import GoogleDriveAPISh from '/snippets/source_connectors/google_drive.sh.mdx'; import GoogleDriveAPIPyV2 from '/snippets/source_connectors/google_drive.v2.py.mdx'; diff --git a/api-reference/ingest/source-connectors/local.mdx b/api-reference/ingest/source-connectors/local.mdx index 69da8ce9..35b68a08 100644 --- a/api-reference/ingest/source-connectors/local.mdx +++ b/api-reference/ingest/source-connectors/local.mdx @@ -12,7 +12,7 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; -Now call the Unstructured CLI or Python SDK. The destination connector can be any of the ones supported. This example uses the local destination connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The destination connector can be any of the ones supported. This example uses the local destination connector: import LocalAPISh from '/snippets/source_connectors/local.sh.mdx'; import LocalAPIPyV2 from '/snippets/source_connectors/local.v2.py.mdx'; diff --git a/api-reference/ingest/source-connectors/s3.mdx b/api-reference/ingest/source-connectors/s3.mdx index 2ccc945c..64e0afc4 100644 --- a/api-reference/ingest/source-connectors/s3.mdx +++ b/api-reference/ingest/source-connectors/s3.mdx @@ -12,7 +12,7 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; -Now call the Unstructured CLI or Python SDK. The destination connector can be any of the ones supported. This example uses the local destination connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The destination connector can be any of the ones supported. 
This example uses the local destination connector: import S3APISh from '/snippets/source_connectors/s3.sh.mdx'; import S3APIPyV2 from '/snippets/source_connectors/s3.v2.py.mdx'; diff --git a/faq/faq.mdx b/faq/faq.mdx index 7fb28720..f8ceac3b 100644 --- a/faq/faq.mdx +++ b/faq/faq.mdx @@ -43,8 +43,8 @@ Yes, you can still use your old API keys. We will migrate all the user keys to t ### How can I generate and use a new API Key to process my documents? When you log in to the Serverless API dashboard, you can access your API keys by clicking the `API Keys` link in the side navigation. -Under the `Actions` column, click the `Copy` icon to copy the key or the boilerplate codes to process the documents -using the Unstructured REST API POST with `curl`, or the Unstructured CLI, or the [Unstructured Python SDK](https://github.com/Unstructured-IO/unstructured-python-client) or [Unstructured JavaScript/Typescript SDK](https://github.com/Unstructured-IO/unstructured-js-client). +Under the `Actions` column, click the `Copy` icon to copy the key or an example code snippet to process the documents +using the Unstructured REST API, or the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli), or the [Unstructured Python SDK](https://github.com/Unstructured-IO/unstructured-python-client) or [Unstructured JavaScript/Typescript SDK](https://github.com/Unstructured-IO/unstructured-js-client). ### What is the new Unstructured API pricing structure? diff --git a/ingestion/overview.mdx b/ingestion/overview.mdx index 06d1d395..a4c62903 100644 --- a/ingestion/overview.mdx +++ b/ingestion/overview.mdx @@ -10,7 +10,7 @@ You can perform ingestion with the following tools: - The [Unstructured Platform](/platform/overview), a no-code user interface, unlimited pay-as-you-go platform to get all of your data ready for Retrieval Augmented Generation (RAG) and model fine-tuning. 
- The [Unstructured Ingest CLI](#unstructured-ingest-cli), with unlimited pay-as-you-go and limited free options, that enable you to use command-line scripts to get all of your data ready for RAG and model fine-tuning. -- The [Unstructured Ingest Python](#unstructured-ingest-python) library and connectors, with unlimited pay-as-you-go and limited free options, that enable you to use Python code to get all of your data ready for RAG and model fine-tuning. +- The [Unstructured Ingest Python library](#unstructured-ingest-python-library), with unlimited pay-as-you-go and limited free options, that enable you to use Python code to get all of your data ready for RAG and model fine-tuning. The [Unstructured Python SDK](/api-reference/api-services/sdk-python) and Unstructured JavaScript/TypeScript SDK](/api-reference/api-services/sdk-jsts) can process only one file at a time. @@ -32,7 +32,7 @@ flowchart LR The Unstructured Platform enables you to connect to many kinds of [sources](/platform/platform-source-connectors/overview) and [destinations](/platform/platform-destination-connectors/overview). -If you use the Unstructured Ingest CLI or Unstructured Ingest Python, the source or destination can be a cloud storage location or a local location. For example: +If you use the Unstructured Ingest CLI or the Unstructured Ingest Python library, the source or destination can be a cloud storage location or a local location. For example: ```mermaid flowchart LR @@ -66,10 +66,10 @@ flowchart LR ``` - This flow always happens for the Unstructured Platform. The Platform only allows sending files from cloud storage and sending processed data to cloud storage. 
-- For the Unstructured CLI or Unstructured Ingest Python, to use this flow: +- For the Unstructured Ingest CLI or the Unstructured Ingest Python library, to use this flow: - - When using the Unstructured CLI, include the `--partition-by-api` option and set `--api-key` and `--partition-endpoint` to a valid, matching Unstructured API key and API URL, respectively. - - When using Unstructured Ingest Python, set `partition_by_api=True` and `api_key` and set `partition_endpoint` to a valid, matching Unstructured API key and API URL, respectively. + - When using the Unstructured Ingest CLI, include the `--partition-by-api` option and set `--api-key` and `--partition-endpoint` to a valid, matching Unstructured API key and API URL, respectively. + - When using the Unstructured Ingest Python library, set `partition_by_api=True` and `api_key` and set `partition_endpoint` to a valid, matching Unstructured API key and API URL, respectively. ## Local ingestion options @@ -81,10 +81,10 @@ flowchart LR ``` - This flow never happens for the Unstructured Platform. The Platform does not allow sending files from a local destination to Unstructured or Unstructured sending processed data to a local destination. -- For the Unstructured CLI or Unstructured Ingest Python, to use this flow: +- For the Unstructured Ingest CLI or the Unstructured Ingest Python library, to use this flow: - - When using the Unstructured CLI, omit the `--partition-by-api`, `--api-key`, and `--partition-endpoint` options. - - When using the Unstructured Ingest Python, omit `partition_by_api` or explicitly set `parition_by_api=False`. Also omit `api_key` and `partition_endpoint`. + - When using the Unstructured Ingest CLI, omit the `--partition-by-api`, `--api-key`, and `--partition-endpoint` options. + - When using the Unstructured Ingest Python library, omit `partition_by_api` or explicitly set `partition_by_api=False`. Also omit `api_key` and `partition_endpoint`. 
## Unstructured Ingest CLI @@ -133,11 +133,11 @@ To begin using the CLI, see the quickstarts for the: - [Unstructured Serverless API](/api-reference/api-services/saas-api-development-guide#unstructured-cli) - [Free Unstructured API](/api-reference/api-services/free-api#unstructured-cli) -## Unstructured Ingest Python +## Unstructured Ingest Python library -The Unstructured Ingest Python library and connectors enable you to use Python code to get all of your data ready for RAG and model fine-tuning. +The Unstructured Ingest Python library enables you to use Python code to get all of your data ready for RAG and model fine-tuning. -One approach to using Unstructured Ingest Python is installing Python and then running the following command to install the library and the default connectors: +One approach to using the Unstructured Ingest Python library is installing Python and then running the following command to install the library and the default connectors: ```bash pip install unstructured @@ -159,4 +159,4 @@ Some source and destination connectors provide newer v2 and older v1 implementat - [v1 fsspec connectors](https://github.com/Unstructured-IO/unstructured/tree/main/unstructured/ingest/connector/fsspec) - [v1 Notion connector](https://github.com/Unstructured-IO/unstructured/tree/main/unstructured/ingest/connector/notion) -To begin using Unstructured Ingest Python, see the code examples for the [source](/api-reference/ingest/source-connectors/overview) and [destination](/api-reference/ingest/destination-connector/overview) connectors. \ No newline at end of file +To begin using the Unstructured Ingest Python library, see the code examples for the [source](/api-reference/ingest/source-connectors/overview) and [destination](/api-reference/ingest/destination-connector/overview) connectors. 
\ No newline at end of file diff --git a/open-source/ingest/destination-connectors/azure.mdx b/open-source/ingest/destination-connectors/azure.mdx index 76cb4e7c..39ad196b 100644 --- a/open-source/ingest/destination-connectors/azure.mdx +++ b/open-source/ingest/destination-connectors/azure.mdx @@ -10,7 +10,7 @@ import SharedAzure from '/snippets/dc-shared-text/azure.mdx'; -Now call the Unstructured CLI or Python. The source connector can be any of the ones supported. This example uses the local source connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The source connector can be any of the ones supported. This example uses the local source connector: import AzureAPISh from '/snippets/destination_connectors/azure.sh.mdx'; import AzureAPIPyV2 from '/snippets/destination_connectors/azure.v2.py.mdx'; diff --git a/open-source/ingest/destination-connectors/local.mdx b/open-source/ingest/destination-connectors/local.mdx index 89f9d879..acc97883 100644 --- a/open-source/ingest/destination-connectors/local.mdx +++ b/open-source/ingest/destination-connectors/local.mdx @@ -10,7 +10,7 @@ import SharedContentLocal from '/snippets/dc-shared-text/local.mdx'; -Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. This example uses the local source connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The source connector can be any of the ones supported. 
This example uses the local source connector: import LocalSh from '/snippets/source_connectors/local.sh.mdx'; import LocalPyV2 from '/snippets/source_connectors/local.v2.py.mdx'; diff --git a/open-source/ingest/destination-connectors/s3.mdx b/open-source/ingest/destination-connectors/s3.mdx index 71d1abc3..60448c26 100644 --- a/open-source/ingest/destination-connectors/s3.mdx +++ b/open-source/ingest/destination-connectors/s3.mdx @@ -10,7 +10,7 @@ import SharedS3 from '/snippets/dc-shared-text/s3.mdx'; -Now call the Unstructured CLI or Python. The source connector can be any of the ones supported. This example uses the local source connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The source connector can be any of the ones supported. This example uses the local source connector: import S3APISh from '/snippets/destination_connectors/s3.sh.mdx'; import S3APIPyV2 from '/snippets/destination_connectors/s3.v2.py.mdx'; diff --git a/open-source/ingest/destination-connectors/singlestore.mdx b/open-source/ingest/destination-connectors/singlestore.mdx index fd611965..9009368f 100644 --- a/open-source/ingest/destination-connectors/singlestore.mdx +++ b/open-source/ingest/destination-connectors/singlestore.mdx @@ -10,7 +10,7 @@ import SharedSingleStore from '/snippets/dc-shared-text/singlestore.mdx'; -Now call the Unstructured CLI or Python. The source connector can be any of the ones supported. This example uses the local source connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The source connector can be any of the ones supported. 
This example uses the local source connector: import SingleStoreAPISh from '/snippets/destination_connectors/singlestore.sh.mdx'; import SingleStoreAPIPyV2 from '/snippets/destination_connectors/singlestore.v2.py.mdx'; diff --git a/open-source/ingest/overview.mdx b/open-source/ingest/overview.mdx index 6ad1920a..03e4f65c 100644 --- a/open-source/ingest/overview.mdx +++ b/open-source/ingest/overview.mdx @@ -1,7 +1,7 @@ --- title: Ingest sidebarTitle: Overview -description: The Ingest Library is a powerful tool designed to coordinate the process of pulling data from data providers, partitioning the content, and pushing that new content to a desired location. This technical documentation will provide an in-depth understanding of the Ingest Library, including its features, architecture, installation, configuration, usage, API reference, troubleshooting, examples, and more. +description: The Unstructured Python Ingest library is a powerful tool designed to coordinate the process of pulling data from data providers, partitioning the content, and pushing that new content to a desired location. This technical documentation will provide an in-depth understanding of the Python Ingest library, including its features, architecture, installation, configuration, usage, API reference, troubleshooting, examples, and more. --- ## Library Documentation @@ -20,7 +20,7 @@ Each configuration used when generating an ingest process. ## Features -The Ingest Library/CLI offers the following key features: +The Unstructured Ingest CLI and the Unstructured Ingest Python library offer the following key features: * Data Ingestion: Facilitates the ingestion of data from various sources, such as databases, APIs, files, or streaming services. 
@@ -37,7 +37,7 @@ The Ingest Library/CLI offers the following key features: ## Architecture -The Ingest Library follows a modular architecture comprising the following components: +The Unstructured Python Ingest library follows a modular architecture comprising the following components: * Source Connectors: These components are responsible for fetching data from external sources, which can include databases, web services, file systems, or data streams. @@ -54,15 +54,15 @@ The Ingest Library follows a modular architecture comprising the following compo ## Installation -To install the Ingest Library, follow these steps: +To install the Unstructured Python Ingest library, follow these steps: 1. Run `pip install unstructured` to install the latest version of the unstructured library which include the ingest code and the cli. -2. For specific connectors, run `pip install unstructured[CONNECTOR_DEPS]` where `CONNECTOR_DEPS` references the extra dependency label for a particular connector. For example, `pip install unstructured[s3]` will install the dependencies to interact with the s3 connectors. If these aren’t installed before hand, a convenient error message will be printed for you when you run the ingest CLI for the first time, prompting you with the correct pip command to run. +2. For specific connectors, run `pip install unstructured[CONNECTOR_DEPS]` where `CONNECTOR_DEPS` references the extra dependency label for a particular connector. For example, `pip install unstructured[s3]` will install the dependencies to interact with the s3 connectors. If these aren’t installed before hand, a convenient error message will be printed for you when you run the Unstructured Ingest CLI for the first time, prompting you with the correct pip command to run. 3. Once installed, you can run `unstructured-ingest --help` to get all the available commands. ## Configuration -The Ingest Library requires configuration to define data sources, ingestion processes, and destination targets. 
For the CLI, configuration is done through the various cli parameters supported. When the library is run in python, those parameters that are exposed in the CLI map to python config classes, which are described in more detail in the configs section. \ No newline at end of file +The Unstructured Python Ingest library requires configuration to define data sources, ingestion processes, and destination targets. For the CLI, configuration is done through the various cli parameters supported. When the library is run in python, those parameters that are exposed in the CLI map to python config classes, which are described in more detail in the configs section. \ No newline at end of file diff --git a/open-source/ingest/source-connectors/airtable.mdx b/open-source/ingest/source-connectors/airtable.mdx index 8369e24d..6cd92cc2 100644 --- a/open-source/ingest/source-connectors/airtable.mdx +++ b/open-source/ingest/source-connectors/airtable.mdx @@ -19,4 +19,4 @@ import AirtablePy from '/snippets/source_connectors/airtable.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest airtable --help`. +For a full list of the options that the Unstructured Ingest CLI accepts check `unstructured-ingest airtable --help`. diff --git a/open-source/ingest/source-connectors/azure.mdx b/open-source/ingest/source-connectors/azure.mdx index 66abc17b..5b391226 100644 --- a/open-source/ingest/source-connectors/azure.mdx +++ b/open-source/ingest/source-connectors/azure.mdx @@ -10,7 +10,7 @@ import SharedContentAzure from '/snippets/sc-shared-text/azure.mdx'; -Now call the Unstructured CLI or Python. The destination connector can be any of the ones supported. This example uses the local destination connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The destination connector can be any of the ones supported. 
This example uses the local destination connector: import AzureSh from '/snippets/source_connectors/azure.sh.mdx'; import AzurePyV2 from '/snippets/source_connectors/azure.v2.py.mdx'; diff --git a/open-source/ingest/source-connectors/biomed.mdx b/open-source/ingest/source-connectors/biomed.mdx index 093f44ce..ee0753f1 100644 --- a/open-source/ingest/source-connectors/biomed.mdx +++ b/open-source/ingest/source-connectors/biomed.mdx @@ -17,4 +17,4 @@ import BiomedPy from '/snippets/source_connectors/biomed.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest biomed --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest biomed --help`. diff --git a/open-source/ingest/source-connectors/box.mdx b/open-source/ingest/source-connectors/box.mdx index b511dcca..016820a9 100644 --- a/open-source/ingest/source-connectors/box.mdx +++ b/open-source/ingest/source-connectors/box.mdx @@ -18,4 +18,4 @@ import BoxPy from '/snippets/source_connectors/box.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest box --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest box --help`. diff --git a/open-source/ingest/source-connectors/confluence.mdx b/open-source/ingest/source-connectors/confluence.mdx index 73c8d834..7031439b 100644 --- a/open-source/ingest/source-connectors/confluence.mdx +++ b/open-source/ingest/source-connectors/confluence.mdx @@ -17,4 +17,4 @@ import ConfluencePy from '/snippets/source_connectors/confluence.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest confluence --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest confluence --help`. 
\ No newline at end of file diff --git a/open-source/ingest/source-connectors/delta-table.mdx b/open-source/ingest/source-connectors/delta-table.mdx index c0cbde03..7757681e 100644 --- a/open-source/ingest/source-connectors/delta-table.mdx +++ b/open-source/ingest/source-connectors/delta-table.mdx @@ -17,4 +17,4 @@ import DeltaTablePy from '/snippets/source_connectors/delta_table.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest delta-table --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest delta-table --help`. diff --git a/open-source/ingest/source-connectors/discord.mdx b/open-source/ingest/source-connectors/discord.mdx index cecd435b..6570c1d5 100644 --- a/open-source/ingest/source-connectors/discord.mdx +++ b/open-source/ingest/source-connectors/discord.mdx @@ -17,4 +17,4 @@ import DiscordPy from '/snippets/source_connectors/discord.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest discord --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest discord --help`. \ No newline at end of file diff --git a/open-source/ingest/source-connectors/dropbox.mdx b/open-source/ingest/source-connectors/dropbox.mdx index 2415bad6..cea4f481 100644 --- a/open-source/ingest/source-connectors/dropbox.mdx +++ b/open-source/ingest/source-connectors/dropbox.mdx @@ -18,4 +18,4 @@ import DropboxPy from '/snippets/source_connectors/dropbox.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest dropbox --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest dropbox --help`. 
\ No newline at end of file diff --git a/open-source/ingest/source-connectors/elastic-search.mdx b/open-source/ingest/source-connectors/elastic-search.mdx index c53b26e1..eccf6344 100644 --- a/open-source/ingest/source-connectors/elastic-search.mdx +++ b/open-source/ingest/source-connectors/elastic-search.mdx @@ -17,4 +17,4 @@ import ElasticsearchPy from '/snippets/source_connectors/elasticsearch.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest elasticsearch --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest elasticsearch --help`. \ No newline at end of file diff --git a/open-source/ingest/source-connectors/github.mdx b/open-source/ingest/source-connectors/github.mdx index 6c812b84..d725db89 100644 --- a/open-source/ingest/source-connectors/github.mdx +++ b/open-source/ingest/source-connectors/github.mdx @@ -17,4 +17,4 @@ import GitHubPy from '/snippets/source_connectors/github.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest github --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest github --help`. diff --git a/open-source/ingest/source-connectors/gitlab.mdx b/open-source/ingest/source-connectors/gitlab.mdx index fa3888fd..b762649c 100644 --- a/open-source/ingest/source-connectors/gitlab.mdx +++ b/open-source/ingest/source-connectors/gitlab.mdx @@ -17,4 +17,4 @@ import GitLabPy from '/snippets/source_connectors/gitlab.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest gitlab --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest gitlab --help`. 
diff --git a/open-source/ingest/source-connectors/google-cloud-storage.mdx b/open-source/ingest/source-connectors/google-cloud-storage.mdx index b15f81ef..35d11c42 100644 --- a/open-source/ingest/source-connectors/google-cloud-storage.mdx +++ b/open-source/ingest/source-connectors/google-cloud-storage.mdx @@ -17,4 +17,4 @@ import GCSPy from '/snippets/source_connectors/gcs.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest gcs --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest gcs --help`. diff --git a/open-source/ingest/source-connectors/jira.mdx b/open-source/ingest/source-connectors/jira.mdx index a5fcb3ee..7acfd5bc 100644 --- a/open-source/ingest/source-connectors/jira.mdx +++ b/open-source/ingest/source-connectors/jira.mdx @@ -17,4 +17,4 @@ import JiraPy from '/snippets/source_connectors/jira.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest jira --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest jira --help`. \ No newline at end of file diff --git a/open-source/ingest/source-connectors/local.mdx b/open-source/ingest/source-connectors/local.mdx index 2839d320..6b406d0d 100644 --- a/open-source/ingest/source-connectors/local.mdx +++ b/open-source/ingest/source-connectors/local.mdx @@ -10,7 +10,7 @@ import SharedContentLocal from '/snippets/sc-shared-text/local.mdx'; -Now call the Unstructured CLI or Python. The destination connector can be any of the ones supported. This example uses the local destination connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The destination connector can be any of the ones supported. 
This example uses the local destination connector: import LocalSh from '/snippets/source_connectors/local.sh.mdx'; import LocalPyV2 from '/snippets/source_connectors/local.v2.py.mdx'; diff --git a/open-source/ingest/source-connectors/mongodb.mdx b/open-source/ingest/source-connectors/mongodb.mdx index 56d71fc8..a695f226 100644 --- a/open-source/ingest/source-connectors/mongodb.mdx +++ b/open-source/ingest/source-connectors/mongodb.mdx @@ -23,4 +23,4 @@ import MongoDBPy from '/snippets/source_connectors/mongodb.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest mongodb --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest mongodb --help`. diff --git a/open-source/ingest/source-connectors/notion.mdx b/open-source/ingest/source-connectors/notion.mdx index 3b65809f..a2af0e11 100644 --- a/open-source/ingest/source-connectors/notion.mdx +++ b/open-source/ingest/source-connectors/notion.mdx @@ -17,4 +17,4 @@ import NotionPy from '/snippets/source_connectors/notion.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest notion --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest notion --help`. \ No newline at end of file diff --git a/open-source/ingest/source-connectors/one-drive.mdx b/open-source/ingest/source-connectors/one-drive.mdx index aa606ff2..449e3371 100644 --- a/open-source/ingest/source-connectors/one-drive.mdx +++ b/open-source/ingest/source-connectors/one-drive.mdx @@ -17,4 +17,4 @@ import OneDrivePy from '/snippets/source_connectors/onedrive.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest onedrive --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest onedrive --help`. 
diff --git a/open-source/ingest/source-connectors/opensearch.mdx b/open-source/ingest/source-connectors/opensearch.mdx index dc889100..0dcf8be7 100644 --- a/open-source/ingest/source-connectors/opensearch.mdx +++ b/open-source/ingest/source-connectors/opensearch.mdx @@ -17,4 +17,4 @@ import OpenSearchPy from '/snippets/source_connectors/opensearch.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest opensearch --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest opensearch --help`. diff --git a/open-source/ingest/source-connectors/outlook.mdx b/open-source/ingest/source-connectors/outlook.mdx index 4328b824..ff88345f 100644 --- a/open-source/ingest/source-connectors/outlook.mdx +++ b/open-source/ingest/source-connectors/outlook.mdx @@ -17,4 +17,4 @@ import OutlookPy from '/snippets/source_connectors/outlook.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest outlook --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest outlook --help`. diff --git a/open-source/ingest/source-connectors/reddit.mdx b/open-source/ingest/source-connectors/reddit.mdx index 008995bf..fb620a9e 100644 --- a/open-source/ingest/source-connectors/reddit.mdx +++ b/open-source/ingest/source-connectors/reddit.mdx @@ -17,4 +17,4 @@ import RedditPy from '/snippets/source_connectors/reddit.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest reddit --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest reddit --help`. 
diff --git a/open-source/ingest/source-connectors/s3.mdx b/open-source/ingest/source-connectors/s3.mdx index 7717c4e7..12bafd2d 100644 --- a/open-source/ingest/source-connectors/s3.mdx +++ b/open-source/ingest/source-connectors/s3.mdx @@ -10,7 +10,7 @@ import SharedContentS3 from '/snippets/sc-shared-text/s3.mdx'; -Now call the Unstructured CLI or Python. The destination connector can be any of the ones supported. This example uses the local destination connector: +Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The destination connector can be any of the ones supported. This example uses the local destination connector: import S3Sh from '/snippets/source_connectors/s3.sh.mdx'; import S3PyV2 from '/snippets/source_connectors/s3.v2.py.mdx'; diff --git a/open-source/ingest/source-connectors/salesforce.mdx b/open-source/ingest/source-connectors/salesforce.mdx index ccd68d9e..cfd555d9 100644 --- a/open-source/ingest/source-connectors/salesforce.mdx +++ b/open-source/ingest/source-connectors/salesforce.mdx @@ -17,4 +17,4 @@ import SalesforcePy from '/snippets/source_connectors/salesforce.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest salesforce --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest salesforce --help`. diff --git a/open-source/ingest/source-connectors/sftp.mdx b/open-source/ingest/source-connectors/sftp.mdx index 5799e14b..9d7db181 100644 --- a/open-source/ingest/source-connectors/sftp.mdx +++ b/open-source/ingest/source-connectors/sftp.mdx @@ -17,4 +17,4 @@ import SFTPPy from '/snippets/source_connectors/sftp.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest sftp --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest sftp --help`. 
\ No newline at end of file diff --git a/open-source/ingest/source-connectors/sharepoint.mdx b/open-source/ingest/source-connectors/sharepoint.mdx index 39796c3c..0c92c3c8 100644 --- a/open-source/ingest/source-connectors/sharepoint.mdx +++ b/open-source/ingest/source-connectors/sharepoint.mdx @@ -17,4 +17,4 @@ import SharepointPy from '/snippets/source_connectors/sharepoint.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest sharepoint --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest sharepoint --help`. \ No newline at end of file diff --git a/open-source/ingest/source-connectors/slack.mdx b/open-source/ingest/source-connectors/slack.mdx index 4acca93e..48d786d6 100644 --- a/open-source/ingest/source-connectors/slack.mdx +++ b/open-source/ingest/source-connectors/slack.mdx @@ -17,4 +17,4 @@ import SlackPy from '/snippets/source_connectors/slack.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest slack --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest slack --help`. \ No newline at end of file diff --git a/open-source/ingest/source-connectors/wikipedia.mdx b/open-source/ingest/source-connectors/wikipedia.mdx index be15be1b..1a40c966 100644 --- a/open-source/ingest/source-connectors/wikipedia.mdx +++ b/open-source/ingest/source-connectors/wikipedia.mdx @@ -17,4 +17,4 @@ import WikipediaPy from '/snippets/source_connectors/wikipedia.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest wikipedia --help`. \ No newline at end of file +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest wikipedia --help`. 
\ No newline at end of file diff --git a/snippets/dc-shared-text/azure-cognitive-search.mdx b/snippets/dc-shared-text/azure-cognitive-search.mdx index 8f128932..d0a90a3d 100644 --- a/snippets/dc-shared-text/azure-cognitive-search.mdx +++ b/snippets/dc-shared-text/azure-cognitive-search.mdx @@ -20,7 +20,7 @@ import AzureCognitiveSearchPy from '/snippets/destination_connectors/azure_cogni -For a full list of the options the CLI accepts check `unstructured-ingest azure-cognitive-search --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest azure-cognitive-search --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). diff --git a/snippets/dc-shared-text/box.mdx b/snippets/dc-shared-text/box.mdx index 294ccfa6..28f48e0f 100644 --- a/snippets/dc-shared-text/box.mdx +++ b/snippets/dc-shared-text/box.mdx @@ -20,6 +20,6 @@ import BoxPy from '/snippets/destination_connectors/box.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest box --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest box --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/snippets/dc-shared-text/chroma.mdx b/snippets/dc-shared-text/chroma.mdx index 5a0975ac..527f03ae 100644 --- a/snippets/dc-shared-text/chroma.mdx +++ b/snippets/dc-shared-text/chroma.mdx @@ -20,6 +20,6 @@ import ChromaPy from '/snippets/destination_connectors/chroma.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest chroma --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest chroma --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/snippets/dc-shared-text/clarifai.mdx b/snippets/dc-shared-text/clarifai.mdx index 8b6a7cdd..3ed3c41f 100644 --- a/snippets/dc-shared-text/clarifai.mdx +++ b/snippets/dc-shared-text/clarifai.mdx @@ -22,6 +22,6 @@ import ClarifaiPy from '/snippets/destination_connectors/clarifai.py.mdx'; The upstream connector can be any of the ones supported, but for the convenience here, showing a sample command using the upstream local connector. -For a full list of the options the CLI accepts check `unstructured-ingest clarifai --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest clarifai --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/snippets/dc-shared-text/databricks-volumes.mdx b/snippets/dc-shared-text/databricks-volumes.mdx index 4803ceff..60729d6d 100644 --- a/snippets/dc-shared-text/databricks-volumes.mdx +++ b/snippets/dc-shared-text/databricks-volumes.mdx @@ -20,6 +20,6 @@ import DatabricksVolumesPy from '/snippets/destination_connectors/databricks_vol -For a full list of the options the CLI accepts check `unstructured-ingest databricks-volumes --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest databricks-volumes --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/snippets/dc-shared-text/delta-table.mdx b/snippets/dc-shared-text/delta-table.mdx index 8e403b32..7e386001 100644 --- a/snippets/dc-shared-text/delta-table.mdx +++ b/snippets/dc-shared-text/delta-table.mdx @@ -20,6 +20,6 @@ import DeltaTablePy from '/snippets/destination_connectors/delta_table.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest delta-table --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest delta-table --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/snippets/dc-shared-text/dropbox.mdx b/snippets/dc-shared-text/dropbox.mdx index fd835846..d10be6fb 100644 --- a/snippets/dc-shared-text/dropbox.mdx +++ b/snippets/dc-shared-text/dropbox.mdx @@ -20,6 +20,6 @@ import DropboxPy from '/snippets/destination_connectors/dropbox.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest dropbox --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest dropbox --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/snippets/dc-shared-text/elasticsearch.mdx b/snippets/dc-shared-text/elasticsearch.mdx index c1a26028..31b46d85 100644 --- a/snippets/dc-shared-text/elasticsearch.mdx +++ b/snippets/dc-shared-text/elasticsearch.mdx @@ -20,7 +20,7 @@ import ElasticsearchPy from '/snippets/destination_connectors/elasticsearch.py.m -For a full list of the options the CLI accepts check `unstructured-ingest elasticsearch --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest elasticsearch --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
diff --git a/snippets/dc-shared-text/google-cloud-service.mdx b/snippets/dc-shared-text/google-cloud-service.mdx index 29c35bba..cd4de1ab 100644 --- a/snippets/dc-shared-text/google-cloud-service.mdx +++ b/snippets/dc-shared-text/google-cloud-service.mdx @@ -20,6 +20,6 @@ import GCSPy from '/snippets/destination_connectors/gcs.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest gcs --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest gcs --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/snippets/dc-shared-text/mongodb.mdx b/snippets/dc-shared-text/mongodb.mdx index 8bf66139..ced6e495 100644 --- a/snippets/dc-shared-text/mongodb.mdx +++ b/snippets/dc-shared-text/mongodb.mdx @@ -20,6 +20,6 @@ import MongoDBPy from '/snippets/destination_connectors/mongodb.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest mongodb --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest mongodb --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/snippets/dc-shared-text/opensearch.mdx b/snippets/dc-shared-text/opensearch.mdx index c3caca7b..47878bc9 100644 --- a/snippets/dc-shared-text/opensearch.mdx +++ b/snippets/dc-shared-text/opensearch.mdx @@ -20,7 +20,7 @@ import OpensearchPy from '/snippets/destination_connectors/opensearch.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest opensearch --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest opensearch --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). diff --git a/snippets/dc-shared-text/pinecone.mdx b/snippets/dc-shared-text/pinecone.mdx index 964c3a46..3ab147de 100644 --- a/snippets/dc-shared-text/pinecone.mdx +++ b/snippets/dc-shared-text/pinecone.mdx @@ -19,6 +19,6 @@ import PineconePy from '/snippets/destination_connectors/pinecone.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest pinecone --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest pinecone --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/snippets/dc-shared-text/qdrant.mdx b/snippets/dc-shared-text/qdrant.mdx index 36193d45..73844902 100644 --- a/snippets/dc-shared-text/qdrant.mdx +++ b/snippets/dc-shared-text/qdrant.mdx @@ -22,6 +22,6 @@ import QdrantPy from '/snippets/destination_connectors/qdrant.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest qdrant --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest qdrant --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/snippets/dc-shared-text/sql.mdx b/snippets/dc-shared-text/sql.mdx index 3ed89390..82164aea 100644 --- a/snippets/dc-shared-text/sql.mdx +++ b/snippets/dc-shared-text/sql.mdx @@ -22,7 +22,7 @@ import SQLPy from '/snippets/destination_connectors/sql.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest sql --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest sql --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
diff --git a/snippets/dc-shared-text/vectara.mdx b/snippets/dc-shared-text/vectara.mdx index aa06a6ce..89274a54 100644 --- a/snippets/dc-shared-text/vectara.mdx +++ b/snippets/dc-shared-text/vectara.mdx @@ -14,6 +14,6 @@ import VectaraPy from '/snippets/destination_connectors/vectara.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest vectara --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest vectara --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/snippets/dc-shared-text/weaviate.mdx b/snippets/dc-shared-text/weaviate.mdx index 98cb38e4..bbfc7279 100644 --- a/snippets/dc-shared-text/weaviate.mdx +++ b/snippets/dc-shared-text/weaviate.mdx @@ -20,7 +20,7 @@ import WeaviatePy from '/snippets/destination_connectors/weaviate.py.mdx'; -For a full list of the options the CLI accepts check `unstructured-ingest weaviate --help`. +For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest weaviate --help`. NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
diff --git a/snippets/general-shared-text/connector-availability-cli-sdk.mdx b/snippets/general-shared-text/connector-availability-cli-sdk.mdx index f8cffc1b..870bf1f3 100644 --- a/snippets/general-shared-text/connector-availability-cli-sdk.mdx +++ b/snippets/general-shared-text/connector-availability-cli-sdk.mdx @@ -1 +1 @@ -All connectors can be used by the Unstructured CLI and Unstructured Python SDK. The Unstructured JavaScript/TypeScript SDK supports only local sources/destinations on the same machine. \ No newline at end of file +All connectors can be used by the Unstructured Ingest CLI and the Unstructured Ingest Python library. The Unstructured JavaScript/TypeScript SDK supports only local sources/destinations on the same machine. \ No newline at end of file diff --git a/snippets/general-shared-text/multi-file-api-use-connectors.mdx b/snippets/general-shared-text/multi-file-api-use-connectors.mdx index 19d9cc91..ff341df5 100644 --- a/snippets/general-shared-text/multi-file-api-use-connectors.mdx +++ b/snippets/general-shared-text/multi-file-api-use-connectors.mdx @@ -1 +1 @@ -The `partition` method in the Python and JavaScript/TypeScript SDKs process single files only. To process multiple files at a time, use the Unstructured CLI or Unstructured Python SDK with their provided [source connectors](/api-reference/ingest/source-connectors/overview) and [destination connectors](/api-reference/ingest/destination-connector/overview). \ No newline at end of file +The `partition` method in the Python and JavaScript/TypeScript SDKs processes single files only. To process multiple files at a time, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) with their provided [source connectors](/api-reference/ingest/source-connectors/overview) and [destination connectors](/api-reference/ingest/destination-connector/overview).
\ No newline at end of file diff --git a/snippets/general-shared-text/multi-file-oss-use-connectors.mdx b/snippets/general-shared-text/multi-file-oss-use-connectors.mdx index 59032df9..a86940c1 100644 --- a/snippets/general-shared-text/multi-file-oss-use-connectors.mdx +++ b/snippets/general-shared-text/multi-file-oss-use-connectors.mdx @@ -1 +1 @@ -The `partition` methods and functions in the open source library process single files only. To process multiple files at a time, use the Unstructured CLI or the open source library with their provided [source connectors](/open-source/ingest/source-connectors/overview) and [destination connectors](/open-source/ingest/destination-connectors/overview). \ No newline at end of file +The `partition` methods and functions in the open source library process single files only. To process multiple files at a time, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) with their provided [source connectors](/open-source/ingest/source-connectors/overview) and [destination connectors](/open-source/ingest/destination-connectors/overview). \ No newline at end of file diff --git a/snippets/general-shared-text/multi-file-partition-via-api.mdx b/snippets/general-shared-text/multi-file-partition-via-api.mdx index d2dc4e1d..63c4da2a 100644 --- a/snippets/general-shared-text/multi-file-partition-via-api.mdx +++ b/snippets/general-shared-text/multi-file-partition-via-api.mdx @@ -1 +1 @@ -The `partition_via_api` function in the open source library processes single files only. To process multiple files at a time, use the Unstructured CLI or the open source library with their provided [source connectors](/open-source/ingest/source-connectors/overview) and [destination connectors](/open-source/ingest/destination-connectors/overview). 
\ No newline at end of file +The `partition_via_api` function in the open source library processes single files only. To process multiple files at a time, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) with their provided [source connectors](/open-source/ingest/source-connectors/overview) and [destination connectors](/open-source/ingest/destination-connectors/overview). \ No newline at end of file diff --git a/snippets/general-shared-text/post-api-single-file.mdx b/snippets/general-shared-text/post-api-single-file.mdx index e78851f4..51bda074 100644 --- a/snippets/general-shared-text/post-api-single-file.mdx +++ b/snippets/general-shared-text/post-api-single-file.mdx @@ -1 +1 @@ -Unstructured does not recommend `POST` to process multiple files at a time. Instead, use the Unstructured CLI or the Unstructured Python SDK with their provided [source connectors](/api-reference/ingest/source-connectors/overview) and [destination connectors](/api-reference/ingest/destination-connector/overview). \ No newline at end of file +Unstructured does not recommend `POST` to process multiple files at a time. Instead, use the Unstructured Ingest CLI or the Unstructured Ingest Python library with their provided [source connectors](/api-reference/ingest/source-connectors/overview) and [destination connectors](/api-reference/ingest/destination-connector/overview). \ No newline at end of file diff --git a/snippets/ingest-configuration-shared/partition-by-api-oss.mdx b/snippets/ingest-configuration-shared/partition-by-api-oss.mdx index daf18b54..949c62d1 100644 --- a/snippets/ingest-configuration-shared/partition-by-api-oss.mdx +++ b/snippets/ingest-configuration-shared/partition-by-api-oss.mdx @@ -1,6 +1,4 @@ -For the Unstructured open source library, ingestion works mostly the same as [Unstructured API services](/api-reference/api-services/overview).
- -One of the key differences is with the open source library, you can use the `--partition-by-api` option (CLI) or `parition_by_api` (Python) parameter to specify where files are processed: +For the Unstructured Ingest CLI and the Unstructured Ingest Python library, you can use the `--partition-by-api` option (CLI) or `partition_by_api` (Python) parameter to specify where files are processed: - To do local file processing, omit `--partition-by-api` (CLI) or `partition_by_api` (Python), or explicitly specify `partition_by_api=False` (Python). @@ -10,7 +8,7 @@ One of the key differences is with the open source library, you can use the `--p - `--partition-endpoint $UNSTRUCTURED_API_URL` (CLI) or `partition_endpoint=os.getenv["UNSTRUCTURED_API_URL"]` (Python) - The environment variables `UNSTRUCTURED_API_KEY` and `UNSTRUCTURED_API_URL` -- To send files to Unstructured API services for processing, specify `--partition-by-api` (CLI) or `partition_by_api=True` (Python). +- To send files to [Unstructured API services](/api-reference/api-services/overview) for processing, specify `--partition-by-api` (CLI) or `partition_by_api=True` (Python). Unstructured API services also requires an Unstructured API key and API URL, by adding the following: diff --git a/snippets/sc-shared-text/astradb.mdx b/snippets/sc-shared-text/astradb.mdx index f207a210..f31a17bd 100644 --- a/snippets/sc-shared-text/astradb.mdx +++ b/snippets/sc-shared-text/astradb.mdx @@ -1,4 +1,4 @@ -Connect Astra DB to your preprocessing pipeline, and use the Unstructured CLI or Python to batch process all your documents and store structured outputs locally on your filesystem. +Connect Astra DB to your preprocessing pipeline, and use the Unstructured Ingest CLI or the Unstructured Ingest Python library to batch process all your documents and store structured outputs locally on your filesystem.
You will need: diff --git a/snippets/sc-shared-text/azure.mdx b/snippets/sc-shared-text/azure.mdx index 502f49d8..575ab55e 100644 --- a/snippets/sc-shared-text/azure.mdx +++ b/snippets/sc-shared-text/azure.mdx @@ -1,4 +1,4 @@ -Connect Azure Storage to your preprocessing pipeline, and use the Unstructured CLI or Python to batch process all your documents and store structured outputs locally on your filesystem. +Connect Azure Storage to your preprocessing pipeline, and use the Unstructured Ingest CLI or the Unstructured Ingest Python library to batch process all your documents and store structured outputs locally on your filesystem. You will need: diff --git a/snippets/sc-shared-text/google-drive.mdx b/snippets/sc-shared-text/google-drive.mdx index 39e1d094..eb146461 100644 --- a/snippets/sc-shared-text/google-drive.mdx +++ b/snippets/sc-shared-text/google-drive.mdx @@ -1,4 +1,4 @@ -Connect Google Drive to your preprocessing pipeline, and use the Unstructured CLI or Python to batch process all your documents and store structured outputs locally on your filesystem. +Connect Google Drive to your preprocessing pipeline, and use the Unstructured Ingest CLI or the Unstructured Ingest Python library to batch process all your documents and store structured outputs locally on your filesystem. You will need: diff --git a/snippets/sc-shared-text/local.mdx b/snippets/sc-shared-text/local.mdx index 59bbbe56..0389ea1c 100644 --- a/snippets/sc-shared-text/local.mdx +++ b/snippets/sc-shared-text/local.mdx @@ -1,4 +1,4 @@ -Connect local files to your preprocessing pipeline, and use the Unstructured CLI or Python to batch process all your documents and store structured outputs locally on your filesystem. +Connect local files to your preprocessing pipeline, and use the Unstructured Ingest CLI or the Unstructured Ingest Python library to batch process all your documents and store structured outputs locally on your filesystem. 
You will need the local source connector dependencies: diff --git a/snippets/sc-shared-text/s3.mdx b/snippets/sc-shared-text/s3.mdx index bc619ce1..0f086127 100644 --- a/snippets/sc-shared-text/s3.mdx +++ b/snippets/sc-shared-text/s3.mdx @@ -1,4 +1,4 @@ -Connect S3 to your preprocessing pipeline, and use the Unstructured CLI or Python to batch process all your documents and store structured outputs locally on your filesystem. +Connect S3 to your preprocessing pipeline, and use the Unstructured Ingest CLI or the Unstructured Ingest Python library to batch process all your documents and store structured outputs locally on your filesystem. You will need: diff --git a/welcome.mdx b/welcome.mdx index 1e9b1fc2..b27f0296 100644 --- a/welcome.mdx +++ b/welcome.mdx @@ -22,7 +22,7 @@ Learn more about these products:   [Read the announcement](https://unstructured.io/blog/introducing-unstructured-platform). -
Use scripts or code to call the Unstructured CLI, SDKs, or REST API to get all of your data RAG-ready.

+
Use scripts or code to call the Unstructured Ingest CLI, SDKs, or REST API to get all of your data RAG-ready.

Unstructured Serverless API Services have a [Serverless](api-reference/api-services/saas-api-development-guide) pay-as-you-go edition and a [Free](/api-reference/api-services/free-api_) [limited](/api-reference/api-services/free-api#free-unstructured-api-limitations) edition that process data on Unstructured-hosted compute resources.

If you need to use compute resources that you host instead, there are also [Azure](/api-reference/api-services/azure) pay-as-you-go and [AWS](/api-reference/api-services/aws) pay-as-you-go editions; these editions process data by using the Unstructured API installed on compute resources hosted in your own Azure or AWS account.

[Try the quickstart](#quickstart-unstructured-api-service).

@@ -52,14 +52,14 @@ import SharedPlatform from '/snippets/quickstarts/platform.mdx'; ### Quickstart: Unstructured API service -This quickstart uses your local machine for the source (input) and destination (output) locations, and the [Free Unstructured API](/api-reference/api-services/free-api) edition. Data is processed on Unstructured-hosted compute resources. +This quickstart uses your local machine for the source (input) and destination (output) locations, and the [Unstructured Python SDK](/api-reference/api-services/sdk). Data is processed on Unstructured-hosted compute resources. The Free Unstructured API has [limits](/api-reference/api-services/free-api#free-unstructured-api-limitations). To remove these limits, sign up for the [Unstructured Serverless API](/api-reference/api-services/saas-api-development-guide). You will need: - Python installed on your local machine. -- Compatible files on your local machine to be processed. [See the list of supported file types](/api-reference/api-services/overview#supported-file-types). If you do not have any files available, you can download some from the [example-docs](https://github.com/Unstructured-IO/unstructured/tree/main/example-docs) folder in the Unstructured repo on GitHub. +- A compatible file on your local machine to be processed. [See the list of supported file types](/api-reference/api-services/overview#supported-file-types). If you do not have any files available, you can download some from the [example-docs](https://github.com/Unstructured-IO/unstructured/tree/main/example-docs) folder in the Unstructured repo on GitHub. @@ -73,29 +73,61 @@ You will need: 1. Set an environment variable named `UNSTRUCTURED_API_KEY` to the value of your Unstructured API key. 2. 
Set another environment variable named `UNSTRUCTURED_API_URL` to the Free Unstructured API URL, which is `https://api.unstructured.io/general/v0/general` - + Run the following command: ```bash - pip install "unstructured[all-docs]" + pip install unstructured-client ``` Run the following command, replacing: - - `` with the source (input) path on your local machine that contains the compatible files for Unstructured to process on its hosted compute resources. - - `` with the destination (output) path on your local machine that will contain the processed data that Unstructured returns from its hosted compute resources. - - ```bash - unstructured-ingest \ - local \ - --input-path \ - --output-dir \ - --partition-by-api \ - --api-key $UNSTRUCTURED_API_KEY \ - --partition-endpoint $UNSTRUCTURED_API_URL + - `` with the source (input) path on your local machine that contains the compatible file for Unstructured to process on its hosted compute resources. + - `` with the destination (output) path on your local machine that will contain the processed data that Unstructured returns from its hosted compute resources. + + ```python + import json, os + + from unstructured_client import UnstructuredClient + from unstructured_client.models import operations, shared + + input_filepath = "" + output_filepath = "" + + client = UnstructuredClient( + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), + server_url=os.getenv("UNSTRUCTURED_API_URL"), + ) + + with open(input_filepath, "rb") as f: + files = shared.Files( + content=f.read(), + file_name=input_filepath + ) + + req = operations.PartitionRequest( + shared.PartitionParameters( + files=files, + strategy=shared.Strategy.AUTO + ) + ) + + try: + res = client.general.partition(request=req) + element_dicts = [element for element in res.elements] + json_elements = json.dumps(element_dicts, indent=2) + + # Print the processed data. + print(json_elements) + + # Write the processed data to a local file. 
+ with open(output_filepath, "w") as file: + file.write(json_elements) + except Exception as e: + print(e) ``` - For speed, this quickstart uses the Unstructured CLI with the minimum number of required command options. You can also use the [Unstructured Python SDK](/api-reference/api-services/sdk-python), the [Unstructured JavaScript/TypeScript SDK](/api-reference/api-services/sdk-jsts), or make a direct [POST request](/api-reference/api-services/post-requests) to the Unstructured API services. + The Python SDK works with only a single file. To process multiple files at a time, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) instead. Go to your destination location to view the processed data. @@ -115,7 +147,7 @@ This quickstart uses your local machine for the source (input) and destination ( You will need: - Python installed on your local machine. -- Compatible files on your local machine to be processed. [See the list of supported file types](/api-reference/api-services/overview#supported-file-types). If you do not have any files available, you can download some from the [example-docs](https://github.com/Unstructured-IO/unstructured/tree/main/example-docs) folder in the Unstructured repo on GitHub. +- A compatible file on your local machine to be processed. [See the list of supported file types](/api-reference/api-services/overview#supported-file-types). If you do not have any files available, you can download some from the [example-docs](https://github.com/Unstructured-IO/unstructured/tree/main/example-docs) folder in the Unstructured repo on GitHub. @@ -127,17 +159,33 @@ You will need: Run the following command, replacing: - - `` with the source (input) path on your local machine that contains the compatible files to process. - - `` with the destination (output) path on your local machine that will contain the processed data.
+ - `` with the source (input) path on your local machine that contains the compatible file to process. + - `` with the destination (output) path on your local machine that will contain the processed data. - ```bash - unstructured-ingest \ - local \ - --input-path \ - --output-dir + ```python + import json + + from unstructured.partition.auto import partition + + input_filepath = "" + output_filepath = "" + + elements = partition( + filename=input_filepath + ) + + element_dicts = [element.to_dict() for element in elements] + json_elements = json.dumps(element_dicts, indent=2) + + # Print the processed data. + print(json_elements) + + # Write the processed data to a local file. + with open(output_filepath, "w") as file: + file.write(json_elements) ``` - For speed, this quickstart uses the open source CLI with the minimum number of required command options. You can also use [Python](/open-source/introduction/quick-start) code. + To process multiple files at a time, use the [Unstructured Ingest CLI](/ingestion/overview#unstructured-ingest-cli) or the [Unstructured Ingest Python library](/ingestion/overview#unstructured-ingest-python-library) instead. Go to your destination location to view the processed data.