From a59282a3971947353489ab0f4864edf683ce1fc7 Mon Sep 17 00:00:00 2001 From: Maria Khalusova Date: Mon, 29 Apr 2024 14:38:34 -0400 Subject: [PATCH] Fixed links in the docs (#21) --- open-source/ingest/destination-connectors/astra.mdx | 2 +- .../azure-cognitive-search.mdx | 2 +- open-source/ingest/destination-connectors/azure.mdx | 2 +- open-source/ingest/destination-connectors/box.mdx | 2 +- open-source/ingest/destination-connectors/chroma.mdx | 2 +- .../ingest/destination-connectors/clarifai.mdx | 2 +- .../destination-connectors/databricks-volumes.mdx | 2 +- .../ingest/destination-connectors/delta-table.mdx | 2 +- .../ingest/destination-connectors/dropbox.mdx | 2 +- .../ingest/destination-connectors/elasticsearch.mdx | 2 +- .../destination-connectors/google-cloud-service.mdx | 2 +- .../ingest/destination-connectors/mongodb.mdx | 2 +- .../ingest/destination-connectors/opensearch.mdx | 2 +- .../ingest/destination-connectors/pinecone.mdx | 2 +- open-source/ingest/destination-connectors/qdrant.mdx | 2 +- open-source/ingest/destination-connectors/s3.mdx | 2 +- open-source/ingest/destination-connectors/sql.mdx | 2 +- .../ingest/destination-connectors/vectara.mdx | 2 +- .../ingest/destination-connectors/weaviate.mdx | 2 +- open-source/ingest/overview.mdx | 6 +++--- open-source/ingest/source-connectors/airtable.mdx | 2 +- open-source/ingest/source-connectors/azure.mdx | 2 +- open-source/ingest/source-connectors/biomed.mdx | 2 +- open-source/ingest/source-connectors/box.mdx | 2 +- open-source/ingest/source-connectors/confluence.mdx | 2 +- open-source/ingest/source-connectors/delta-table.mdx | 2 +- open-source/ingest/source-connectors/discord.mdx | 2 +- open-source/ingest/source-connectors/dropbox.mdx | 2 +- .../ingest/source-connectors/elastic-search.mdx | 2 +- open-source/ingest/source-connectors/github.mdx | 2 +- open-source/ingest/source-connectors/gitlab.mdx | 2 +- .../source-connectors/google-cloud-storage.mdx | 2 +- .../ingest/source-connectors/google-drive.mdx | 2 
+- open-source/ingest/source-connectors/jira.mdx | 2 +- open-source/ingest/source-connectors/local.mdx | 2 +- open-source/ingest/source-connectors/mongodb.mdx | 2 +- open-source/ingest/source-connectors/notion.mdx | 2 +- open-source/ingest/source-connectors/one-drive.mdx | 2 +- open-source/ingest/source-connectors/opensearch.mdx | 2 +- open-source/ingest/source-connectors/outlook.mdx | 2 +- open-source/ingest/source-connectors/reddit.mdx | 2 +- open-source/ingest/source-connectors/s3.mdx | 2 +- open-source/ingest/source-connectors/salesforce.mdx | 2 +- open-source/ingest/source-connectors/sftp.mdx | 2 +- open-source/ingest/source-connectors/sharepoint.mdx | 2 +- open-source/ingest/source-connectors/slack.mdx | 2 +- open-source/ingest/source-connectors/wikipedia.mdx | 2 +- open-source/integrations.mdx | 10 +++++----- open-source/introduction/key-concepts.mdx | 6 +++--- open-source/introduction/overview.mdx | 12 ++++++------ open-source/introduction/quick-start.mdx | 4 ++-- 51 files changed, 65 insertions(+), 65 deletions(-) diff --git a/open-source/ingest/destination-connectors/astra.mdx b/open-source/ingest/destination-connectors/astra.mdx index cc9103a0..84635542 100644 --- a/open-source/ingest/destination-connectors/astra.mdx +++ b/open-source/ingest/destination-connectors/astra.mdx @@ -28,4 +28,4 @@ import AstraPy from '/snippets/destination_connectors/astra.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest astra --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/azure-cognitive-search.mdx b/open-source/ingest/destination-connectors/azure-cognitive-search.mdx index c10734c4..192e499d 100644 --- a/open-source/ingest/destination-connectors/azure-cognitive-search.mdx +++ b/open-source/ingest/destination-connectors/azure-cognitive-search.mdx @@ -29,7 +29,7 @@ import AzureCognitiveSearchPy from '/snippets/destination_connectors/azure_cogni For a full list of the options the CLI accepts check `unstructured-ingest azure-cognitive-search --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). ## Sample Index Schema diff --git a/open-source/ingest/destination-connectors/azure.mdx b/open-source/ingest/destination-connectors/azure.mdx index 093daeba..0a46f059 100644 --- a/open-source/ingest/destination-connectors/azure.mdx +++ b/open-source/ingest/destination-connectors/azure.mdx @@ -29,4 +29,4 @@ import AzurePy from '/snippets/destination_connectors/azure.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest azure --help`. 
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/box.mdx b/open-source/ingest/destination-connectors/box.mdx index 078a0de0..047aee13 100644 --- a/open-source/ingest/destination-connectors/box.mdx +++ b/open-source/ingest/destination-connectors/box.mdx @@ -29,4 +29,4 @@ import BoxPy from '/snippets/destination_connectors/box.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest box --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/open-source/ingest/destination-connectors/chroma.mdx b/open-source/ingest/destination-connectors/chroma.mdx index 661e96b9..b172f8ab 100644 --- a/open-source/ingest/destination-connectors/chroma.mdx +++ b/open-source/ingest/destination-connectors/chroma.mdx @@ -29,4 +29,4 @@ import ChromaPy from '/snippets/destination_connectors/chroma.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest chroma --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/clarifai.mdx b/open-source/ingest/destination-connectors/clarifai.mdx index be6e12bf..f2b37c04 100644 --- a/open-source/ingest/destination-connectors/clarifai.mdx +++ b/open-source/ingest/destination-connectors/clarifai.mdx @@ -31,4 +31,4 @@ The upstream connector can be any of the ones supported, but for the convenience For a full list of the options the CLI accepts check `unstructured-ingest clarifai --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/databricks-volumes.mdx b/open-source/ingest/destination-connectors/databricks-volumes.mdx index 3eca6916..baded002 100644 --- a/open-source/ingest/destination-connectors/databricks-volumes.mdx +++ b/open-source/ingest/destination-connectors/databricks-volumes.mdx @@ -29,4 +29,4 @@ import DatabricksVolumesPy from '/snippets/destination_connectors/databricks_vol For a full list of the options the CLI accepts check `unstructured-ingest databricks-volumes --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/open-source/ingest/destination-connectors/delta-table.mdx b/open-source/ingest/destination-connectors/delta-table.mdx index 251f6c8e..ddf48d64 100644 --- a/open-source/ingest/destination-connectors/delta-table.mdx +++ b/open-source/ingest/destination-connectors/delta-table.mdx @@ -29,4 +29,4 @@ import DeltaTablePy from '/snippets/destination_connectors/delta_table.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest delta-table --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/dropbox.mdx b/open-source/ingest/destination-connectors/dropbox.mdx index 1e9cb9ec..849acbfd 100644 --- a/open-source/ingest/destination-connectors/dropbox.mdx +++ b/open-source/ingest/destination-connectors/dropbox.mdx @@ -29,4 +29,4 @@ import DropboxPy from '/snippets/destination_connectors/dropbox.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest dropbox --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/elasticsearch.mdx b/open-source/ingest/destination-connectors/elasticsearch.mdx index c06eec32..5b7b6469 100644 --- a/open-source/ingest/destination-connectors/elasticsearch.mdx +++ b/open-source/ingest/destination-connectors/elasticsearch.mdx @@ -28,7 +28,7 @@ import ElasticsearchPy from '/snippets/destination_connectors/elasticsearch.py.m For a full list of the options the CLI accepts check `unstructured-ingest elasticsearch --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). ## Vector Search Sample Mapping diff --git a/open-source/ingest/destination-connectors/google-cloud-service.mdx b/open-source/ingest/destination-connectors/google-cloud-service.mdx index 03f67b44..a4534f97 100644 --- a/open-source/ingest/destination-connectors/google-cloud-service.mdx +++ b/open-source/ingest/destination-connectors/google-cloud-service.mdx @@ -29,4 +29,4 @@ import GCSPy from '/snippets/destination_connectors/gcs.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest gcs --help`. 
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/mongodb.mdx b/open-source/ingest/destination-connectors/mongodb.mdx index 17d9da83..92ec633d 100644 --- a/open-source/ingest/destination-connectors/mongodb.mdx +++ b/open-source/ingest/destination-connectors/mongodb.mdx @@ -29,4 +29,4 @@ import MongoDBPy from '/snippets/destination_connectors/mongodb.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest mongodb --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/open-source/ingest/destination-connectors/opensearch.mdx b/open-source/ingest/destination-connectors/opensearch.mdx index ce5d9d19..890d47bb 100644 --- a/open-source/ingest/destination-connectors/opensearch.mdx +++ b/open-source/ingest/destination-connectors/opensearch.mdx @@ -29,7 +29,7 @@ import OpensearchPy from '/snippets/destination_connectors/opensearch.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest opensearch --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). ## Vector Search Sample Mapping diff --git a/open-source/ingest/destination-connectors/pinecone.mdx b/open-source/ingest/destination-connectors/pinecone.mdx index 2e650474..d1db18f8 100644 --- a/open-source/ingest/destination-connectors/pinecone.mdx +++ b/open-source/ingest/destination-connectors/pinecone.mdx @@ -29,4 +29,4 @@ import PineconePy from '/snippets/destination_connectors/pinecone.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest pinecone --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/qdrant.mdx b/open-source/ingest/destination-connectors/qdrant.mdx index 7189bc6f..99699a91 100644 --- a/open-source/ingest/destination-connectors/qdrant.mdx +++ b/open-source/ingest/destination-connectors/qdrant.mdx @@ -30,4 +30,4 @@ import QdrantPy from '/snippets/destination_connectors/qdrant.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest qdrant --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/s3.mdx b/open-source/ingest/destination-connectors/s3.mdx index 1241ddd9..8436c86b 100644 --- a/open-source/ingest/destination-connectors/s3.mdx +++ b/open-source/ingest/destination-connectors/s3.mdx @@ -29,4 +29,4 @@ import S3Py from '/snippets/destination_connectors/s3.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest s3 --help`. 
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/sql.mdx b/open-source/ingest/destination-connectors/sql.mdx index b9b96d40..ce8196fb 100644 --- a/open-source/ingest/destination-connectors/sql.mdx +++ b/open-source/ingest/destination-connectors/sql.mdx @@ -31,7 +31,7 @@ import SQLPy from '/snippets/destination_connectors/sql.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest sql --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
## Sample Index Schema diff --git a/open-source/ingest/destination-connectors/vectara.mdx b/open-source/ingest/destination-connectors/vectara.mdx index ac06f534..53c388fc 100644 --- a/open-source/ingest/destination-connectors/vectara.mdx +++ b/open-source/ingest/destination-connectors/vectara.mdx @@ -21,4 +21,4 @@ import VectaraPy from '/snippets/destination_connectors/vectara.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest vectara --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/destination-connectors/weaviate.mdx b/open-source/ingest/destination-connectors/weaviate.mdx index e56fd359..0d1a0359 100644 --- a/open-source/ingest/destination-connectors/weaviate.mdx +++ b/open-source/ingest/destination-connectors/weaviate.mdx @@ -29,7 +29,7 @@ import WeaviatePy from '/snippets/destination_connectors/weaviate.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest weaviate --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
+NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). ## Sample Index Schema diff --git a/open-source/ingest/overview.mdx b/open-source/ingest/overview.mdx index a92649ad..6ad1920a 100644 --- a/open-source/ingest/overview.mdx +++ b/open-source/ingest/overview.mdx @@ -6,15 +6,15 @@ description: The Ingest Library is a powerful tool designed to coordinate the pr ## Library Documentation -[Source Connectors](/ingest/source-connectors/overview) +[Source Connectors](/open-source/ingest/source-connectors/overview) Connect to your favorite data storage platforms for an effortless batch processing of your files. -[Destination Connectors](/ingest/destination-connectors) +[Destination Connectors](/open-source/ingest/destination-connectors) Connect to your favorite data storage platforms to write you ingest results to. -[Ingest Configuration](/ingest/ingest-configuration) +[Ingest Configuration](/open-source/ingest/ingest-configuration) Each configuration used when generating an ingest process. diff --git a/open-source/ingest/source-connectors/airtable.mdx b/open-source/ingest/source-connectors/airtable.mdx index a39169c6..02f7357d 100644 --- a/open-source/ingest/source-connectors/airtable.mdx +++ b/open-source/ingest/source-connectors/airtable.mdx @@ -45,4 +45,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest airtable --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/azure.mdx b/open-source/ingest/source-connectors/azure.mdx index 38923cd4..1fb514b5 100644 --- a/open-source/ingest/source-connectors/azure.mdx +++ b/open-source/ingest/source-connectors/azure.mdx @@ -45,4 +45,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest azure --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/biomed.mdx b/open-source/ingest/source-connectors/biomed.mdx index f82eeadf..56e70311 100644 --- a/open-source/ingest/source-connectors/biomed.mdx +++ b/open-source/ingest/source-connectors/biomed.mdx @@ -44,4 +44,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest biomed --help`. 
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/box.mdx b/open-source/ingest/source-connectors/box.mdx index 396cc7e5..959564ab 100644 --- a/open-source/ingest/source-connectors/box.mdx +++ b/open-source/ingest/source-connectors/box.mdx @@ -45,4 +45,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest box --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/open-source/ingest/source-connectors/confluence.mdx b/open-source/ingest/source-connectors/confluence.mdx index f60b06ed..b31dccda 100644 --- a/open-source/ingest/source-connectors/confluence.mdx +++ b/open-source/ingest/source-connectors/confluence.mdx @@ -44,4 +44,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest confluence --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/delta-table.mdx b/open-source/ingest/source-connectors/delta-table.mdx index 9edc6977..ae1cedc2 100644 --- a/open-source/ingest/source-connectors/delta-table.mdx +++ b/open-source/ingest/source-connectors/delta-table.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest delta-table --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/discord.mdx b/open-source/ingest/source-connectors/discord.mdx index 1db6f776..dac73266 100644 --- a/open-source/ingest/source-connectors/discord.mdx +++ b/open-source/ingest/source-connectors/discord.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest discord --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/dropbox.mdx b/open-source/ingest/source-connectors/dropbox.mdx index 4d03bd03..7cac16fa 100644 --- a/open-source/ingest/source-connectors/dropbox.mdx +++ b/open-source/ingest/source-connectors/dropbox.mdx @@ -44,4 +44,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest dropbox --help`. 
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/elastic-search.mdx b/open-source/ingest/source-connectors/elastic-search.mdx index bf8b8d74..6eda9a2f 100644 --- a/open-source/ingest/source-connectors/elastic-search.mdx +++ b/open-source/ingest/source-connectors/elastic-search.mdx @@ -44,4 +44,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest elasticsearch --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/open-source/ingest/source-connectors/github.mdx b/open-source/ingest/source-connectors/github.mdx index 09851c9c..fad1e1db 100644 --- a/open-source/ingest/source-connectors/github.mdx +++ b/open-source/ingest/source-connectors/github.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest github --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/gitlab.mdx b/open-source/ingest/source-connectors/gitlab.mdx index a9117cd6..5c43c407 100644 --- a/open-source/ingest/source-connectors/gitlab.mdx +++ b/open-source/ingest/source-connectors/gitlab.mdx @@ -44,4 +44,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest gitlab --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/google-cloud-storage.mdx b/open-source/ingest/source-connectors/google-cloud-storage.mdx index f1cc6cfa..deb813d1 100644 --- a/open-source/ingest/source-connectors/google-cloud-storage.mdx +++ b/open-source/ingest/source-connectors/google-cloud-storage.mdx @@ -44,4 +44,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest gcs --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/google-drive.mdx b/open-source/ingest/source-connectors/google-drive.mdx index bdfc718d..fdde89a2 100644 --- a/open-source/ingest/source-connectors/google-drive.mdx +++ b/open-source/ingest/source-connectors/google-drive.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest gdrive --help`. 
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/jira.mdx b/open-source/ingest/source-connectors/jira.mdx index 8032afe0..83338b77 100644 --- a/open-source/ingest/source-connectors/jira.mdx +++ b/open-source/ingest/source-connectors/jira.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest jira --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/open-source/ingest/source-connectors/local.mdx b/open-source/ingest/source-connectors/local.mdx index 2a36c59f..696383fb 100644 --- a/open-source/ingest/source-connectors/local.mdx +++ b/open-source/ingest/source-connectors/local.mdx @@ -35,4 +35,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest local --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/mongodb.mdx b/open-source/ingest/source-connectors/mongodb.mdx index bb0c44fa..31ff3a74 100644 --- a/open-source/ingest/source-connectors/mongodb.mdx +++ b/open-source/ingest/source-connectors/mongodb.mdx @@ -26,4 +26,4 @@ import MongoDBPy from '/snippets/source_connectors/mongodb.py.mdx'; For a full list of the options the CLI accepts check `unstructured-ingest mongodb --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/notion.mdx b/open-source/ingest/source-connectors/notion.mdx index ab52bcaa..1ea37fe6 100644 --- a/open-source/ingest/source-connectors/notion.mdx +++ b/open-source/ingest/source-connectors/notion.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest notion --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/one-drive.mdx b/open-source/ingest/source-connectors/one-drive.mdx index 9ba5dc0c..2fd30c7c 100644 --- a/open-source/ingest/source-connectors/one-drive.mdx +++ b/open-source/ingest/source-connectors/one-drive.mdx @@ -42,4 +42,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest onedrive --help`. 
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/opensearch.mdx b/open-source/ingest/source-connectors/opensearch.mdx index 1e4c59cd..5fae89d7 100644 --- a/open-source/ingest/source-connectors/opensearch.mdx +++ b/open-source/ingest/source-connectors/opensearch.mdx @@ -42,4 +42,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest opensearch --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/open-source/ingest/source-connectors/outlook.mdx b/open-source/ingest/source-connectors/outlook.mdx index 133150aa..ff90183c 100644 --- a/open-source/ingest/source-connectors/outlook.mdx +++ b/open-source/ingest/source-connectors/outlook.mdx @@ -44,4 +44,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest outlook --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/reddit.mdx b/open-source/ingest/source-connectors/reddit.mdx index bd745b89..db3a291a 100644 --- a/open-source/ingest/source-connectors/reddit.mdx +++ b/open-source/ingest/source-connectors/reddit.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest reddit --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/s3.mdx b/open-source/ingest/source-connectors/s3.mdx index 74900422..6b38eb8f 100644 --- a/open-source/ingest/source-connectors/s3.mdx +++ b/open-source/ingest/source-connectors/s3.mdx @@ -45,4 +45,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest s3 --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/salesforce.mdx b/open-source/ingest/source-connectors/salesforce.mdx index bd808d45..2c18d434 100644 --- a/open-source/ingest/source-connectors/salesforce.mdx +++ b/open-source/ingest/source-connectors/salesforce.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest salesforce --help`. 
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/sftp.mdx b/open-source/ingest/source-connectors/sftp.mdx index 1b2baf45..8f1f8fbb 100644 --- a/open-source/ingest/source-connectors/sftp.mdx +++ b/open-source/ingest/source-connectors/sftp.mdx @@ -45,4 +45,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest sftp --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). 
\ No newline at end of file diff --git a/open-source/ingest/source-connectors/sharepoint.mdx b/open-source/ingest/source-connectors/sharepoint.mdx index d21a4681..2208aca5 100644 --- a/open-source/ingest/source-connectors/sharepoint.mdx +++ b/open-source/ingest/source-connectors/sharepoint.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest sharepoint --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/slack.mdx b/open-source/ingest/source-connectors/slack.mdx index 7a4e90fe..221cea64 100644 --- a/open-source/ingest/source-connectors/slack.mdx +++ b/open-source/ingest/source-connectors/slack.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest slack --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). 
\ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/ingest/source-connectors/wikipedia.mdx b/open-source/ingest/source-connectors/wikipedia.mdx index 814c7a84..a8678c89 100644 --- a/open-source/ingest/source-connectors/wikipedia.mdx +++ b/open-source/ingest/source-connectors/wikipedia.mdx @@ -43,4 +43,4 @@ Additionally, you will need to pass the `--partition-endpoint` if you’re runni For a full list of the options the CLI accepts check `unstructured-ingest wikipedia --help`. -NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/installation/overview). \ No newline at end of file +NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview). \ No newline at end of file diff --git a/open-source/integrations.mdx b/open-source/integrations.mdx index d66bc7cb..8e986c2e 100644 --- a/open-source/integrations.mdx +++ b/open-source/integrations.mdx @@ -1,20 +1,20 @@ --- title: Integrations sidebarTitle: Overview -description: Integrate your model development pipeline with your favorite machine learning frameworks and libraries, and prepare your data for ingestion into downstream systems. 
Most of our integrations come in the form of [staging functions](/core-functionality/staging), which take a list of `Element` objects as input and return formatted dictionaries as output. +description: Integrate your model development pipeline with your favorite machine learning frameworks and libraries, and prepare your data for ingestion into downstream systems. Most of our integrations come in the form of [staging functions](/open-source/core-functionality/staging), which take a list of `Element` objects as input and return formatted dictionaries as output. --- ## `Integration with Argilla` -You can convert a list of `Text` elements to an [Argilla](https://www.argilla.io/) `Dataset` using the [stage\_for\_argilla](/core-functionality/staging#stage-for-argilla) staging function. Specify the type of dataset to be generated using the `argilla_task` parameter. Valid values are `"text_classification"`, `"token_classification"`, and `"text2text"`. Follow the link for more details on usage. +You can convert a list of `Text` elements to an [Argilla](https://www.argilla.io/) `Dataset` using the [stage\_for\_argilla](/open-source/core-functionality/staging#stage-for-argilla) staging function. Specify the type of dataset to be generated using the `argilla_task` parameter. Valid values are `"text_classification"`, `"token_classification"`, and `"text2text"`. Follow the link for more details on usage. ## `Integration with Baseplate` -[Baseplate](https://docs.baseplate.ai/introduction) is a backend optimized for use with LLMs that has an easy to use spreadsheet interface. The `unstructured` library offers a staging function to convert a list of `Element` objects into the [rows format](https://docs.baseplate.ai/api-reference/documents/overview) required by the Baseplate API. See the [stage\_for\_baseplate](/core-functionality/staging#stage-for-baseplate) documentation for information on how to stage elements for ingestion into Baseplate. 
+[Baseplate](https://docs.baseplate.ai/introduction) is a backend optimized for use with LLMs that has an easy to use spreadsheet interface. The `unstructured` library offers a staging function to convert a list of `Element` objects into the [rows format](https://docs.baseplate.ai/api-reference/documents/overview) required by the Baseplate API. See the [stage\_for\_baseplate](/open-source/core-functionality/staging#stage-for-baseplate) documentation for information on how to stage elements for ingestion into Baseplate. ## `Integration with Datasaur` -You can format a list of `Text` elements as input to token based tasks in [Datasaur](https://datasaur.ai/) using the [stage\_for\_datasaur](/core-functionality/staging#stage-for-datasaur) staging function. You will obtain a list of dictionaries indexed by the keys `"text"` with the content of the element, and `"entities"` with an empty list. Follow the link to learn how to customise your entities and for more details on usage. +You can format a list of `Text` elements as input to token based tasks in [Datasaur](https://datasaur.ai/) using the [stage\_for\_datasaur](/open-source/core-functionality/staging#stage-for-datasaur) staging function. You will obtain a list of dictionaries indexed by the keys `"text"` with the content of the element, and `"entities"` with an empty list. Follow the link to learn how to customise your entities and for more details on usage. ## `Integration with Hugging Face` @@ -22,7 +22,7 @@ You can prepare `Text` elements for processing in Hugging Face [Transformers](ht ## `Integration with Labelbox` -You can format your outputs for use with [LabelBox](https://labelbox.com/) using the [stage\_for\_label\_box](/core-functionality/staging#stage-for-label-box) staging function. LabelBox accepts cloud-hosted data and does not support importing text directly. 
With this integration you can stage the data files in the `output_directory` to be uploaded to a cloud storage service (such as S3 buckets) and get a config of type `List[Dict[str, Any]]` that can be written to a `.json` file and imported into LabelBox. Follow the link to see how to generate the `config.json` file that can be used with LabelBox, how to upload the staged data files to an S3 bucket, and for more details on usage. +You can format your outputs for use with [LabelBox](https://labelbox.com/) using the [stage\_for\_label\_box](/open-source/core-functionality/staging#stage-for-label-box) staging function. LabelBox accepts cloud-hosted data and does not support importing text directly. With this integration you can stage the data files in the `output_directory` to be uploaded to a cloud storage service (such as S3 buckets) and get a config of type `List[Dict[str, Any]]` that can be written to a `.json` file and imported into LabelBox. Follow the link to see how to generate the `config.json` file that can be used with LabelBox, how to upload the staged data files to an S3 bucket, and for more details on usage. ## `Integration with Label Studio` diff --git a/open-source/introduction/key-concepts.mdx b/open-source/introduction/key-concepts.mdx index 46f6c9a1..5c906b9c 100644 --- a/open-source/introduction/key-concepts.mdx +++ b/open-source/introduction/key-concepts.mdx @@ -52,11 +52,11 @@ A RAG workflow can be broken down into the following steps: 1. **Data ingestion**: The first step is acquiring data from your relevant sources. We make this easy with our [source connectors](/ingest/source-connectors/overview). -2. **Data preprocessing and cleaning**: Once you’ve identified and collected your data sources, removing any unnecessary artifacts within the dataset is a good practice. At Unstructured, we have various tools for data processing in our [core functionalities](/core-functionality/overview). +2. 
**Data preprocessing and cleaning**: Once you’ve identified and collected your data sources, removing any unnecessary artifacts within the dataset is a good practice. At Unstructured, we have various tools for data processing in our [core functionalities](/open-source/core-functionality/overview). -3. **Chunking**: The next step is to break your text into digestible pieces for your LLM to consume. We provide the basic and context-aware chunking strategies. Please refer to the documentation [here](/core-functionality/chunking). +3. **Chunking**: The next step is to break your text into digestible pieces for your LLM to consume. We provide the basic and context-aware chunking strategies. Please refer to the documentation [here](/open-source/core-functionality/chunking). -4. **Embedding**: After chunking, you must convert the text into a numerical representation (vector embedding) that an LLM can understand. To use the various embedding models using Unstructured tools, please refer to [this page](/core-functionality/embedding). +4. **Embedding**: After chunking, you must convert the text into a numerical representation (vector embedding) that an LLM can understand. To use the various embedding models using Unstructured tools, please refer to [this page](/open-source/core-functionality/embedding). 5. **Vector Database**: The next step is to choose a location for storing your chunked embeddings. There are many options for your vector database (AstraDB, ChromaDB, Clarifai, Milvus, Pinecone, Qdrant, Weaviate, and more). For complete list of Unstructured `Destination Connectors`, please visit [this page](/ingest/destination-connectors/overview). 
diff --git a/open-source/introduction/overview.mdx b/open-source/introduction/overview.mdx index 23597fac..b3cf1ddc 100644 --- a/open-source/introduction/overview.mdx +++ b/open-source/introduction/overview.mdx @@ -33,17 +33,17 @@ sidebarTitle: Overview * **Robust Core Functionality**: Unstructured provides a suite of core functionalities critical for efficient data processing. This includes: - * [Partitioning](/core-functionality/partitioning): The partitioning functions in Unstructured enable the extraction of structured content from raw, unstructured documents. This feature is crucial for transforming unorganized data into usable formats, aiding in efficient data processing and analysis. + * [Partitioning](/open-source/core-functionality/partitioning): The partitioning functions in Unstructured enable the extraction of structured content from raw, unstructured documents. This feature is crucial for transforming unorganized data into usable formats, aiding in efficient data processing and analysis. - * [Cleaning](/core-functionality/cleaning): Data preparation for NLP models often requires cleaning to ensure quality. The Unstructured library includes cleaning functions that assist in sanitizing output, removing unwanted content, and improving the performance of NLP models. This step is essential for maintaining the integrity of data before it is passed to downstream applications. + * [Cleaning](/open-source/core-functionality/cleaning): Data preparation for NLP models often requires cleaning to ensure quality. The Unstructured library includes cleaning functions that assist in sanitizing output, removing unwanted content, and improving the performance of NLP models. This step is essential for maintaining the integrity of data before it is passed to downstream applications. - * [Extracting](/core-functionality/extracting): This functionality allows for the extraction of specific entities within documents. 
It is designed to identify and isolate relevant pieces of information, making it easier for users to focus on the most pertinent data in their documents. + * [Extracting](/open-source/core-functionality/extracting): This functionality allows for the extraction of specific entities within documents. It is designed to identify and isolate relevant pieces of information, making it easier for users to focus on the most pertinent data in their documents. - * [Staging](/core-functionality/staging): Staging functions help prepare your data for ingestion into downstream systems. Please note that this functionality is being deprecated in favor of `Destination Connectors`. + * [Staging](/open-source/core-functionality/staging): Staging functions help prepare your data for ingestion into downstream systems. Please note that this functionality is being deprecated in favor of `Destination Connectors`. - * [Chunking](/core-functionality/chunking): The chunking process in Unstructured is distinct from conventional methods. Instead of relying solely on text-based features to form chunks, Unstructured uses a deep understanding of document formats to partition documents into semantic units (document elements). + * [Chunking](/open-source/core-functionality/chunking): The chunking process in Unstructured is distinct from conventional methods. Instead of relying solely on text-based features to form chunks, Unstructured uses a deep understanding of document formats to partition documents into semantic units (document elements). - * [Embedding](/core-functionality/chunking): The embedding encoder classes in Unstructured leverage document elements detected through partitioning or grouped via chunking to obtain embeddings for each element. This is particularly useful for applications like Retrieval Augmented Generation (RAG), where precise and contextually relevant embeddings are crucial. 
+ * [Embedding](/open-source/core-functionality/embedding): The embedding encoder classes in Unstructured leverage document elements detected through partitioning or grouped via chunking to obtain embeddings for each element. This is particularly useful for applications like Retrieval Augmented Generation (RAG), where precise and contextually relevant embeddings are crucial. * **High-performant Connectors**: The platform includes optimized connectors for efficient data ingestion and output. These comprise [Source Connectors](/ingest/destination-connectors/overview) for data input and [Destination Connectors](/ingest/destination-connectors/overview) for data export. diff --git a/open-source/introduction/quick-start.mdx b/open-source/introduction/quick-start.mdx index a9b59f93..a2344a5e 100644 --- a/open-source/introduction/quick-start.mdx +++ b/open-source/introduction/quick-start.mdx @@ -1,6 +1,6 @@ --- title: Quick Start -description: This guide offers concise steps to swiftly install and validate your `unstructured` installation. For more comprehensive installation guide, please refer to [this page](http://localhost:63342/CHANGELOG.md/docs/build/html/installing.html). +description: This guide offers concise steps to swiftly install and validate your `unstructured` installation. For a more comprehensive installation guide, please refer to [this page](/open-source/installation/overview). --- ## Installation @@ -8,7 +8,7 @@ description: This guide offers concise steps to swiftly install and validate you Plain text files, HTML, XML, JSON, and Emails are immediately supported without any additional dependencies. - If you need to process other document types, you can install the extras required by following the [Full Installation](/installation/full-installation) + If you need to process other document types, you can install the extras required by following the [Full Installation](/open-source/installation/full-installation) 2.
**System Dependencies**: Ensure the subsequent system dependencies are installed. Your requirements might vary based on the document types you’re handling: