Skip to content

Commit

Permalink
chore: update code snippets (#20)
Browse files (browse the repository at this point in the history)
Co-authored-by: ryannikolaidis <[email protected]>
  • Loading branch information
ryannikolaidis authored Apr 26, 2024
1 parent 024bd98 commit b36520c
Show file tree
Hide file tree
Showing 48 changed files with 90 additions and 74 deletions.
4 changes: 3 additions & 1 deletion snippets/destination_connectors/azure.py.mdx
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
```python Python
import os

from unstructured.ingest.connector.fsspec.azure import (
AzureAccessConfig,
AzureWriteConfig,
Expand All @@ -22,7 +24,7 @@ from unstructured.ingest.runner.writers.fsspec.azure import (
def get_writer() -> Writer:
return AzureWriter(
connector_config=SimpleAzureBlobStorageConfig(
access_config=AzureAccessConfig(account_name="azureunstructured1"),
access_config=AzureAccessConfig(account_name=os.getenv("AZURE_ACCOUNT_NAME")),
remote_url="az://unstructured/war-and-peace-output",
),
write_config=AzureWriteConfig(),
Expand Down
2 changes: 1 addition & 1 deletion snippets/destination_connectors/azure.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ unstructured-ingest \
--num-processes 2 \
--verbose \
azure \
--account-name azureunstructured1 \
--account-name "$AZURE_ACCOUNT_NAME" \
--remote-url "<your destination path here, ie 'az://unstructured/war-and-peace-output'>"
```
8 changes: 5 additions & 3 deletions snippets/destination_connectors/clarifai.py.mdx
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
```python Python
import os

from unstructured.ingest.connector.clarifai import (
ClarifaiAccessConfig,
ClarifaiWriteConfig,
Expand All @@ -21,9 +23,9 @@ from unstructured.ingest.runner.writers.clarifai import (
def get_writer() -> Writer:
return ClarifaiWriter(
connector_config=SimpleClarifaiConfig(
access_config=ClarifaiAccessConfig(api_key="CLARIFAI_PAT"),
app_id="CLARIFAI_APP",
user_id="CLARIFAI_USER_ID",
access_config=ClarifaiAccessConfig(api_key=os.getenv("CLARIFAI_PAT_KEY")),
app_id=os.getenv("CLARIFAI_APP_ID"),
user_id=os.getenv("CLARIFAI_USER_ID"),
),
write_config=ClarifaiWriteConfig(),
)
Expand Down
6 changes: 3 additions & 3 deletions snippets/destination_connectors/clarifai.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ unstructured-ingest \
--num-processes 2 \
--verbose \
clarifai \
--app-id "<your clarifai app name>" \
--user-id "<your clarifai user id>" \
--api-key "<your clarifai PAT key>" \
--app-id "$CLARIFAI_APP_ID" \
--user-id "$CLARIFAI_USER_ID" \
--api-key "$CLARIFAI_PAT_KEY" \
--batch-size 100
```
2 changes: 1 addition & 1 deletion snippets/destination_connectors/qdrant.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ unstructured-ingest \
--num-processes 2 \
--verbose \
qdrant \
--collection-name "test" \
--collection-name "$QDRANT_COLLECTION_NAME" \
--location "http://localhost:6333" \
--batch-size 80
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/airtable_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ unstructured-ingest \
--num-processes 2 \
--reprocess \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
4 changes: 3 additions & 1 deletion snippets/source_connectors/azure.py.mdx
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
```python Python
import os

from unstructured.ingest.connector.fsspec.azure import (
AzureAccessConfig,
SimpleAzureBlobStorageConfig,
Expand All @@ -21,7 +23,7 @@ if __name__ == "__main__":
partition_config=PartitionConfig(),
connector_config=SimpleAzureBlobStorageConfig(
access_config=AzureAccessConfig(
account_name="azureunstructured1",
account_name=os.getenv("AZURE_ACCOUNT_NAME"),
),
remote_url="abfs://container1/",
),
Expand Down
2 changes: 1 addition & 1 deletion snippets/source_connectors/azure.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
unstructured-ingest \
azure \
--remote-url abfs://container1/ \
--account-name azureunstructured1 \
--account-name "$AZURE_ACCOUNT_NAME" \
--output-dir azure-ingest-output \
--num-processes 2
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/azure_api.py.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ if __name__ == "__main__":
),
connector_config=SimpleAzureBlobStorageConfig(
access_config=AzureAccessConfig(
account_name="azureunstructured1",
account_name=os.getenv("AZURE_ACCOUNT_NAME"),
),
remote_url="abfs://container1/",
),
Expand Down
4 changes: 2 additions & 2 deletions snippets/source_connectors/azure_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
unstructured-ingest \
azure \
--remote-url abfs://container1/ \
--account-name azureunstructured1 \
--account-name "$AZURE_ACCOUNT_NAME" \
--output-dir azure-ingest-output \
--num-processes 2 \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/biomed_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ unstructured-ingest \
--verbose \
--preserve-downloads \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/box_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ unstructured-ingest \
--recursive \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
6 changes: 4 additions & 2 deletions snippets/source_connectors/confluence.py.mdx
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
```python Python
import os

from unstructured.ingest.connector.confluence import ConfluenceAccessConfig, SimpleConfluenceConfig
from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
from unstructured.ingest.runner import ConfluenceRunner
Expand All @@ -16,9 +18,9 @@ if __name__ == "__main__":
),
connector_config=SimpleConfluenceConfig(
access_config=ConfluenceAccessConfig(
api_token="ABCDE1234ABDE1234ABCDE1234",
api_token=os.getenv("CONFLUENCE_API_TOKEN"),
),
user_email="[email protected]",
user_email=os.getenv("CONFLUENCE_USER_EMAIL"),
url="https://unstructured-ingest-test.atlassian.net",
),
)
Expand Down
4 changes: 2 additions & 2 deletions snippets/source_connectors/confluence.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ unstructured-ingest \
confluence \
--metadata-exclude filename,file_directory,metadata.data_source.date_processed \
--url https://unstructured-ingest-test.atlassian.net \
--user-email [email protected] \
--api-token ABCDE1234ABDE1234ABCDE1234 \
--user-email "$CONFLUENCE_USER_EMAIL" \
--api-token "$CONFLUENCE_API_TOKEN" \
--output-dir confluence-ingest-output \
--num-processes 2
```
4 changes: 2 additions & 2 deletions snippets/source_connectors/confluence_api.py.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ if __name__ == "__main__":
),
connector_config=SimpleConfluenceConfig(
access_config=ConfluenceAccessConfig(
api_token="ABCDE1234ABDE1234ABCDE1234",
api_token=os.getenv("CONFLUENCE_API_TOKEN"),
),
user_email="[email protected]",
user_email=os.getenv("CONFLUENCE_USER_EMAIL"),
url="https://unstructured-ingest-test.atlassian.net",
),
)
Expand Down
6 changes: 3 additions & 3 deletions snippets/source_connectors/confluence_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ unstructured-ingest \
confluence \
--metadata-exclude filename,file_directory,metadata.data_source.date_processed \
--url https://unstructured-ingest-test.atlassian.net \
--user-email [email protected] \
--api-token ABCDE1234ABDE1234ABCDE1234 \
--user-email "$CONFLUENCE_USER_EMAIL" \
--api-token "$CONFLUENCE_API_TOKEN" \
--output-dir confluence-ingest-output \
--num-processes 2 \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/delta_table_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ unstructured-ingest \
--storage_options "AWS_REGION=us-east-2,AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/discord_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ unstructured-ingest \
--preserve-downloads \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/dropbox_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ unstructured-ingest \
--recursive \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/elasticsearch_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ unstructured-ingest \
--output-dir elasticsearch-ingest-output \
--num-processes 2 \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/gcs_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ unstructured-ingest \
--recursive \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/github_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ unstructured-ingest \
--num-processes 2 \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/gitlab_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ unstructured-ingest \
--num-processes 2 \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
6 changes: 4 additions & 2 deletions snippets/source_connectors/google_drive.py.mdx
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
```python Python
import os

from unstructured.ingest.connector.google_drive import (
GoogleDriveAccessConfig,
SimpleGoogleDriveConfig,
Expand All @@ -17,10 +19,10 @@ if __name__ == "__main__":
partition_config=PartitionConfig(),
connector_config=SimpleGoogleDriveConfig(
access_config=GoogleDriveAccessConfig(
service_account_key="POPULATE WITH DRIVE SERVICE ACCOUNT KEY"
service_account_key=os.getenv("GOOGLE_DRIVE_ACCOUNT_KEY")
),
recursive=True,
drive_id="POPULATE WITH FILE OR FOLDER ID",
drive_id=os.getenv("GOOGLE_DRIVE_FOLDER_ID"),
),
)
runner.run()
Expand Down
4 changes: 2 additions & 2 deletions snippets/source_connectors/google_drive_api.py.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ if __name__ == "__main__":
),
connector_config=SimpleGoogleDriveConfig(
access_config=GoogleDriveAccessConfig(
service_account_key="POPULATE WITH DRIVE SERVICE ACCOUNT KEY"
service_account_key=os.getenv("GOOGLE_DRIVE_ACCOUNT_KEY")
),
recursive=True,
drive_id="POPULATE WITH FILE OR FOLDER ID",
drive_id=os.getenv("GOOGLE_DRIVE_FOLDER_ID"),
),
)
runner.run()
Expand Down
2 changes: 1 addition & 1 deletion snippets/source_connectors/google_drive_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ unstructured-ingest \
--recursive \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
8 changes: 5 additions & 3 deletions snippets/source_connectors/jira.py.mdx
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
```python Python
import os

from unstructured.ingest.connector.jira import JiraAccessConfig, SimpleJiraConfig
from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
from unstructured.ingest.runner import JiraRunner
Expand All @@ -15,9 +17,9 @@ if __name__ == "__main__":
metadata_exclude=["filename", "file_directory", "metadata.data_source.date_processed"],
),
connector_config=SimpleJiraConfig(
access_config=JiraAccessConfig(api_token="ABCDE1234ABDE1234ABCDE1234"),
url="https://unstructured-jira-connector-test.atlassian.net",
user_email="[email protected]",
access_config=JiraAccessConfig(api_token=os.getenv("JIRA_API_TOKEN")),
url=os.getenv("JIRA_URL"),
user_email=os.getenv("JIRA_EMAIL"),
),
)
runner.run()
Expand Down
6 changes: 3 additions & 3 deletions snippets/source_connectors/jira_api.py.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ if __name__ == "__main__":
api_key=os.getenv("UNSTRUCTURED_API_KEY"),
),
connector_config=SimpleJiraConfig(
access_config=JiraAccessConfig(api_token="ABCDE1234ABDE1234ABCDE1234"),
url="https://unstructured-jira-connector-test.atlassian.net",
user_email="[email protected]",
access_config=JiraAccessConfig(api_token=os.getenv("JIRA_API_TOKEN")),
url=os.getenv("JIRA_URL"),
user_email=os.getenv("JIRA_EMAIL"),
),
)
runner.run()
Expand Down
2 changes: 1 addition & 1 deletion snippets/source_connectors/jira_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ unstructured-ingest \
--output-dir jira-ingest-output \
--num-processes 2 \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/local_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ unstructured-ingest \
--recursive \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
6 changes: 3 additions & 3 deletions snippets/source_connectors/mongodb.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
unstructured-ingest \
mongodb \
--metadata-exclude filename,file_directory,metadata.data_source.date_processed \
--uri "<MongoDB uri>" \
--database "<MongoDB Database Name>" \
--collection "<MongoDB Collection name>" \
--uri "$MONGODB_URI" \
--database "$MONGODB_DATABASE" \
--collection "$MONGODB_COLLECTION" \
--output-dir mongodb-ingest-output \
--num-processes 2
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/notion_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ unstructured-ingest \
--num-processes 2 \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/onedrive_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ unstructured-ingest \
--num-processes 2 \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/opensearch_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ unstructured-ingest \
--output-dir opensearch-ingest-output \
--num-processes 2 \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
2 changes: 1 addition & 1 deletion snippets/source_connectors/outlook_api.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ unstructured-ingest \
--recursive \
--verbose \
--partition-by-api \
--api-key "<UNSTRUCTURED-API-KEY>"
--api-key "$UNSTRUCTURED_API_KEY"
```
6 changes: 4 additions & 2 deletions snippets/source_connectors/reddit.py.mdx
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
```python Python
import os

from unstructured.ingest.connector.reddit import RedditAccessConfig, SimpleRedditConfig
from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
from unstructured.ingest.runner import RedditRunner
Expand All @@ -14,10 +16,10 @@ if __name__ == "__main__":
partition_config=PartitionConfig(),
connector_config=SimpleRedditConfig(
access_config=RedditAccessConfig(
client_secret="<client secret here>",
client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
),
subreddit_name="machinelearning",
client_id="<client id here>",
client_id=os.getenv("REDDIT_CLIENT_ID"),
user_agent=r"Unstructured Ingest Subreddit fetcher by \\u\...",
search_query="Unstructured",
num_posts=10,
Expand Down
4 changes: 2 additions & 2 deletions snippets/source_connectors/reddit.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
unstructured-ingest \
reddit \
--subreddit-name machinelearning \
--client-id "<client id here>" \
--client-secret "<client secret here>" \
--client-id "$REDDIT_CLIENT_ID" \
--client-secret "$REDDIT_CLIENT_SECRET" \
--user-agent "Unstructured Ingest Subreddit fetcher by \u\..." \
--search-query "Unstructured" \
--num-posts 10 \
Expand Down
Loading

0 comments on commit b36520c

Please sign in to comment.