Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reformat python code samples #380

Merged
merged 2 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 54 additions & 43 deletions fern/pages/-ARCHIVE-/old-tutorials/semantic-search.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ You can find the code in the <a target="_blank" href="https://github.com/cohere-
### 1. Download the Dependencies

```python PYTHON
# Install Cohere for embeddings, Umap to reduce embeddings to 2 dimensions,
# Install Cohere for embeddings, Umap to reduce embeddings to 2 dimensions,
# Altair for visualization, Annoy for approximate nearest neighbor search
pip install cohere umap-learn altair annoy datasets tqdm
!pip install cohere umap-learn altair annoy datasets tqdm
trentfowlercohere marked this conversation as resolved.
Show resolved Hide resolved
```

And if you're running an older version of the SDK, you might need to upgrade it like so:
Expand All @@ -63,7 +63,7 @@ Get your Cohere API key by [signing up here](https://os.cohere.ai/register). Pas
#### 1a. Import the Necessary Dependencies to Run this Example

```python PYTHON
#title Import libraries (Run this cell to execute required code) {display-mode: "form"}
# title Import libraries (Run this cell to execute required code) {display-mode: "form"}

import cohere
import numpy as np
Expand All @@ -76,8 +76,9 @@ import altair as alt
from sklearn.metrics.pairwise import cosine_similarity
from annoy import AnnoyIndex
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_colwidth', None)

warnings.filterwarnings("ignore")
pd.set_option("display.max_colwidth", None)
```

### 2. Get the Archive of Questions
Expand Down Expand Up @@ -128,9 +129,11 @@ input_type_embed = "search_document"
co = cohere.Client(api_key)

# Get the embeddings
embeds = co.embed(texts=list(df['text']),
model=model_name,
input_type=input_type_embed).embeddings
embeds = co.embed(
texts=list(df["text"]),
model=model_name,
input_type=input_type_embed,
).embeddings
```

### 4. Build the Index, Search Using an Index and Conduct Nearest Neighbour Search
Expand All @@ -141,13 +144,13 @@ Let's build an index using the library called annoy. Annoy is a library created

```python PYTHON
# Create the search index, pass the size of embedding
search_index = AnnoyIndex(np.array(embeds).shape[1], 'angular')
search_index = AnnoyIndex(np.array(embeds).shape[1], "angular")

# Add all the vectors to the search index
for i in range(len(embeds)):
search_index.add_item(i, embeds[i])
search_index.build(10) # 10 trees
search_index.save('test.ann')
search_index.build(10) # 10 trees
search_index.save("test.ann")
```

After building the index, we can use it to retrieve the nearest neighbours either of existing questions (section 3.1), or of new questions that we embed (section 3.2).
Expand All @@ -161,15 +164,21 @@ If we're only interested in measuring the similarities between the questions in
example_id = 92

# Retrieve nearest neighbors
similar_item_ids = search_index.get_nns_by_item(example_id,10,
include_distances=True)
similar_item_ids = search_index.get_nns_by_item(
example_id, 10, include_distances=True
)

# Format and print the text and distances
results = pd.DataFrame(data={'texts': df.iloc[similar_item_ids[0]]['text'],
'distance': similar_item_ids[1]}).drop(example_id)

results = pd.DataFrame(
data={
"texts": df.iloc[similar_item_ids[0]]["text"],
"distance": similar_item_ids[1],
}
).drop(example_id)

# NOTE: Your results might look slightly different to ours.
print(f"Question:'{df.iloc[example_id]['text']}'\nNearest neighbors:")
print(results) # NOTE: Your results might look slightly different to ours.
print(results)
```

```
Expand Down Expand Up @@ -199,20 +208,26 @@ query = "What is the tallest mountain in the world?"
input_type_query = "search_query"

# Get the query's embedding
query_embed = co.embed(texts=[query],
model=model_name,
input_type=input_type_query).embeddings
query_embed = co.embed(
texts=[query], model=model_name, input_type=input_type_query
).embeddings

# Retrieve the nearest neighbors
similar_item_ids = search_index.get_nns_by_vector(query_embed[0],10,
include_distances=True)
similar_item_ids = search_index.get_nns_by_vector(
query_embed[0], 10, include_distances=True
)
# Format the results
query_results = pd.DataFrame(data={'texts': df.iloc[similar_item_ids[0]]['text'],
'distance': similar_item_ids[1]})
query_results = pd.DataFrame(
data={
"texts": df.iloc[similar_item_ids[0]]["text"],
"distance": similar_item_ids[1],
}
)


# NOTE: Your results might look slightly different to ours.
print(f"Query:'{query}'\nNearest neighbors:")
print(query_results) # NOTE: Your results might look slightly different to ours.
print(query_results)
```

| | texts | distance |
Expand All @@ -233,32 +248,28 @@ print(query_results) # NOTE: Your results might look slightly different to ours.
Use the code below to create a visualization of the embedded archive. As written, this code will only run in a Jupyter notebook.

```python PYTHON
#@title Plot the archive {display-mode: "form"}
# @title Plot the archive {display-mode: "form"}

# UMAP reduces the dimensions from 1024 to 2 dimensions that we can plot
reducer = umap.UMAP(n_neighbors=20)
reducer = umap.UMAP(n_neighbors=20)
umap_embeds = reducer.fit_transform(embeds)

# Prepare the data to plot and interactive visualization
# using Altair
df_explore = pd.DataFrame(data={'text': df['text']})
df_explore['x'] = umap_embeds[:,0]
df_explore['y'] = umap_embeds[:,1]
df_explore = pd.DataFrame(data={"text": df["text"]})
df_explore["x"] = umap_embeds[:, 0]
df_explore["y"] = umap_embeds[:, 1]

# Plot
chart = alt.Chart(df_explore).mark_circle(size=60).encode(
x=#'x',
alt.X('x',
scale=alt.Scale(zero=False)
),
y=
alt.Y('y',
scale=alt.Scale(zero=False)
),
tooltip=['text']
).properties(
width=700,
height=400
chart = (
alt.Chart(df_explore)
.mark_circle(size=60)
.encode(
x=alt.X("x", scale=alt.Scale(zero=False)), #'x',
y=alt.Y("y", scale=alt.Scale(zero=False)),
tooltip=["text"],
)
.properties(width=700, height=400)
)
chart.interactive()
```
Expand Down
38 changes: 22 additions & 16 deletions fern/pages/deployment-options/cohere-on-aws/amazon-bedrock.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,29 @@ co = cohere.BedrockClient(
)

# Input parameters for embed. In this example we are embedding hacker news post titles.
texts = ["Interesting (Non software) books?",
"Non-tech books that have helped you grow professionally?",
"I sold my company last month for $5m. What do I do with the money?",
"How are you getting through (and back from) burning out?",
"I made $24k over the last month. Now what?",
"What kind of personal financial investment do you do?",
"Should I quit the field of software development?"]
texts = [
"Interesting (Non software) books?",
"Non-tech books that have helped you grow professionally?",
"I sold my company last month for $5m. What do I do with the money?",
"How are you getting through (and back from) burning out?",
"I made $24k over the last month. Now what?",
"What kind of personal financial investment do you do?",
"Should I quit the field of software development?",
]
input_type = "clustering"
truncate = "NONE" # optional
model_id = "cohere.embed-english-v3" # or "cohere.embed-multilingual-v3"
truncate = "NONE" # optional
model_id = (
"cohere.embed-english-v3" # or "cohere.embed-multilingual-v3"
)


# Invoke the model and print the response
result = co.embed(
model=model_id,
input_type=input_type,
texts=texts,
truncate=truncate) # aws_client.invoke_model(**params)
model=model_id,
input_type=input_type,
texts=texts,
truncate=truncate,
) # aws_client.invoke_model(**params)

print(result)
```
Expand All @@ -81,9 +86,10 @@ co = cohere.BedrockClient(
aws_session_token="...",
)

result = co.chat(message="Write a LinkedIn post about starting a career in tech:",
model='cohere.command-r-plus-v1:0' # or 'cohere.command-r-v1:0'
)
result = co.chat(
message="Write a LinkedIn post about starting a career in tech:",
model="cohere.command-r-plus-v1:0", # or 'cohere.command-r-v1:0'
)

print(result)
```
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,27 @@ co = cohere.SageMakerClient(
)

# Input parameters for embed. In this example we are embedding hacker news post titles.
texts = ["Interesting (Non software) books?",
"Non-tech books that have helped you grow professionally?",
"I sold my company last month for $5m. What do I do with the money?",
"How are you getting through (and back from) burning out?",
"I made $24k over the last month. Now what?",
"What kind of personal financial investment do you do?",
"Should I quit the field of software development?"]
texts = [
"Interesting (Non software) books?",
"Non-tech books that have helped you grow professionally?",
"I sold my company last month for $5m. What do I do with the money?",
"How are you getting through (and back from) burning out?",
"I made $24k over the last month. Now what?",
"What kind of personal financial investment do you do?",
"Should I quit the field of software development?",
]
input_type = "clustering"
truncate = "NONE" # optional
model_id = "<YOUR ENDPOINT NAME>" # On SageMaker, you create a model name that you'll pass here.
truncate = "NONE" # optional
model_id = "<YOUR ENDPOINT NAME>" # On SageMaker, you create a model name that you'll pass here.


# Invoke the model and print the response
result = co.embed(
model=model_id,
input_type=input_type,
texts=texts,
truncate=truncate)
model=model_id,
input_type=input_type,
texts=texts,
truncate=truncate,
)

print(result)
```
Expand Down
31 changes: 20 additions & 11 deletions fern/pages/deployment-options/cohere-on-microsoft-azure.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,19 @@ data = {
body = str.encode(json.dumps(data))

# Replace the url with your API endpoint
url = "https://your-endpoint.inference.ai.azure.com/v1/chat/completions"
url = (
"https://your-endpoint.inference.ai.azure.com/v1/chat/completions"
)

# Replace this with the key for the endpoint
api_key = "your-auth-key"
if not api_key:
raise Exception("API Key is missing")

headers = {"Content-Type": "application/json", "Authorization": (api_key)}
headers = {
"Content-Type": "application/json",
"Authorization": (api_key),
}

req = urllib.request.Request(url, body, headers)

Expand Down Expand Up @@ -111,9 +116,7 @@ import urllib.request
import json

# Configure payload data sending to API endpoint
data = {
"input": ["hi"]
}
data = {"input": ["hi"]}

body = str.encode(json.dumps(data))

Expand All @@ -125,7 +128,10 @@ api_key = "your-auth-key"
if not api_key:
raise Exception("API Key is missing")

headers = {"Content-Type": "application/json", "Authorization": (api_key)}
headers = {
"Content-Type": "application/json",
"Authorization": (api_key),
}

req = urllib.request.Request(url, body, headers)

Expand All @@ -149,7 +155,7 @@ import cohere

co = cohere.Client(
base_url="https://<endpoint>.<region>.inference.ai.azure.com/v1",
api_key="<key>"
api_key="<key>",
)

documents = [
Expand Down Expand Up @@ -214,19 +220,19 @@ import cohere
# For Command models
co_chat = cohere.Client(
api_key="AZURE_INFERENCE_CREDENTIAL",
base_url="AZURE_MODEL_ENDPOINT", # Example - https://Cohere-command-r-plus-08-2024-xyz.eastus.models.ai.azure.com/
base_url="AZURE_MODEL_ENDPOINT", # Example - https://Cohere-command-r-plus-08-2024-xyz.eastus.models.ai.azure.com/
)

# For Embed models
co_embed = cohere.Client(
api_key="AZURE_INFERENCE_CREDENTIAL",
    base_url="AZURE_MODEL_ENDPOINT",  # Example - https://cohere-embed-v3-multilingual-xyz.eastus.models.ai.azure.com/
base_url="AZURE_MODEL_ENDPOINT", # Example - hhttps://cohere-embed-v3-multilingual-xyz.eastus.models.ai.azure.com/
)

# For Rerank models
co_rerank = cohere.Client(
api_key="AZURE_INFERENCE_CREDENTIAL",
    base_url="AZURE_MODEL_ENDPOINT",  # Example - https://cohere-rerank-v3-multilingual-xyz.eastus.models.ai.azure.com/
base_url="AZURE_MODEL_ENDPOINT", # Example - hhttps://cohere-rerank-v3-multilingual-xyz.eastus.models.ai.azure.com/
)
```

Expand Down Expand Up @@ -286,7 +292,10 @@ faqs_short = [
query = "Are there fitness-related perks?"

results = co_rerank.rerank(
query=query, documents=faqs_short, top_n=2, model="rerank-english-v3.0"
query=query,
documents=faqs_short,
top_n=2,
model="rerank-english-v3.0",
)
```

Expand Down
9 changes: 4 additions & 5 deletions fern/pages/deployment-options/cohere-works-everywhere.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,7 @@ const cohere = new CohereClient({
```python PYTHON
import cohere

co = cohere.Client(api_key="",
base_url="<YOUR_DEPLOYMENT_URL>")
co = cohere.Client(api_key="", base_url="<YOUR_DEPLOYMENT_URL>")

response = co.chat(
chat_history=[
Expand Down Expand Up @@ -320,7 +319,7 @@ co = cohere.BedrockClient(
)

response = co.chat(
model="cohere.command-r-plus-v1:0",
model="cohere.command-r-plus-v1:0",
chat_history=[
{"role": "USER", "message": "Who discovered gravity?"},
{
Expand Down Expand Up @@ -513,8 +512,8 @@ const cohere = new CohereClient({
import cohere

co = cohere.Client(
api_key="<azure token>",
base_url="https://Cohere-command-r-plus-phulf-serverless.eastus2.inference.ai.azure.com/v1",
api_key="<azure token>",
base_url="https://Cohere-command-r-plus-phulf-serverless.eastus2.inference.ai.azure.com/v1",
)

response = co.chat(
Expand Down
Loading
Loading