Skip to content

Commit

Permalink
Merge branch 'main' into reformat-text-embedding-section
Browse files Browse the repository at this point in the history
  • Loading branch information
trentfowlercohere authored Jan 14, 2025
2 parents 8362d5a + 047f2cb commit df6c8e4
Show file tree
Hide file tree
Showing 22 changed files with 506 additions and 387 deletions.
12 changes: 8 additions & 4 deletions .github/scripts/check-mdx-frontmatter.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ const validators = [checkDescriptionLength, checkTitleLength];
// List of folders to exclude (relative to mdxDir)
const excludedFolders = ["-ARCHIVE-", "api-reference", "llm-university"];

/**
 * Report a frontmatter validation failure on stderr, tagged with a
 * consistent "[INVALID]" prefix so CI logs are easy to grep.
 * @param {string} message - Human-readable description of the problem.
 */
function logInvalidMessage(message) {
  const prefixed = `[INVALID]: ${message}`;
  console.error(prefixed);
}

function shouldExcludeFolder(dirPath) {
return excludedFolders.some((excludedFolder) => {
return path.relative(mdxDir, dirPath).startsWith(excludedFolder);
Expand All @@ -28,7 +32,7 @@ async function shouldExcludeFile(filePath) {
const { data } = matter(fileContent);
return data.hidden === true;
} catch (error) {
console.error(`Error reading file "${filePath}":`, error);
console.error(`[ERROR]: Error reading file "${filePath}":`, error);
return false; // In case of error, don't exclude the file
}
}
Expand All @@ -40,14 +44,14 @@ async function checkDescriptionLength(filePath) {
const maxDescriptionLength = 160;

if (!data.description) {
console.error(`File "${filePath}" is missing a description.`);
logInvalidMessage(`File "${filePath}" is missing a description.`);
return false;
}

const descriptionLength = data.description.length;

if (descriptionLength < minDescriptionLength || descriptionLength > maxDescriptionLength) {
console.error(
logInvalidMessage(
`File "${filePath}" has an invalid description length: ${descriptionLength} characters. ` +
`Description should be between ${minDescriptionLength}-${maxDescriptionLength} characters.`
);
Expand All @@ -74,7 +78,7 @@ async function checkTitleLength(filePath) {
if (filesToExclude.includes(filePath)) {
return true;
}
console.error(`File "${filePath}" is missing a title.`);
logInvalidMessage(`File "${filePath}" is missing a title.`);
return false;
}

Expand Down
8 changes: 4 additions & 4 deletions fern/pages/cohere-api/about.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ createdAt: 'Wed Sep 14 2022 16:37:41 GMT+0000 (Coordinated Universal Time)'
updatedAt: 'Fri Jun 14 2024 16:36:59 GMT+0000 (Coordinated Universal Time)'
---

The Cohere platform builds natural language processing and generation into your product with a few lines of code. Our large language models can solve a broad spectrum of natural language use cases, including classification, semantic search, paraphrasing, summarization, and content generation.
The Cohere platform allows you to leverage the power of [large language models](https://docs.cohere.com/v1/docs/introduction-to-large-language-models) (LLMs) with just a few lines of code and an [API key](https://dashboard.cohere.com/api-keys?_gl=1*14v2pj5*_gcl_au*NTczMTgyMTIzLjE3MzQ1NTY2OTA.*_ga*MTAxNTg1NTM1MS4xNjk1MjMwODQw*_ga_CRGS116RZS*MTczNjI3NzU2NS4xOS4xLjE3MzYyODExMTkuNDkuMC4w).

By [training a custom model](/docs/fine-tuning), users can customize large language models to their use case and trained on their data.
Our [Command](https://docs.cohere.com/v1/docs/command-r7b), [Embed](https://docs.cohere.com/v1/docs/cohere-embed), [Rerank](https://docs.cohere.com/v1/docs/rerank-2), and [Aya](https://docs.cohere.com/v1/docs/aya) models excel at a variety of applications, from the relatively simple ([semantic search](https://docs.cohere.com/v1/docs/semantic-search-embed), and [content generation](https://docs.cohere.com/v1/docs/introduction-to-text-generation-at-cohere)) to the more advanced ([retrieval augmented generation](https://docs.cohere.com/v1/docs/retrieval-augmented-generation-rag) and [agents](https://docs.cohere.com/v1/docs/multi-step-tool-use)). If you have a more specialized use case and custom data, you can also [train a custom model](https://docs.cohere.com/v1/docs/fine-tuning) to get better performance.

The models can be accessed through the [playground](https://dashboard.cohere.com/playground/chat), SDK, and the [CLI](/reference/command) tool.
Check out [our documentation](https://docs.cohere.com/v1/docs/the-cohere-platform) if you're ready to start building, and you might want to check out our [API pricing](https://docs.cohere.com/v1/docs/rate-limits).

## SDKs

We support SDKs in 4 different languages. Please see the following installation methods and snippets to get started.
The Cohere SDK is the primary way of accessing Cohere's models. We support SDKs in four different languages. To get started, please see the installation methods and code snippets below.

### Python

Expand Down
2 changes: 1 addition & 1 deletion fern/pages/models/rerank-2.mdx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: Rerank Model (Details and Application)
title: Cohere's Rerank Model (Details and Application)
slug: "docs/rerank-2"

hidden: false
Expand Down
168 changes: 115 additions & 53 deletions fern/pages/text-generation/prompt-engineering/command-r7b-hf.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -157,22 +157,24 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Define conversation input
conversation = [{"role": "user", "content": "What has Man always dreamed of?"}]
conversation = [
{"role": "user", "content": "What has Man always dreamed of?"}
]

# Define documents for retrieval-based generation
documents = [
{
"heading": "The Moon: Our Age-Old Foe",
"body": "Man has always dreamed of destroying the moon. In this essay, I shall..."
"heading": "The Moon: Our Age-Old Foe",
"body": "Man has always dreamed of destroying the moon. In this essay, I shall...",
},
{
"heading": "Love is all you need",
"body": "Man's dream has always been to find love. This profound lesson..."
"body": "Man's dream has always been to find love. This profound lesson...",
},
{
"heading": "The Sun: Our Age-Old Friend",
"body": "Although often underappreciated, the sun provides several notable benefits..."
}
"body": "Although often underappreciated, the sun provides several notable benefits...",
},
]

# Get the Grounded Generation prompt
Expand All @@ -181,14 +183,14 @@ input_prompt = tokenizer.apply_chat_template(
documents=documents,
tokenize=False,
add_generation_prompt=True,
return_tensors="pt"
return_tensors="pt",
)
print("== Grounded Generation prompt:", input_prompt)

# Tokenize the prompt
input_ids = tokenizer.encode_plus(input_prompt, return_tensors="pt")

# Generate a response
# Generate a response
gen_tokens = model.generate(
input_ids,
max_new_tokens=512,
Expand All @@ -200,7 +202,6 @@ gen_tokens = model.generate(
# Decode and print the generated text along with generation prompt
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)

````
</Accordion>

Expand Down Expand Up @@ -319,44 +320,49 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Define conversation input
conversation = [{"role": "user", "content": "Can you provide a sales summary for 29th September 2023, and also give me some details about the products in the 'Electronics' category, for example their prices and stock levels?"}]
conversation = [
{
"role": "user",
"content": "Can you provide a sales summary for 29th September 2023, and also give me some details about the products in the 'Electronics' category, for example their prices and stock levels?",
}
]

# Define tools
tools = [
{
"type": "function",
"function": {
"name": "query_daily_sales_report",
"description": "Connects to a database to retrieve overall sales volumes and sales information for a given day.",
"parameters": {
"type": "object",
"properties": {
"day": {
"description": "Retrieves sales data for this day, formatted as YYYY-MM-DD.",
"type": "string",
}
},
"required": ["day"]
{
"type": "function",
"function": {
"name": "query_daily_sales_report",
"description": "Connects to a database to retrieve overall sales volumes and sales information for a given day.",
"parameters": {
"type": "object",
"properties": {
"day": {
"description": "Retrieves sales data for this day, formatted as YYYY-MM-DD.",
"type": "string",
}
},
}
},
{
"type": "function",
"function": {
"name": "query_product_catalog",
"description": "Connects to a a product catalog with information about all the products being sold, including categories, prices, and stock levels.",
"parameters": {
"type": "object",
"properties": {
"category": {
"description": "Retrieves product information data for all products in this category.",
"type": "string",
}
},
"required": ["category"]
"required": ["day"],
},
},
},
{
"type": "function",
"function": {
"name": "query_product_catalog",
"description": "Connects to a a product catalog with information about all the products being sold, including categories, prices, and stock levels.",
"parameters": {
"type": "object",
"properties": {
"category": {
"description": "Retrieves product information data for all products in this category.",
"type": "string",
}
},
}
}
"required": ["category"],
},
},
},
]

# Get the Tool Use prompt
Expand All @@ -373,7 +379,7 @@ print("== Prompt for step 1 of the Agent:", input_prompt)
# Tokenize the prompt
input_ids = tokenizer.encode_plus(input_prompt, return_tensors="pt")

# Generate a response
# Generate a response
gen_tokens = model.generate(
input_ids,
max_new_tokens=512,
Expand All @@ -383,7 +389,9 @@ gen_tokens = model.generate(
)

# Decode and print the generated text along with generation prompt
gen_text = tokenizer.decode(gen_tokens[0][len(input_ids[0]):], skip_special_tokens=True)
gen_text = tokenizer.decode(
gen_tokens[0][len(input_ids[0]) :], skip_special_tokens=True
)
print(gen_text)
````
</Accordion>
Expand Down Expand Up @@ -477,21 +485,73 @@ I can find the sales summary for 29th September 2023 as well as the details abou
If the model generates tool calls, you should add them to the chat history like so:

````python PYTHON
tool_call_0 = {"name": "query_daily_sales_report", "arguments": {"day": "2023-09-29"}}
tool_call_1 = {"name": "query_product_catalog", "arguments": {"category": "Electronics"}}
tool_call_0 = {
"name": "query_daily_sales_report",
"arguments": {"day": "2023-09-29"},
}
tool_call_1 = {
"name": "query_product_catalog",
"arguments": {"category": "Electronics"},
}
tool_plan = "I will use the 'query_daily_sales_report' tool to find the sales summary for 29th September 2023. I will then use the 'query_product_catalog' tool to find the details about the products in the 'Electronics' category."

conversation.append({"role": "assistant", "tool_calls": [{"id": "0", "type": "function", "function": tool_call_0}, {"id": "1", "type": "function", "function": tool_call_1}], "tool_plan": tool_plan})
conversation.append(
{
"role": "assistant",
"tool_calls": [
{"id": "0", "type": "function", "function": tool_call_0},
{"id": "1", "type": "function", "function": tool_call_1},
],
"tool_plan": tool_plan,
}
)
````

and then call the tool and append the result, with the tool role, like below. It is crucial to format the tool results as a dictionary:

````python PYTHON
api_response_query_daily_sales_report = {"date": "2023-09-29", "summary": "Total Sales Amount: 10000, Total Units Sold: 250"} # this needs to be a dictionary!!
api_response_query_product_catalog = {"category": "Electronics", "products": [{"product_id": "E1001", "name": "Smartphone", "price": 500, "stock_level": 20}, {"product_id": "E1002", "name": "Laptop", "price": 1000, "stock_level": 15}, {"product_id": "E1003", "name": "Tablet", "price": 300, "stock_level": 25}]} # this needs to be a dictionary!!
api_response_query_daily_sales_report = {
"date": "2023-09-29",
"summary": "Total Sales Amount: 10000, Total Units Sold: 250",
} # this needs to be a dictionary!!
api_response_query_product_catalog = {
"category": "Electronics",
"products": [
{
"product_id": "E1001",
"name": "Smartphone",
"price": 500,
"stock_level": 20,
},
{
"product_id": "E1002",
"name": "Laptop",
"price": 1000,
"stock_level": 15,
},
{
"product_id": "E1003",
"name": "Tablet",
"price": 300,
"stock_level": 25,
},
],
} # this needs to be a dictionary!!

conversation.append({"role": "tool", "tool_call_id": "0", "content": api_response_query_daily_sales_report})
conversation.append({"role": "tool", "tool_call_id": "1", "content": api_response_query_product_catalog})
conversation.append(
{
"role": "tool",
"tool_call_id": "0",
"content": api_response_query_daily_sales_report,
}
)
conversation.append(
{
"role": "tool",
"tool_call_id": "1",
"content": api_response_query_product_catalog,
}
)
````

After that, you can generate() again to let the model use the tool result in the chat.
Expand Down Expand Up @@ -519,7 +579,7 @@ print("== Prompt for step 2 of the Agent:", input_prompt)
# Tokenize the prompt
input_ids = tokenizer.encode_plus(input_prompt, return_tensors="pt")

# Generate a response
# Generate a response
gen_tokens = model.generate(
input_ids,
max_new_tokens=512,
Expand All @@ -528,7 +588,9 @@ gen_tokens = model.generate(
)

# Decode and print the generated text along with generation prompt
gen_text = tokenizer.decode(gen_tokens[0][len(input_ids[0]):], skip_special_tokens=True)
gen_text = tokenizer.decode(
gen_tokens[0][len(input_ids[0]) :], skip_special_tokens=True
)
print(gen_text)
````
</Accordion>
Expand Down
Loading

0 comments on commit df6c8e4

Please sign in to comment.