Skip to content

Commit

Permalink
Merge branch 'main' into reformat-text-embedding-section
Browse files Browse the repository at this point in the history
  • Loading branch information
trentfowlercohere authored Jan 14, 2025
2 parents 8362d5a + 047f2cb commit df6c8e4
Show file tree
Hide file tree
Showing 22 changed files with 506 additions and 387 deletions.
12 changes: 8 additions & 4 deletions .github/scripts/check-mdx-frontmatter.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ const validators = [checkDescriptionLength, checkTitleLength];
// List of folders to exclude (relative to mdxDir)
const excludedFolders = ["-ARCHIVE-", "api-reference", "llm-university"];

/**
 * Report a frontmatter validation failure on stderr, tagged with a
 * consistent "[INVALID]" prefix so CI logs are easy to grep.
 * @param {string} message - Human-readable description of the problem.
 */
function logInvalidMessage(message) {
  const prefixed = `[INVALID]: ${message}`;
  console.error(prefixed);
}

function shouldExcludeFolder(dirPath) {
return excludedFolders.some((excludedFolder) => {
return path.relative(mdxDir, dirPath).startsWith(excludedFolder);
Expand All @@ -28,7 +32,7 @@ async function shouldExcludeFile(filePath) {
const { data } = matter(fileContent);
return data.hidden === true;
} catch (error) {
console.error(`Error reading file "${filePath}":`, error);
console.error(`[ERROR]: Error reading file "${filePath}":`, error);
return false; // In case of error, don't exclude the file
}
}
Expand All @@ -40,14 +44,14 @@ async function checkDescriptionLength(filePath) {
const maxDescriptionLength = 160;

if (!data.description) {
console.error(`File "${filePath}" is missing a description.`);
logInvalidMessage(`File "${filePath}" is missing a description.`);
return false;
}

const descriptionLength = data.description.length;

if (descriptionLength < minDescriptionLength || descriptionLength > maxDescriptionLength) {
console.error(
logInvalidMessage(
`File "${filePath}" has an invalid description length: ${descriptionLength} characters. ` +
`Description should be between ${minDescriptionLength}-${maxDescriptionLength} characters.`
);
Expand All @@ -74,7 +78,7 @@ async function checkTitleLength(filePath) {
if (filesToExclude.includes(filePath)) {
return true;
}
console.error(`File "${filePath}" is missing a title.`);
logInvalidMessage(`File "${filePath}" is missing a title.`);
return false;
}

Expand Down
8 changes: 4 additions & 4 deletions fern/pages/cohere-api/about.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ createdAt: 'Wed Sep 14 2022 16:37:41 GMT+0000 (Coordinated Universal Time)'
updatedAt: 'Fri Jun 14 2024 16:36:59 GMT+0000 (Coordinated Universal Time)'
---

The Cohere platform builds natural language processing and generation into your product with a few lines of code. Our large language models can solve a broad spectrum of natural language use cases, including classification, semantic search, paraphrasing, summarization, and content generation.
The Cohere platform allows you to leverage the power of [large language models](https://docs.cohere.com/v1/docs/introduction-to-large-language-models) (LLMs) with just a few lines of code and an [API key](https://dashboard.cohere.com/api-keys?_gl=1*14v2pj5*_gcl_au*NTczMTgyMTIzLjE3MzQ1NTY2OTA.*_ga*MTAxNTg1NTM1MS4xNjk1MjMwODQw*_ga_CRGS116RZS*MTczNjI3NzU2NS4xOS4xLjE3MzYyODExMTkuNDkuMC4w).

By [training a custom model](/docs/fine-tuning), users can customize large language models to their use case and trained on their data.
Our [Command](https://docs.cohere.com/v1/docs/command-r7b), [Embed](https://docs.cohere.com/v1/docs/cohere-embed), [Rerank](https://docs.cohere.com/v1/docs/rerank-2), and [Aya](https://docs.cohere.com/v1/docs/aya) models excel at a variety of applications, from the relatively simple ([semantic search](https://docs.cohere.com/v1/docs/semantic-search-embed), and [content generation](https://docs.cohere.com/v1/docs/introduction-to-text-generation-at-cohere)) to the more advanced ([retrieval augmented generation](https://docs.cohere.com/v1/docs/retrieval-augmented-generation-rag) and [agents](https://docs.cohere.com/v1/docs/multi-step-tool-use)). If you have a more specialized use case and custom data, you can also [train a custom model](https://docs.cohere.com/v1/docs/fine-tuning) to get better performance.

The models can be accessed through the [playground](https://dashboard.cohere.com/playground/chat), SDK, and the [CLI](/reference/command) tool.
Check out [our documentation](https://docs.cohere.com/v1/docs/the-cohere-platform) if you're ready to start building, and you might want to check out our [API pricing](https://docs.cohere.com/v1/docs/rate-limits).

## SDKs

We support SDKs in 4 different languages. Please see the following installation methods and snippets to get started.
The Cohere SDK is the primary way of accessing Cohere's models. We support SDKs in four different languages. To get started, please see the installation methods and code snippets below.

### Python

Expand Down
2 changes: 1 addition & 1 deletion fern/pages/models/rerank-2.mdx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: Rerank Model (Details and Application)
title: Cohere's Rerank Model (Details and Application)
slug: "docs/rerank-2"

hidden: false
Expand Down
168 changes: 115 additions & 53 deletions fern/pages/text-generation/prompt-engineering/command-r7b-hf.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -157,22 +157,24 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Define conversation input
conversation = [{"role": "user", "content": "What has Man always dreamed of?"}]
conversation = [
{"role": "user", "content": "What has Man always dreamed of?"}
]

# Define documents for retrieval-based generation
documents = [
{
"heading": "The Moon: Our Age-Old Foe",
"body": "Man has always dreamed of destroying the moon. In this essay, I shall..."
"heading": "The Moon: Our Age-Old Foe",
"body": "Man has always dreamed of destroying the moon. In this essay, I shall...",
},
{
"heading": "Love is all you need",
"body": "Man's dream has always been to find love. This profound lesson..."
"body": "Man's dream has always been to find love. This profound lesson...",
},
{
"heading": "The Sun: Our Age-Old Friend",
"body": "Although often underappreciated, the sun provides several notable benefits..."
}
"body": "Although often underappreciated, the sun provides several notable benefits...",
},
]

# Get the Grounded Generation prompt
Expand All @@ -181,14 +183,14 @@ input_prompt = tokenizer.apply_chat_template(
documents=documents,
tokenize=False,
add_generation_prompt=True,
return_tensors="pt"
return_tensors="pt",
)
print("== Grounded Generation prompt:", input_prompt)

# Tokenize the prompt
input_ids = tokenizer.encode_plus(input_prompt, return_tensors="pt")

# Generate a response
# Generate a response
gen_tokens = model.generate(
input_ids,
max_new_tokens=512,
Expand All @@ -200,7 +202,6 @@ gen_tokens = model.generate(
# Decode and print the generated text along with generation prompt
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)

````
</Accordion>

Expand Down Expand Up @@ -319,44 +320,49 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Define conversation input
conversation = [{"role": "user", "content": "Can you provide a sales summary for 29th September 2023, and also give me some details about the products in the 'Electronics' category, for example their prices and stock levels?"}]
conversation = [
{
"role": "user",
"content": "Can you provide a sales summary for 29th September 2023, and also give me some details about the products in the 'Electronics' category, for example their prices and stock levels?",
}
]

# Define tools
tools = [
{
"type": "function",
"function": {
"name": "query_daily_sales_report",
"description": "Connects to a database to retrieve overall sales volumes and sales information for a given day.",
"parameters": {
"type": "object",
"properties": {
"day": {
"description": "Retrieves sales data for this day, formatted as YYYY-MM-DD.",
"type": "string",
}
},
"required": ["day"]
{
"type": "function",
"function": {
"name": "query_daily_sales_report",
"description": "Connects to a database to retrieve overall sales volumes and sales information for a given day.",
"parameters": {
"type": "object",
"properties": {
"day": {
"description": "Retrieves sales data for this day, formatted as YYYY-MM-DD.",
"type": "string",
}
},
}
},
{
"type": "function",
"function": {
"name": "query_product_catalog",
"description": "Connects to a a product catalog with information about all the products being sold, including categories, prices, and stock levels.",
"parameters": {
"type": "object",
"properties": {
"category": {
"description": "Retrieves product information data for all products in this category.",
"type": "string",
}
},
"required": ["category"]
"required": ["day"],
},
},
},
{
"type": "function",
"function": {
"name": "query_product_catalog",
"description": "Connects to a a product catalog with information about all the products being sold, including categories, prices, and stock levels.",
"parameters": {
"type": "object",
"properties": {
"category": {
"description": "Retrieves product information data for all products in this category.",
"type": "string",
}
},
}
}
"required": ["category"],
},
},
},
]

# Get the Tool Use prompt
Expand All @@ -373,7 +379,7 @@ print("== Prompt for step 1 of the Agent:", input_prompt)
# Tokenize the prompt
input_ids = tokenizer.encode_plus(input_prompt, return_tensors="pt")

# Generate a response
# Generate a response
gen_tokens = model.generate(
input_ids,
max_new_tokens=512,
Expand All @@ -383,7 +389,9 @@ gen_tokens = model.generate(
)

# Decode and print the generated text along with generation prompt
gen_text = tokenizer.decode(gen_tokens[0][len(input_ids[0]):], skip_special_tokens=True)
gen_text = tokenizer.decode(
gen_tokens[0][len(input_ids[0]) :], skip_special_tokens=True
)
print(gen_text)
````
</Accordion>
Expand Down Expand Up @@ -477,21 +485,73 @@ I can find the sales summary for 29th September 2023 as well as the details abou
If the model generates tool calls, you should add them to the chat history like so:

````python PYTHON
tool_call_0 = {"name": "query_daily_sales_report", "arguments": {"day": "2023-09-29"}}
tool_call_1 = {"name": "query_product_catalog", "arguments": {"category": "Electronics"}}
tool_call_0 = {
"name": "query_daily_sales_report",
"arguments": {"day": "2023-09-29"},
}
tool_call_1 = {
"name": "query_product_catalog",
"arguments": {"category": "Electronics"},
}
tool_plan = "I will use the 'query_daily_sales_report' tool to find the sales summary for 29th September 2023. I will then use the 'query_product_catalog' tool to find the details about the products in the 'Electronics' category."

conversation.append({"role": "assistant", "tool_calls": [{"id": "0", "type": "function", "function": tool_call_0}, {"id": "1", "type": "function", "function": tool_call_1}], "tool_plan": tool_plan})
conversation.append(
{
"role": "assistant",
"tool_calls": [
{"id": "0", "type": "function", "function": tool_call_0},
{"id": "1", "type": "function", "function": tool_call_1},
],
"tool_plan": tool_plan,
}
)
````

and then call the tool and append the result, with the tool role, like below. It is crucial to format the tool results as a dictionary:

````python PYTHON
api_response_query_daily_sales_report = {"date": "2023-09-29", "summary": "Total Sales Amount: 10000, Total Units Sold: 250"} # this needs to be a dictionary!!
api_response_query_product_catalog = {"category": "Electronics", "products": [{"product_id": "E1001", "name": "Smartphone", "price": 500, "stock_level": 20}, {"product_id": "E1002", "name": "Laptop", "price": 1000, "stock_level": 15}, {"product_id": "E1003", "name": "Tablet", "price": 300, "stock_level": 25}]} # this needs to be a dictionary!!
api_response_query_daily_sales_report = {
"date": "2023-09-29",
"summary": "Total Sales Amount: 10000, Total Units Sold: 250",
} # this needs to be a dictionary!!
api_response_query_product_catalog = {
"category": "Electronics",
"products": [
{
"product_id": "E1001",
"name": "Smartphone",
"price": 500,
"stock_level": 20,
},
{
"product_id": "E1002",
"name": "Laptop",
"price": 1000,
"stock_level": 15,
},
{
"product_id": "E1003",
"name": "Tablet",
"price": 300,
"stock_level": 25,
},
],
} # this needs to be a dictionary!!

conversation.append({"role": "tool", "tool_call_id": "0", "content": api_response_query_daily_sales_report})
conversation.append({"role": "tool", "tool_call_id": "1", "content": api_response_query_product_catalog})
conversation.append(
{
"role": "tool",
"tool_call_id": "0",
"content": api_response_query_daily_sales_report,
}
)
conversation.append(
{
"role": "tool",
"tool_call_id": "1",
"content": api_response_query_product_catalog,
}
)
````

After that, you can generate() again to let the model use the tool result in the chat.
Expand Down Expand Up @@ -519,7 +579,7 @@ print("== Prompt for step 2 of the Agent:", input_prompt)
# Tokenize the prompt
input_ids = tokenizer.encode_plus(input_prompt, return_tensors="pt")

# Generate a response
# Generate a response
gen_tokens = model.generate(
input_ids,
max_new_tokens=512,
Expand All @@ -528,7 +588,9 @@ gen_tokens = model.generate(
)

# Decode and print the generated text along with generation prompt
gen_text = tokenizer.decode(gen_tokens[0][len(input_ids[0]):], skip_special_tokens=True)
gen_text = tokenizer.decode(
gen_tokens[0][len(input_ids[0]) :], skip_special_tokens=True
)
print(gen_text)
````
</Accordion>
Expand Down
Loading

0 comments on commit df6c8e4

Please sign in to comment.