Merge branch 'langchain-ai:master' into master
lvliang-intel authored Mar 1, 2024
2 parents 96fe6cc + 0d7fb5f commit f259e2d
Showing 316 changed files with 13,677 additions and 3,243 deletions.
9 changes: 6 additions & 3 deletions .github/scripts/check_diff.py
@@ -5,9 +5,10 @@
 
 LANGCHAIN_DIRS = [
     "libs/core",
+    "libs/text-splitters",
+    "libs/community",
     "libs/langchain",
     "libs/experimental",
-    "libs/community",
 ]
 
 if __name__ == "__main__":
@@ -48,9 +49,11 @@
             dirs_to_run["extended-test"].add(dir_)
         elif file.startswith("libs/partners"):
             partner_dir = file.split("/")[2]
-            if os.path.isdir(f"libs/partners/{partner_dir}"):
+            if os.path.isdir(f"libs/partners/{partner_dir}") and os.listdir(
+                f"libs/partners/{partner_dir}"
+            ) != ["README.md"]:
                 dirs_to_run["test"].add(f"libs/partners/{partner_dir}")
-            # Skip if the directory was deleted
+            # Skip if the directory was deleted or is just a tombstone readme
         elif file.startswith("libs/"):
             raise ValueError(
                 f"Unknown lib: {file}. check_diff.py likely needs "
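In effect, the new condition skips partner directories that still exist but contain only a tombstone `README.md` left behind when a partner package moves out of the monorepo. A minimal sketch of the guard (`should_test` is a hypothetical helper name, not a function in `check_diff.py`):

```python
import os


def should_test(partner_dir: str) -> bool:
    # Run tests only when the partner directory exists and holds
    # more than a lone tombstone README.md.
    path = f"libs/partners/{partner_dir}"
    return os.path.isdir(path) and os.listdir(path) != ["README.md"]
```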
2 changes: 1 addition & 1 deletion .github/scripts/get_min_versions.py
@@ -4,7 +4,7 @@
 from packaging.version import parse as parse_version
 import re
 
-MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain"]
+MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain", "langchain-text-splitters"]
 
 
 def get_min_version(version: str) -> str:
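With `langchain-text-splitters` added to `MIN_VERSION_LIBS`, the min-version CI job now also resolves that package's lowest declared bound. As a rough, hedged illustration of what a minimum-version extractor does with a caret or tilde specifier (a sketch, not the file's actual implementation):

```python
from packaging.version import parse as parse_version


def get_min_version_sketch(specifier: str) -> str:
    # "^0.1.3" or "~0.1.3" pins the minimum at the stated version;
    # an exact pin passes through unchanged.
    if specifier and specifier[0] in "^~":
        specifier = specifier[1:]
    return str(parse_version(specifier))


assert get_min_version_sketch("^0.1.3") == "0.1.3"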
1 change: 1 addition & 0 deletions .github/workflows/_integration_test.yml
@@ -74,6 +74,7 @@ jobs:
           ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
           ES_API_KEY: ${{ secrets.ES_API_KEY }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
+          MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
         run: |
           make integration_tests
1 change: 1 addition & 0 deletions .github/workflows/_release.yml
@@ -195,6 +195,7 @@ jobs:
           ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
           ES_API_KEY: ${{ secrets.ES_API_KEY }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
+          MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
         run: make integration_tests
         working-directory: ${{ inputs.working-directory }}
 
3 changes: 2 additions & 1 deletion .github/workflows/api_doc_build.yml
@@ -51,7 +51,8 @@ jobs:
         run: |
           poetry run python -m pip install --upgrade --no-cache-dir pip setuptools
           poetry run python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext
-          poetry run python -m pip install ./libs/partners/*
+          # skip airbyte and ibm due to pandas dependency issue
+          poetry run python -m pip install $(ls ./libs/partners | grep -vE "airbyte|ibm" | xargs -I {} echo "./libs/partners/{}")
           poetry run python -m pip install --exists-action=w --no-cache-dir -r docs/api_reference/requirements.txt
       - name: Build docs
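The replacement install line filters the partner directories in shell. For readability, here is a rough Python equivalent of what the `ls | grep -vE | xargs` pipeline produces (illustrative only):

```python
import os

partner_paths = [
    f"./libs/partners/{d}"
    for d in sorted(os.listdir("./libs/partners"))
    if "airbyte" not in d and "ibm" not in d  # mirrors grep -vE "airbyte|ibm"
]
# Each surviving directory becomes one "pip install ./libs/partners/<name>" target.
print(" ".join(partner_paths))
```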
2 changes: 1 addition & 1 deletion cookbook/Multi_modal_RAG.ipynb
@@ -116,7 +116,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "from unstructured.partition.pdf import partition_pdf\n",
     "\n",
     "\n",
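This one-line swap recurs throughout the notebooks below: text splitters now live in the standalone `langchain-text-splitters` package rather than in `langchain` itself. A minimal usage sketch of the relocated import (sample text and parameters are illustrative):

```python
# Old location, now deprecated:
#   from langchain.text_splitter import CharacterTextSplitter
# New dedicated package (pip install langchain-text-splitters):
from langchain_text_splitters import CharacterTextSplitter

splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = splitter.split_text("A paragraph of text.\n\n" * 500)
```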
2 changes: 1 addition & 1 deletion cookbook/advanced_rag_eval.ipynb
@@ -68,7 +68,7 @@
     "pdf_pages = loader.load()\n",
     "\n",
     "# Split\n",
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
     "\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
     "all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",
2 changes: 1 addition & 1 deletion cookbook/agent_vectorstore.ipynb
@@ -28,9 +28,9 @@
    "outputs": [],
    "source": [
     "from langchain.chains import RetrievalQA\n",
-    "from langchain.text_splitter import CharacterTextSplitter\n",
     "from langchain_community.vectorstores import Chroma\n",
     "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "llm = OpenAI(temperature=0)"
    ]
2 changes: 1 addition & 1 deletion cookbook/autogpt/marathon_times.ipynb
@@ -227,8 +227,8 @@
     "    BaseCombineDocumentsChain,\n",
     "    load_qa_with_sources_chain,\n",
     ")\n",
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
     "from langchain.tools import BaseTool, DuckDuckGoSearchRun\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
     "from pydantic import Field\n",
     "\n",
     "\n",
4 changes: 2 additions & 2 deletions cookbook/code-analysis-deeplake.ipynb
@@ -24,7 +24,7 @@
    "source": [
     "1. Prepare data:\n",
     "   1. Upload all python project files using the `langchain_community.document_loaders.TextLoader`. We will call these files the **documents**.\n",
-    "   2. Split all documents to chunks using the `langchain.text_splitter.CharacterTextSplitter`.\n",
+    "   2. Split all documents to chunks using the `langchain_text_splitters.CharacterTextSplitter`.\n",
     "   3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain_community.vectorstores.DeepLake`\n",
     "2. Question-Answering:\n",
     "   1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
@@ -621,7 +621,7 @@
    }
   ],
   "source": [
-    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
     "texts = text_splitter.split_documents(docs)\n",
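The numbered pipeline in the markdown cell above reduces to a few calls. A hedged sketch of the prepare-data half, with the file path and chunk size chosen for illustration:

```python
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter

# 1. Load a project file as a document, 2. split it into chunks.
docs = TextLoader("example.py").load()
texts = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(docs)
```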
6 changes: 3 additions & 3 deletions cookbook/deeplake_semantic_search_over_chat.ipynb
@@ -52,12 +52,12 @@
     "import os\n",
     "\n",
     "from langchain.chains import RetrievalQA\n",
-    "from langchain.text_splitter import (\n",
+    "from langchain_community.vectorstores import DeepLake\n",
+    "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+    "from langchain_text_splitters import (\n",
     "    CharacterTextSplitter,\n",
     "    RecursiveCharacterTextSplitter,\n",
     ")\n",
-    "from langchain_community.vectorstores import DeepLake\n",
-    "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
     "activeloop_token = getpass.getpass(\"Activeloop Token:\")\n",
2 changes: 1 addition & 1 deletion cookbook/fireworks_rag.ipynb
@@ -132,7 +132,7 @@
     "data = loader.load()\n",
     "\n",
     "# Split\n",
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
     "\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
     "all_splits = text_splitter.split_documents(data)\n",
2 changes: 1 addition & 1 deletion cookbook/hypothetical_document_embeddings.ipynb
@@ -170,8 +170,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain.text_splitter import CharacterTextSplitter\n",
     "from langchain_community.vectorstores import Chroma\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "with open(\"../../state_of_the_union.txt\") as f:\n",
     "    state_of_the_union = f.read()\n",
2 changes: 1 addition & 1 deletion cookbook/nomic_embedding_rag.ipynb
@@ -124,7 +124,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
     "    chunk_size=7500, chunk_overlap=100\n",
4 changes: 2 additions & 2 deletions cookbook/openai_functions_retrieval_qa.ipynb
@@ -20,10 +20,10 @@
    "outputs": [],
    "source": [
     "from langchain.chains import RetrievalQA\n",
-    "from langchain.text_splitter import CharacterTextSplitter\n",
     "from langchain_community.document_loaders import TextLoader\n",
     "from langchain_community.vectorstores import Chroma\n",
-    "from langchain_openai import OpenAIEmbeddings"
+    "from langchain_openai import OpenAIEmbeddings\n",
+    "from langchain_text_splitters import CharacterTextSplitter"
    ]
   },
  {
4 changes: 2 additions & 2 deletions cookbook/qianfan_baidu_elasticesearch_RAG.ipynb
@@ -59,13 +59,13 @@
     "from baidubce.auth.bce_credentials import BceCredentials\n",
     "from baidubce.bce_client_configuration import BceClientConfiguration\n",
     "from langchain.chains.retrieval_qa import RetrievalQA\n",
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
     "from langchain_community.document_loaders.baiducloud_bos_directory import (\n",
     "    BaiduBOSDirectoryLoader,\n",
     ")\n",
     "from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
     "from langchain_community.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
-    "from langchain_community.vectorstores import BESVectorStore"
+    "from langchain_community.vectorstores import BESVectorStore\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter"
    ]
   },
  {
6 changes: 3 additions & 3 deletions cookbook/rag_with_quantized_embeddings.ipynb
@@ -36,16 +36,16 @@
     "from bs4 import BeautifulSoup as Soup\n",
     "from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
     "from langchain.storage import InMemoryByteStore, LocalFileStore\n",
-    "\n",
-    "# For our example, we'll load docs from the web\n",
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter  # noqa\n",
     "from langchain_community.document_loaders.recursive_url_loader import (\n",
     "    RecursiveUrlLoader,\n",
     ")\n",
     "\n",
     "# noqa\n",
     "from langchain_community.vectorstores import Chroma\n",
     "\n",
+    "# For our example, we'll load docs from the web\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter  # noqa\n",
+    "\n",
     "DOCSTORE_DIR = \".\"\n",
     "DOCSTORE_ID_KEY = \"doc_id\""
2 changes: 1 addition & 1 deletion cookbook/sales_agent_with_context.ipynb
@@ -51,11 +51,11 @@
     "from langchain.chains.base import Chain\n",
     "from langchain.prompts import PromptTemplate\n",
     "from langchain.prompts.base import StringPromptTemplate\n",
-    "from langchain.text_splitter import CharacterTextSplitter\n",
     "from langchain_community.llms import BaseLLM\n",
     "from langchain_community.vectorstores import Chroma\n",
     "from langchain_core.agents import AgentAction, AgentFinish\n",
     "from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "from pydantic import BaseModel, Field"
    ]
   },
2 changes: 1 addition & 1 deletion cookbook/together_ai.ipynb
@@ -39,7 +39,7 @@
     "data = loader.load()\n",
     "\n",
     "# Split\n",
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
     "\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
     "all_splits = text_splitter.split_documents(data)\n",
2 changes: 1 addition & 1 deletion cookbook/twitter-the-algorithm-analysis-deeplake.ipynb
@@ -2610,7 +2610,7 @@
    }
   ],
   "source": [
-    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
     "texts = text_splitter.split_documents(docs)"
1 change: 1 addition & 0 deletions docs/.gitignore
@@ -0,0 +1 @@
+/.quarto/
9 changes: 9 additions & 0 deletions docs/docs/additional_resources/tutorials.mdx
@@ -25,6 +25,7 @@ Below are links to tutorials and courses on LangChain. For written guides on com
 [LangChain Cheatsheet](https://pub.towardsai.net/langchain-cheatsheet-all-secrets-on-a-single-page-8be26b721cde) by **Ivan Reznikov**
 
 ### Short Tutorials
+
 [LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
 
 [LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
@@ -33,6 +34,14 @@
 
 [LangChain 101 Course](https://medium.com/@ivanreznikov/langchain-101-course-updated-668f7b41d6cb) by **Ivan Reznikov**
 
+### Code Alongs
+
+DataCamp has developed a [Become a Generative AI Developer series](https://www.datacamp.com/ai-code-alongs) featuring 9 free code-alongs, including ones on building chatbots using LangChain and the OpenAI and Pinecone APIs. When you start a code-along, you are launched into a fully configured notebook environment with an expert-led video to guide you through the project.
+
+[Prompt Engineering with GPT & LangChain](https://www.datacamp.com/code-along/prompt-engineering-gpt-langchain)
+
+[Retrieval Augmented Generation with the OpenAI API & Pinecone](https://www.datacamp.com/code-along/retrieval-augmented-generation-openai-api-pinecone)
+
 ## Tutorials
 
 ### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs)
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/get_started/quickstart.mdx
@@ -281,7 +281,7 @@ Then we can build our index:
 
 ```python
 from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 
 text_splitter = RecursiveCharacterTextSplitter()
@@ -531,7 +531,7 @@ from langchain_openai import ChatOpenAI
 from langchain_community.document_loaders import WebBaseLoader
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.tools.retriever import create_retriever_tool
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_openai import ChatOpenAI
Expand Down
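For context, the updated quickstart imports assemble into an index roughly like this (URL and loader choice are illustrative; treat this as a sketch rather than the guide's exact code):

```python
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load a page, split it, then embed the chunks into a FAISS index.
docs = WebBaseLoader("https://docs.smith.langchain.com").load()
documents = RecursiveCharacterTextSplitter().split_documents(docs)
vector = FAISS.from_documents(documents, OpenAIEmbeddings())
```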
@@ -643,9 +643,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
     "from langchain_community.vectorstores import FAISS\n",
     "from langchain_openai import OpenAIEmbeddings\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
     "\n",
     "# 2. Load the data: In our case data's already loaded\n",
     "# 3. Anonymize the data before indexing\n",
2 changes: 1 addition & 1 deletion docs/docs/integrations/callbacks/confident.ipynb
@@ -215,10 +215,10 @@
    "source": [
     "import requests\n",
     "from langchain.chains import RetrievalQA\n",
-    "from langchain.text_splitter import CharacterTextSplitter\n",
     "from langchain_community.document_loaders import TextLoader\n",
     "from langchain_community.vectorstores import Chroma\n",
     "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "text_file_url = \"https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt\"\n",
     "\n",
5 changes: 5 additions & 0 deletions docs/docs/integrations/document_loaders/airbyte.ipynb
@@ -37,6 +37,11 @@
    "id": "3dd92c62",
    "metadata": {},
    "source": [
+    "Note: Currently, the `airbyte` library does not support Pydantic v2.\n",
+    "Please downgrade to Pydantic v1 to use this package.\n",
+    "\n",
+    "Note: This package also currently requires Python 3.10+.\n",
+    "\n",
     "## Loading Documents\n",
     "\n",
     "By default, the `AirbyteLoader` will load any structured data from a stream and output yaml-formatted documents."
Expand Down
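The two notes translate into hard environment constraints. A hedged pre-flight check you could run before using the loader (the assertions merely restate the notes; they are not part of the package):

```python
import sys

import pydantic

# airbyte currently needs Python 3.10+ and Pydantic v1, per the notes above.
assert sys.version_info >= (3, 10), "airbyte requires Python 3.10+"
assert pydantic.VERSION.startswith("1."), "downgrade: pip install 'pydantic<2'"
```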
4 changes: 2 additions & 2 deletions docs/docs/integrations/document_loaders/psychic.ipynb
@@ -78,9 +78,9 @@
    "outputs": [],
    "source": [
     "from langchain.chains import RetrievalQAWithSourcesChain\n",
-    "from langchain.text_splitter import CharacterTextSplitter\n",
     "from langchain_community.vectorstores import Chroma\n",
-    "from langchain_openai import OpenAI, OpenAIEmbeddings"
+    "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+    "from langchain_text_splitters import CharacterTextSplitter"
    ]
   },
  {
Expand Down
7 changes: 4 additions & 3 deletions docs/docs/integrations/document_loaders/source_code.ipynb
@@ -62,9 +62,9 @@
     "warnings.filterwarnings(\"ignore\")\n",
     "from pprint import pprint\n",
     "\n",
-    "from langchain.text_splitter import Language\n",
     "from langchain_community.document_loaders.generic import GenericLoader\n",
-    "from langchain_community.document_loaders.parsers import LanguageParser"
+    "from langchain_community.document_loaders.parsers import LanguageParser\n",
+    "from langchain_text_splitters import Language"
    ]
   },
  {
@@ -323,7 +323,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain.text_splitter import (\n",
+    "from langchain_text_splitters import (\n",
     "    Language,\n",
     "    RecursiveCharacterTextSplitter,\n",
     ")"
@@ -426,6 +426,7 @@
   },
  {
    "cell_type": "markdown",
+   "id": "7fb27b941602401d91542211134fc71a",
    "metadata": {},
    "source": [
     "## Adding Languages using Tree-sitter Template\n",
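The `Language` enum imported above pairs with `RecursiveCharacterTextSplitter` to split source files along syntax-aware boundaries. A brief sketch (chunk sizes and sample input are illustrative):

```python
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter

# Build a splitter that respects Python syntax boundaries.
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=2000, chunk_overlap=200
)
chunks = python_splitter.split_text("def foo():\n    return 42\n\n" * 200)
```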
4 changes: 2 additions & 2 deletions docs/docs/integrations/document_loaders/youtube_audio.ipynb
@@ -168,9 +168,9 @@
    "outputs": [],
    "source": [
     "from langchain.chains import RetrievalQA\n",
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
     "from langchain_community.vectorstores import FAISS\n",
-    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
+    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter"
    ]
   },
  {
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/llms/llm_caching.ipynb
@@ -1463,7 +1463,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "text_splitter = CharacterTextSplitter()"
    ]
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/llms/manifest.ipynb
@@ -82,7 +82,7 @@
     "# Map reduce example\n",
     "from langchain.chains.mapreduce import MapReduceChain\n",
     "from langchain.prompts import PromptTemplate\n",
-    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "_prompt = \"\"\"Write a concise summary of the following:\n",
     "\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/platforms/openai.mdx
@@ -68,7 +68,7 @@ for OpenAI LLMs.
 
 You can also use it to count tokens when splitting documents with
 ```python
-from langchain.text_splitter import CharacterTextSplitter
+from langchain_text_splitters import CharacterTextSplitter
 CharacterTextSplitter.from_tiktoken_encoder(...)
 ```
 For a more detailed walkthrough of this, see [this notebook](/docs/modules/data_connection/document_transformers/split_by_token#tiktoken)
Expand Down
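Filled in, the `from_tiktoken_encoder(...)` call from that snippet looks roughly like this (chunk size and sample text are illustrative):

```python
from langchain_text_splitters import CharacterTextSplitter

# Split on characters but measure chunk length in tiktoken tokens.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=100, chunk_overlap=0
)
chunks = splitter.split_text("LangChain documents can be long.\n\n" * 50)
```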