diff --git a/graph_rag/KG_web/KG_web_example.ipynb b/graph_rag/KG_web/KG_web_example.ipynb new file mode 100644 index 0000000..11dbd91 --- /dev/null +++ b/graph_rag/KG_web/KG_web_example.ipynb @@ -0,0 +1,460 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CY4Kfo8rfhC7", + "outputId": "de06c71c-c45d-4d08-8058-dec5ba37addd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'Project-Explainer'...\n", + "remote: Enumerating objects: 206, done.\u001b[K\n", + "remote: Counting objects: 100% (73/73), done.\u001b[K\n", + "remote: Compressing objects: 100% (61/61), done.\u001b[K\n", + "remote: Total 206 (delta 21), reused 35 (delta 11), pack-reused 133 (from 1)\u001b[K\n", + "Receiving objects: 100% (206/206), 12.04 MiB | 18.29 MiB/s, done.\n", + "Resolving deltas: 100% (67/67), done.\n" + ] + } + ], + "source": [ + "!git clone https://github.com/AsH1605/Project-Explainer" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install requests beautifulsoup4 llama-index" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OgmEGhxtf6Ut", + "outputId": "2cb74c2f-6806-43cb-8ecd-fa70553842b1" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (2.32.3)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (4.12.3)\n", + "Collecting llama-index\n", + " Downloading llama_index-0.11.22-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests) (2024.8.30)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4) (2.6)\n", + "Collecting llama-index-agent-openai<0.4.0,>=0.3.4 (from llama-index)\n", + " Downloading llama_index_agent_openai-0.3.4-py3-none-any.whl.metadata (728 bytes)\n", + "Collecting llama-index-cli<0.4.0,>=0.3.1 (from llama-index)\n", + " Downloading llama_index_cli-0.3.1-py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting llama-index-core<0.12.0,>=0.11.22 (from llama-index)\n", + " Downloading llama_index_core-0.11.22-py3-none-any.whl.metadata (2.4 kB)\n", + "Collecting llama-index-embeddings-openai<0.3.0,>=0.2.4 (from llama-index)\n", + " Downloading llama_index_embeddings_openai-0.2.5-py3-none-any.whl.metadata (686 bytes)\n", + "Collecting llama-index-indices-managed-llama-cloud>=0.3.0 (from llama-index)\n", + " Downloading llama_index_indices_managed_llama_cloud-0.4.0-py3-none-any.whl.metadata (3.8 kB)\n", + "Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama-index)\n", + " Downloading llama_index_legacy-0.9.48.post3-py3-none-any.whl.metadata (8.5 kB)\n", + "Collecting llama-index-llms-openai<0.3.0,>=0.2.10 (from llama-index)\n", + " Downloading llama_index_llms_openai-0.2.16-py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting llama-index-multi-modal-llms-openai<0.3.0,>=0.2.0 (from llama-index)\n", + " Downloading llama_index_multi_modal_llms_openai-0.2.3-py3-none-any.whl.metadata (729 bytes)\n", + "Collecting llama-index-program-openai<0.3.0,>=0.2.0 (from llama-index)\n", + " Downloading llama_index_program_openai-0.2.0-py3-none-any.whl.metadata (766 bytes)\n", + "Collecting llama-index-question-gen-openai<0.3.0,>=0.2.0 (from llama-index)\n", + " Downloading llama_index_question_gen_openai-0.2.0-py3-none-any.whl.metadata (785 bytes)\n", + "Collecting llama-index-readers-file<0.3.0,>=0.2.0 (from llama-index)\n", + " Downloading llama_index_readers_file-0.2.2-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting llama-index-readers-llama-parse>=0.3.0 (from llama-index)\n", + " Downloading llama_index_readers_llama_parse-0.3.0-py3-none-any.whl.metadata (3.5 kB)\n", + "Collecting nltk>3.8.1 (from llama-index)\n", + " Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)\n", + "Requirement already satisfied: openai>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-agent-openai<0.4.0,>=0.3.4->llama-index) (1.52.2)\n", + "Requirement already satisfied: PyYAML>=6.0.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy>=1.4.49 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.22->llama-index) (2.0.36)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (3.10.10)\n", + "Collecting dataclasses-json (from llama-index-core<0.12.0,>=0.11.22->llama-index)\n", + " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (1.2.14)\n", + "Collecting dirtyjson<2.0.0,>=1.0.8 (from llama-index-core<0.12.0,>=0.11.22->llama-index)\n", + " Downloading dirtyjson-1.0.8-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (2024.10.0)\n", + "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (0.27.2)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (1.6.0)\n", + "Requirement already satisfied: networkx>=3.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (3.4.2)\n", + "Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (1.26.4)\n", + "Requirement already satisfied: pillow>=9.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (10.4.0)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (2.9.2)\n", + "Collecting tenacity!=8.4.0,<9.0.0,>=8.2.0 (from llama-index-core<0.12.0,>=0.11.22->llama-index)\n", + " Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)\n", + "Collecting tiktoken>=0.3.3 (from llama-index-core<0.12.0,>=0.11.22->llama-index)\n", + " Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (4.66.6)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (4.12.2)\n", + "Collecting typing-inspect>=0.8.0 (from llama-index-core<0.12.0,>=0.11.22->llama-index)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.22->llama-index) (1.16.0)\n", + "Collecting llama-cloud>=0.0.11 (from llama-index-indices-managed-llama-cloud>=0.3.0->llama-index)\n", + " Downloading llama_cloud-0.1.4-py3-none-any.whl.metadata (814 bytes)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2.2.2)\n", + "Collecting pypdf<5.0.0,>=4.0.1 (from llama-index-readers-file<0.3.0,>=0.2.0->llama-index)\n", + " Downloading pypdf-4.3.1-py3-none-any.whl.metadata (7.4 kB)\n", + "Collecting striprtf<0.0.27,>=0.0.26 (from llama-index-readers-file<0.3.0,>=0.2.0->llama-index)\n", + " Downloading striprtf-0.0.26-py3-none-any.whl.metadata (2.1 kB)\n", + "Collecting llama-parse>=0.5.0 (from llama-index-readers-llama-parse>=0.3.0->llama-index)\n", + " Downloading llama_parse-0.5.13-py3-none-any.whl.metadata (6.9 kB)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index) (1.4.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index) (2024.9.11)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.22->llama-index) (2.4.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.22->llama-index) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.22->llama-index) (24.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.22->llama-index) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.22->llama-index) (6.1.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.22->llama-index) (1.17.0)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.22->llama-index) (4.0.3)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.22->llama-index) (3.7.1)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.22->llama-index) (1.0.6)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.22->llama-index) (1.3.1)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-index-core<0.12.0,>=0.11.22->llama-index) (0.14.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.4.0,>=0.3.4->llama-index) (1.9.0)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.4.0,>=0.3.4->llama-index) (0.6.1)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.22->llama-index) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.22->llama-index) (2.23.4)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.22->llama-index) (3.1.1)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect>=0.8.0->llama-index-core<0.12.0,>=0.11.22->llama-index)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->llama-index-core<0.12.0,>=0.11.22->llama-index)\n", + " Downloading marshmallow-3.23.1-py3-none-any.whl.metadata (7.5 kB)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2024.2)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->llama-index-core<0.12.0,>=0.11.22->llama-index) (1.2.2)\n", + "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.12.0,>=0.11.22->llama-index) (24.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (1.16.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.22->llama-index) (0.2.0)\n", + "Downloading llama_index-0.11.22-py3-none-any.whl (6.8 kB)\n", + "Downloading llama_index_agent_openai-0.3.4-py3-none-any.whl (13 kB)\n", + "Downloading llama_index_cli-0.3.1-py3-none-any.whl (27 kB)\n", + "Downloading llama_index_core-0.11.22-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index_embeddings_openai-0.2.5-py3-none-any.whl (6.1 kB)\n", + "Downloading llama_index_indices_managed_llama_cloud-0.4.0-py3-none-any.whl (10 kB)\n", + "Downloading llama_index_legacy-0.9.48.post3-py3-none-any.whl (1.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m33.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index_llms_openai-0.2.16-py3-none-any.whl (13 kB)\n", + "Downloading llama_index_multi_modal_llms_openai-0.2.3-py3-none-any.whl (5.9 kB)\n", + "Downloading llama_index_program_openai-0.2.0-py3-none-any.whl (5.3 kB)\n", + "Downloading llama_index_question_gen_openai-0.2.0-py3-none-any.whl (2.9 kB)\n", + "Downloading llama_index_readers_file-0.2.2-py3-none-any.whl (38 kB)\n", + "Downloading llama_index_readers_llama_parse-0.3.0-py3-none-any.whl (2.5 kB)\n", + "Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m72.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dirtyjson-1.0.8-py3-none-any.whl (25 kB)\n", + "Downloading llama_cloud-0.1.4-py3-none-any.whl (176 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.8/176.8 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_parse-0.5.13-py3-none-any.whl (13 kB)\n", + "Downloading pypdf-4.3.1-py3-none-any.whl (295 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.8/295.8 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading striprtf-0.0.26-py3-none-any.whl (6.9 kB)\n", + "Downloading tenacity-8.5.0-py3-none-any.whl (28 kB)\n", + "Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m58.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", + "Downloading marshmallow-3.23.1-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.5/49.5 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Installing collected packages: striprtf, dirtyjson, tenacity, pypdf, nltk, mypy-extensions, marshmallow, typing-inspect, tiktoken, llama-cloud, dataclasses-json, llama-index-legacy, llama-index-core, llama-parse, llama-index-readers-file, llama-index-llms-openai, llama-index-indices-managed-llama-cloud, llama-index-embeddings-openai, llama-index-readers-llama-parse, llama-index-multi-modal-llms-openai, llama-index-cli, llama-index-agent-openai, llama-index-program-openai, llama-index-question-gen-openai, llama-index\n", + " Attempting uninstall: tenacity\n", + " Found existing installation: tenacity 9.0.0\n", + " Uninstalling tenacity-9.0.0:\n", + " Successfully uninstalled tenacity-9.0.0\n", + " Attempting uninstall: nltk\n", + " Found existing installation: nltk 3.8.1\n", + " Uninstalling nltk-3.8.1:\n", + " Successfully uninstalled nltk-3.8.1\n", + "Successfully installed dataclasses-json-0.6.7 dirtyjson-1.0.8 llama-cloud-0.1.4 llama-index-0.11.22 llama-index-agent-openai-0.3.4 llama-index-cli-0.3.1 llama-index-core-0.11.22 llama-index-embeddings-openai-0.2.5 llama-index-indices-managed-llama-cloud-0.4.0 llama-index-legacy-0.9.48.post3 llama-index-llms-openai-0.2.16 llama-index-multi-modal-llms-openai-0.2.3 llama-index-program-openai-0.2.0 llama-index-question-gen-openai-0.2.0 llama-index-readers-file-0.2.2 llama-index-readers-llama-parse-0.3.0 llama-parse-0.5.13 marshmallow-3.23.1 mypy-extensions-1.0.0 nltk-3.9.1 pypdf-4.3.1 striprtf-0.0.26 tenacity-8.5.0 tiktoken-0.8.0 typing-inspect-0.9.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install llama-index-embeddings-huggingface\n", + "!pip install llama-index-embeddings-langchain\n", + "!pip install llama-index-llms-ollama" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dosEj_efgjDa", + "outputId": "1deb7743-6ea9-4f01-bb14-5720b3dd78f1" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting llama-index-embeddings-huggingface\n", + " Downloading llama_index_embeddings_huggingface-0.3.1-py3-none-any.whl.metadata (718 bytes)\n", + "Requirement already satisfied: huggingface-hub>=0.19.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (0.24.7)\n", + "Requirement already satisfied: llama-index-core<0.12.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-embeddings-huggingface) (0.11.22)\n", + "Requirement already satisfied: sentence-transformers>=2.6.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-embeddings-huggingface) (3.2.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (24.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (6.0.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (4.66.6)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (4.12.2)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (3.10.10)\n", + "Collecting minijinja>=1.0 (from huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface)\n", + " Downloading minijinja-2.2.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.8 kB)\n", + "Requirement already satisfied: SQLAlchemy>=1.4.49 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (2.0.36)\n", + "Requirement already satisfied: dataclasses-json in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (0.6.7)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.2.14)\n", + "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.0.8)\n", + "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (0.27.2)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.6.0)\n", + "Requirement already satisfied: networkx>=3.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (3.4.2)\n", + "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (3.9.1)\n", + "Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.26.4)\n", + "Requirement already satisfied: pillow>=9.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (10.4.0)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (2.9.2)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.2.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (8.5.0)\n", + "Requirement already satisfied: tiktoken>=0.3.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (0.8.0)\n", + "Requirement already satisfied: typing-inspect>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (0.9.0)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.16.0)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.41.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (4.44.2)\n", + "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (2.5.0+cu121)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (1.5.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (1.13.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (2.4.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (24.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (6.1.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (1.17.0)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (4.0.3)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.4.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (2024.9.11)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (2.23.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (2024.8.30)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (3.1.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (3.1.4)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (1.3.0)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (0.4.5)\n", + "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (0.19.1)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from typing-inspect>=0.8.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.0.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (3.23.1)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (3.7.1)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.0.6)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.3.1)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (0.14.0)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (3.5.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (0.2.0)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-huggingface) (1.2.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (3.0.2)\n", + "Downloading llama_index_embeddings_huggingface-0.3.1-py3-none-any.whl (8.6 kB)\n", + "Downloading minijinja-2.2.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (861 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m861.9/861.9 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: minijinja, llama-index-embeddings-huggingface\n", + "Successfully installed llama-index-embeddings-huggingface-0.3.1 minijinja-2.2.0\n", + "Requirement already satisfied: llama-index-embeddings-langchain in /usr/local/lib/python3.10/dist-packages (0.2.1)\n", + "Requirement already satisfied: llama-index-core<0.12.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-embeddings-langchain) (0.11.22)\n", + "Requirement already satisfied: PyYAML>=6.0.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy>=1.4.49 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2.0.36)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (3.10.10)\n", + "Requirement already satisfied: dataclasses-json in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (0.6.7)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.2.14)\n", + "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.0.8)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2024.10.0)\n", + "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (0.27.2)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.6.0)\n", + "Requirement already satisfied: networkx>=3.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (3.4.2)\n", + "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (3.9.1)\n", + "Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.26.4)\n", + "Requirement already satisfied: pillow>=9.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (10.4.0)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2.9.2)\n", + "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.2.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (8.5.0)\n", + "Requirement already satisfied: tiktoken>=0.3.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (0.8.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (4.66.6)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (4.12.2)\n", + "Requirement already satisfied: typing-inspect>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (0.9.0)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.16.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2.4.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (24.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (6.1.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.17.0)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (4.0.3)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.4.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2024.9.11)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2.23.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (2024.8.30)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (3.1.1)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from typing-inspect>=0.8.0->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.0.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (3.23.1)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (3.7.1)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.0.6)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.3.1)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (0.14.0)\n", + "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (24.1)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (0.2.0)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-embeddings-langchain) (1.2.2)\n", + "Requirement already satisfied: llama-index-llms-ollama in /usr/local/lib/python3.10/dist-packages (0.3.5)\n", + "Requirement already satisfied: llama-index-core<0.12.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-llms-ollama) (0.11.22)\n", + "Requirement already satisfied: ollama>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-llms-ollama) (0.3.3)\n", + "Requirement already satisfied: PyYAML>=6.0.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy>=1.4.49 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2.0.36)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (3.10.10)\n", + "Requirement already satisfied: dataclasses-json in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (0.6.7)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.2.14)\n", + "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.0.8)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2024.10.0)\n", + "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (0.27.2)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.6.0)\n", + "Requirement already satisfied: networkx>=3.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (3.4.2)\n", + "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (3.9.1)\n", + "Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.26.4)\n", + "Requirement already satisfied: pillow>=9.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (10.4.0)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2.9.2)\n", + "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.2.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (8.5.0)\n", + "Requirement already satisfied: tiktoken>=0.3.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (0.8.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (4.66.6)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (4.12.2)\n", + "Requirement already satisfied: typing-inspect>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (0.9.0)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.16.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2.4.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (24.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (6.1.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.17.0)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (4.0.3)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (3.7.1)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.0.6)\n", + "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (3.10)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.3.1)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (0.14.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.4.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2024.9.11)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2.23.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (3.4.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (2.2.3)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (3.1.1)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from typing-inspect>=0.8.0->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.0.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (3.23.1)\n", + "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (24.1)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (0.2.0)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-llms-ollama) (1.2.2)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#enter url\n", + "!python3 /content/Project-Explainer/graph_rag/KG_web/build.py" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lXTV9H3kgQZN", + "outputId": "0afe914b-1296-44fa-d46c-3d9b0d0d944b" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2024-11-06 17:46:16.505471: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-11-06 17:46:16.520543: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-11-06 17:46:16.524776: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-11-06 17:46:16.535343: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-11-06 17:46:17.752890: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "Enter the URL of the documentation to scrape: https://in.pycon.org/2024/\n", + "\n", + "Document 1\n", + "URL: https://in.pycon.org/2024/\n", + "Content Snippet: PyCon India 2024Skip to main contentAboutOur TeamMentors & ReviewersFAQHealth & SafetyChild CareScholarshipsDiversity & InclusionsJobs BoardScheduleVenueAbout VenueAbout BengaluruExhibitor GuideProgra...\n", + "\n", + "\n", + "Document 2\n", + "URL: https://in.pycon.org/2024/#maincontent\n", + "Content Snippet: PyCon India 2024Skip to main contentAboutOur TeamMentors & ReviewersFAQHealth & SafetyChild CareScholarshipsDiversity & InclusionsJobs BoardScheduleVenueAbout VenueAbout BengaluruExhibitor GuideProgra...\n", + "\n", + "modules.json: 100% 349/349 [00:00<00:00, 1.80MB/s]\n", + "config_sentence_transformers.json: 100% 124/124 [00:00<00:00, 860kB/s]\n", + "README.md: 100% 94.6k/94.6k [00:00<00:00, 10.6MB/s]\n", + "sentence_bert_config.json: 100% 52.0/52.0 [00:00<00:00, 379kB/s]\n", + "config.json: 100% 777/777 [00:00<00:00, 5.43MB/s]\n", + "model.safetensors: 100% 438M/438M [00:03<00:00, 116MB/s]\n", + "tokenizer_config.json: 100% 366/366 [00:00<00:00, 2.28MB/s]\n", + "vocab.txt: 100% 232k/232k [00:00<00:00, 38.7MB/s]\n", + "tokenizer.json: 100% 711k/711k [00:00<00:00, 1.02MB/s]\n", + "special_tokens_map.json: 100% 125/125 [00:00<00:00, 838kB/s]\n", + "1_Pooling/config.json: 100% 190/190 [00:00<00:00, 1.45MB/s]\n", + "WARNING:root:Removing unpickleable private attribute _chunking_tokenizer_fn\n", + "WARNING:root:Removing unpickleable private attribute _split_fns\n", + "WARNING:root:Removing unpickleable private attribute _sub_sentence_split_fns\n", + "\n", + "Indexing complete. The index has been saved to 'documentation_index.pkl'.\n" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/graph_rag/KG_web/Scraper.py b/graph_rag/KG_web/Scraper.py new file mode 100644 index 0000000..cd6ba8c --- /dev/null +++ b/graph_rag/KG_web/Scraper.py @@ -0,0 +1,62 @@ +""" +This file Scrapes the documentation from a given website including all sublinks using BeautifulSoup + +Functions: +scrape_page: Scrapes page to extract relevant details +scrape_website: A function to recursively scrape all pages starting from the main documentation URL +build_index_from_documents: Builds an index from scraped documents +""" + + +import requests +from bs4 import BeautifulSoup +from urllib.parse import urljoin +from llama_index.core import Document, VectorStoreIndex, Settings +from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.llms.ollama import Ollama + +import time + +def scrape_page(url): + try: + response = requests.get(url) + response.raise_for_status() + soup = BeautifulSoup(response.text, 'html.parser') + content = soup.get_text(strip=True) + links = [urljoin(url, a['href']) for a in soup.find_all('a', href=True)] + return content, links + except requests.exceptions.RequestException as e: + print(f"Error scraping {url}: {e}") + return None, [] + +def scrape_website(base_url, max_depth=3): + visited = set() + to_visit = [base_url] + all_documents = [] + + while to_visit: + current_url = to_visit.pop(0) + if current_url in visited or len(visited) >= max_depth: + continue + + visited.add(current_url) + + content, links = scrape_page(current_url) + if content: + document = Document( + text=content, + metadata={'url': current_url} + ) + all_documents.append(document) + + to_visit.extend(links) + + time.sleep(1) + return all_documents + +def build_index_from_documents(documents): + Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5") + Settings.llm = Ollama(model="llama3", request_timeout=360.0) + + index = VectorStoreIndex.from_documents(documents,) + return index \ No newline at end of file diff --git a/graph_rag/KG_web/Screenshot from 2024-11-06 22-29-54.png b/graph_rag/KG_web/Screenshot from 2024-11-06 22-29-54.png new file mode 100644 index 0000000..e4fdb4a Binary files /dev/null and b/graph_rag/KG_web/Screenshot from 2024-11-06 22-29-54.png differ diff --git a/graph_rag/KG_web/build.py b/graph_rag/KG_web/build.py new file mode 100644 index 0000000..e23875e --- /dev/null +++ b/graph_rag/KG_web/build.py @@ -0,0 +1,18 @@ +import Scraper +import pickle + +base_url = input("Enter the URL of the documentation to scrape: ") + +scraped_documents = Scraper.scrape_website(base_url) + +for i, doc in enumerate(scraped_documents, 1): + print(f"\nDocument {i}") + print(f"URL: {doc.metadata.get('url', 'No URL found')}") + print(f"Content Snippet: {doc.text[:200]}...\n") + +index = Scraper.build_index_from_documents(scraped_documents) + +with open("documentation_index.pkl", "wb") as file: + pickle.dump(index, file) + +print("\nIndexing complete. The index has been saved to 'documentation_index.pkl'.")