diff --git a/application/core/settings.py b/application/core/settings.py
index 6ae5475ce..bdb60b8c2 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -61,6 +61,9 @@ class Settings(BaseSettings):
     QDRANT_PATH: Optional[str] = None
     QDRANT_DISTANCE_FUNC: str = "Cosine"
 
+    # ORACLE_URI="your_username/your_password@localhost:1521/docsgpt"
+    ORACLE_URI: Optional[str] = None  # Oracle DB connection string
+
     BRAVE_SEARCH_API_KEY: Optional[str] = None
 
     FLASK_DEBUG_MODE: bool = False
diff --git a/application/requirements.txt b/application/requirements.txt
index b072885d3..b637bd8ef 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -16,6 +16,7 @@ javalang==0.13.0
 langchain==0.1.4
 langchain-openai==0.0.5
 openapi3_parser==1.1.16
+oracledb==2.2.1
 pandas==2.2.0
 pydantic_settings==2.1.0
 pymongo==4.6.3
diff --git a/application/vectorstore/oracledb.py b/application/vectorstore/oracledb.py
new file mode 100644
index 000000000..fac642a5b
--- /dev/null
+++ b/application/vectorstore/oracledb.py
@@ -0,0 +1,202 @@
+"""Oracle Database vector store built on the VECTOR type (Oracle 23ai+)."""
+import array
+import itertools
+import json
+
+from application.core.settings import settings
+from application.vectorstore.base import BaseVectorStore
+from application.vectorstore.document_class import Document
+
+# Metric handed to VECTOR_DISTANCE when ranking search results.
+DISTANCE_METRIC = "COSINE"
+
+
+class OracleDBVectorStore(BaseVectorStore):
+    """Store texts, embeddings and metadata in a single Oracle table.
+
+    The table is not created here.  It is assumed to have a text column, a
+    ``VECTOR`` embedding column and a ``METADATA`` column that accepts JSON
+    text -- NOTE(review): confirm against the deployed schema.
+
+    Table and column names are interpolated into the SQL because identifiers
+    cannot be bound, so they must come from trusted configuration.  Every
+    value is passed as a bind variable.
+    """
+
+    def __init__(
+        self,
+        embeddings_key: str = "embeddings",
+        table: str = "documents",
+        text_key: str = "text",
+        embedding_key: str = "embedding",
+        database: str = "docsgpt",
+    ):
+        """Connect to the database named by ``settings.ORACLE_URI``.
+
+        Args:
+            embeddings_key: Key forwarded to ``_get_embeddings``.
+            table: Table that holds the documents.
+            text_key: Column that stores the raw text.
+            embedding_key: Column that stores the embedding vector.
+            database: Unused (the URI selects the database); kept so
+                existing callers do not break.
+
+        Raises:
+            ImportError: If the ``oracledb`` package is not installed.
+            ValueError: If ``settings.ORACLE_URI`` is not set.
+        """
+        self._table = table
+        self._text_key = text_key
+        self._embedding_key = embedding_key
+        self._embeddings_key = embeddings_key
+        self._oracle_uri = settings.ORACLE_URI
+        self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
+
+        try:
+            import oracledb
+        except ImportError as err:
+            raise ImportError(
+                "Could not import oracledb python package. "
+                "Please install it with `pip install oracledb`."
+            ) from err
+
+        # Fail with a clear message instead of an opaque driver error.
+        if not self._oracle_uri:
+            raise ValueError("ORACLE_URI must be set to use the Oracle vector store.")
+
+        self._connection = oracledb.connect(self._oracle_uri)
+
+    @staticmethod
+    def _to_vector(values):
+        """Pack an embedding as the float32 ``array.array`` bound to VECTOR."""
+        return array.array("f", values)
+
+    @staticmethod
+    def _read_lob(value):
+        """Return the content of a fetched LOB, or ``value`` itself otherwise."""
+        return value.read() if hasattr(value, "read") else value
+
+    @classmethod
+    def _load_metadata(cls, raw):
+        """Decode a fetched ``METADATA`` value into a Python object."""
+        raw = cls._read_lob(raw)
+        if raw is None:
+            return {}
+        if isinstance(raw, (str, bytes)):
+            try:
+                return json.loads(raw)
+            except ValueError:
+                # Not JSON (row written by something else): pass it through.
+                return raw
+        # Native JSON columns are already fetched as Python objects.
+        return raw
+
+    def search(self, question, k=2, *args, **kwargs):
+        """Return the ``k`` stored documents closest to ``question``.
+
+        Args:
+            question: Text to embed and compare against stored embeddings.
+            k: Maximum number of documents to return.
+
+        Returns:
+            A list of ``Document`` objects, nearest first.
+        """
+        query_vector = self._to_vector(self._embedding.embed_query(question))
+
+        # The vector and the row limit are bound, never formatted into the SQL.
+        query = f"""
+            SELECT {self._text_key}, METADATA
+            FROM {self._table}
+            ORDER BY VECTOR_DISTANCE(
+                {self._embedding_key}, :query_vector, {DISTANCE_METRIC}
+            ) ASC
+            FETCH FIRST :k ROWS ONLY
+        """
+
+        with self._connection.cursor() as cursor:
+            cursor.execute(query, query_vector=query_vector, k=int(k))
+            rows = cursor.fetchall()
+        return [
+            Document(self._read_lob(text), self._load_metadata(metadata))
+            for text, metadata in rows
+        ]
+
+    def _insert_texts(self, texts, metadatas):
+        """Embed one batch of texts and insert it, committing once.
+
+        Args:
+            texts: Batch of texts to embed and store.
+            metadatas: One metadata mapping (or ``None``) per text.
+
+        Returns:
+            An empty list: the ``INSERT`` has no ``RETURNING`` clause, so no
+            row identifiers are available to report.
+        """
+        if not texts:
+            return []
+
+        embeddings = self._embedding.embed_documents(texts)
+        rows = [
+            (text, self._to_vector(embedding), json.dumps(metadata or {}))
+            for text, embedding, metadata in zip(texts, embeddings, metadatas)
+        ]
+
+        query = f"""
+            INSERT INTO {self._table} ({self._text_key}, {self._embedding_key}, METADATA)
+            VALUES (:1, :2, :3)
+        """
+
+        with self._connection.cursor() as cursor:
+            cursor.executemany(query, rows)
+        self._connection.commit()
+        # An INSERT produces no result set; fetching from it raises.
+        return []
+
+    def add_texts(
+        self,
+        texts,
+        metadatas=None,
+        ids=None,
+        refresh_indices=True,
+        create_index_if_not_exists=True,
+        bulk_kwargs=None,
+        **kwargs,
+    ):
+        """Embed and store ``texts`` in batches of 100.
+
+        Args:
+            texts: Iterable of texts to store.
+            metadatas: Optional iterable of metadata mappings, paired with
+                ``texts`` in order; pairing stops at the shorter of the two.
+            ids, refresh_indices, create_index_if_not_exists, bulk_kwargs,
+                **kwargs: Accepted for interface compatibility and ignored.
+
+        Returns:
+            The concatenated results of the per-batch inserts (currently an
+            empty list).
+        """
+        batch_size = 100
+        # repeat() does not touch ``texts``, so a one-shot iterator of texts is
+        # not consumed twice when no metadata is given.
+        _metadatas = metadatas or itertools.repeat(None)
+        texts_batch = []
+        metadatas_batch = []
+        result_ids = []
+
+        for text, metadata in zip(texts, _metadatas):
+            texts_batch.append(text)
+            metadatas_batch.append(metadata)
+            if len(texts_batch) == batch_size:
+                result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
+                texts_batch = []
+                metadatas_batch = []
+
+        if texts_batch:
+            result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
+        return result_ids
+
+    def delete_index(self, *args, **kwargs):
+        """Delete every row from the table and commit."""
+        with self._connection.cursor() as cursor:
+            cursor.execute(f"DELETE FROM {self._table} WHERE 1=1")
+        self._connection.commit()
diff --git a/application/vectorstore/vector_creator.py b/application/vectorstore/vector_creator.py
index 27b386450..6a6007a6c 100644
--- a/application/vectorstore/vector_creator.py
+++ b/application/vectorstore/vector_creator.py
@@ -2,6 +2,7 @@
 from application.vectorstore.elasticsearch import ElasticsearchStore
 from application.vectorstore.mongodb import MongoDBVectorStore
 from application.vectorstore.qdrant import QdrantStore
+from application.vectorstore.oracledb import OracleDBVectorStore
 
 
 class VectorCreator:
@@ -10,6 +11,7 @@ class VectorCreator:
         "elasticsearch": ElasticsearchStore,
         "mongodb": MongoDBVectorStore,
         "qdrant": QdrantStore,
+        "oracledb": OracleDBVectorStore,
     }
 
     @classmethod