Skip to content

Commit

Permalink
Update the version to 0.1.25 (#367)
Browse files Browse the repository at this point in the history
Signed-off-by: SimFG <[email protected]>
  • Loading branch information
SimFG authored May 18, 2023
1 parent b507a99 commit 485929a
Show file tree
Hide file tree
Showing 13 changed files with 80 additions and 22 deletions.
16 changes: 15 additions & 1 deletion .github/workflows/Nightly_CI_main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,17 @@ on:

jobs:
nightly-CI-gpt-cache-cli-main:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
strategy:
fail-fast: false
services:
postgres:
image: ankane/pgvector
ports:
- 5432:5432
env:
POSTGRES_PASSWORD: postgres
options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
steps:
- uses: actions/checkout@main

Expand All @@ -49,6 +57,12 @@ jobs:
run: |
pip install -r requirements.txt
- name: Download the `en_core_web_sm` model
shell: bash
working-directory: tests
run: |
python3 -m spacy download en_core_web_sm
- name: Nightly CI Tests
timeout-minutes: 10
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pylint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ on:

jobs:
pylint:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/[email protected]
- name: Setup Python
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit_test_main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ on:

jobs:
unit-test-main:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
strategy:
fail-fast: false
services:
Expand Down
20 changes: 20 additions & 0 deletions docs/release_note.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@ To read the following content, you need to understand the basic use of GPTCache,
- [Readme doc](https://github.com/zilliztech/GPTCache)
- [Usage doc](https://github.com/zilliztech/GPTCache/blob/main/docs/usage.md)

## v0.1.25 (2023.5.18)

1. Support the DocArray vector database

```python
from gptcache.manager import manager_factory

data_manager = manager_factory("sqlite,docarray")
```

2. Add the RWKV model for embedding

```python
from gptcache.embedding import Rwkv

test_sentence = 'Hello, world.'
encoder = Rwkv(model='sgugger/rwkv-430M-pile')
embed = encoder.to_embeddings(test_sentence)
```

## v0.1.24 (2023.5.15)

1. Support the langchain embedding
Expand Down
17 changes: 14 additions & 3 deletions gptcache/embedding/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
__all__ = ["OpenAI", "Huggingface", "SBERT", "Cohere", "Onnx", "FastText", "Data2VecAudio", "Timm", "ViT", "LangChain"]
__all__ = [
"OpenAI",
"Huggingface",
"SBERT",
"Cohere",
"Onnx",
"FastText",
"Data2VecAudio",
"Timm",
"ViT",
"LangChain",
"Rwkv",
]


from gptcache.utils.lazy_import import LazyImport
Expand Down Expand Up @@ -52,10 +64,9 @@ def ViT(model="google/vit-base-patch16-384"):
return vit.ViT(model)


def LangChain(embeddings, dimension = 0):
def LangChain(embeddings, dimension=0):
return langchain.LangChain(embeddings, dimension)


def Rwkv(model="sgugger/rwkv-430M-pile"):
    """Create an RWKV embedding encoder.

    Thin factory that forwards its argument to ``rwkv.Rwkv``.

    :param model: model name passed unchanged to ``rwkv.Rwkv``,
        defaults to ``"sgugger/rwkv-430M-pile"``.
    :return: the object produced by ``rwkv.Rwkv(model)``.
    """
    encoder = rwkv.Rwkv(model)
    return encoder

6 changes: 4 additions & 2 deletions gptcache/embedding/rwkv.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import numpy as np

from gptcache.utils import import_huggingface
from gptcache.embedding.base import BaseEmbedding
from gptcache.utils import import_huggingface

import_huggingface()

from transformers import AutoTokenizer, RwkvModel # pylint: disable=C0413


class Rwkv(BaseEmbedding):
"""Generate sentence embedding for given text using RWKV models.
Expand All @@ -23,6 +24,7 @@ class Rwkv(BaseEmbedding):
encoder = Rwkv(model='sgugger/rwkv-430M-pile')
embed = encoder.to_embeddings(test_sentence)
"""

def __init__(self, model: str = "sgugger/rwkv-430M-pile"):
self.model = RwkvModel.from_pretrained(model)
self.model.eval()
Expand All @@ -46,7 +48,7 @@ def to_embeddings(self, data, **_):
"""
inputs = self.tokenizer(data, return_tensors="pt")
outputs = self.model(inputs["input_ids"])
emb = outputs.last_hidden_state[0,0,:].detach().numpy()
emb = outputs.last_hidden_state[0, 0, :].detach().numpy()
return np.array(emb).astype("float32")

@property
Expand Down
12 changes: 6 additions & 6 deletions gptcache/manager/factory.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os
from typing import Union, Callable
from pathlib import Path
from gptcache.manager.data_manager import SSDataManager, MapDataManager
from typing import Union, Callable

from gptcache.manager import CacheBase, VectorBase, ObjectBase
from gptcache.manager.data_manager import SSDataManager, MapDataManager


def manager_factory(manager="map",
Expand Down Expand Up @@ -74,10 +75,9 @@ def manager_factory(manager="map",

if vector_params is None:
vector_params = {}
if vector == "faiss":
vector_params["index_path"] = os.path.join(data_dir, "faiss.index")
elif vector == "hnswlib":
vector_params["index_path"] = os.path.join(data_dir, "hnswlib.index")
local_vector_type = ["faiss", "hnswlib", "docarray"]
if vector in local_vector_type:
vector_params["index_path"] = os.path.join(data_dir, f"{vector}.index")
elif vector == "milvus" and vector_params.get("local_mode", False) is True:
vector_params["local_data"] = os.path.join(data_dir, "milvus_data")
v = VectorBase(name=vector, **vector_params)
Expand Down
4 changes: 2 additions & 2 deletions gptcache/processor/context/summarization_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class SummarizationContextProcess(ContextProcess):
:param target_length: The length of the summarized text.
:type target_length: int
"""
def __init__(self, summarizer = transformers.pipeline("summarization", model="facebook/bart-large-cnn"),
def __init__(self, summarizer=transformers.pipeline("summarization", model="facebook/bart-large-cnn"),
tokenizer=None, target_length=512):
self.summarizer = summarizer
self.target_length = target_length
Expand All @@ -30,7 +30,7 @@ def __init__(self, summarizer = transformers.pipeline("summarization", model="fa
self.tokenizer = tokenizer
self.content = ""

def summarize_to_sentence(self, summarizer, sentences, target_size = 1000):
def summarize_to_sentence(self, summarizer, sentences, target_size=1000):
lengths = []
for sentence in sentences:
lengths.append(len(sentence))
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def parse_requirements(file_name: str) -> List[str]:
setuptools.setup(
name="gptcache",
packages=find_packages(),
version="0.1.24",
version="0.1.25",
author="SimFG",
author_email="[email protected]",
description="GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat "
Expand Down
4 changes: 3 additions & 1 deletion tests/unit_tests/embedding/test_rwkv.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pytest
from gptcache.embedding import Rwkv

from gptcache.adapter.api import _get_model
from gptcache.embedding import Rwkv


@pytest.mark.tags("L2")
def test_rwkv():
Expand Down
3 changes: 3 additions & 0 deletions tests/unit_tests/manager/test_chromadb.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import unittest

import numpy as np
import pytest

from gptcache.manager import VectorBase
from gptcache.manager.vector_data.base import VectorData


@pytest.mark.tags("L2")
class TestChromadb(unittest.TestCase):
def test_normal(self):
db = VectorBase("chromadb", client_settings={}, top_k=3)
Expand Down
11 changes: 7 additions & 4 deletions tests/unit_tests/manager/test_local_index.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import unittest
import numpy as np
from functools import partial
from tempfile import TemporaryDirectory
from pathlib import Path
from tempfile import TemporaryDirectory

import numpy as np
import pytest

from gptcache.manager.vector_data import VectorBase
from gptcache.manager.vector_data.base import VectorData
from gptcache.manager.vector_data.docarray_index import DocArrayIndex
from gptcache.manager.vector_data.faiss import Faiss
from gptcache.manager.vector_data.hnswlib_store import Hnswlib
from gptcache.manager.vector_data import VectorBase
from gptcache.manager.vector_data.base import VectorData

DIM = 512
MAX_ELEMENTS = 10000
Expand All @@ -30,6 +32,7 @@ def test_faiss(self):
name='faiss', top_k=3, dimension=DIM, index_path=index_path
)

@pytest.mark.tags("L2")
def test_hnswlib(self):
cls = partial(Hnswlib, max_elements=MAX_ELEMENTS, dimension=DIM)
self._internal_test_normal(cls)
Expand Down
3 changes: 3 additions & 0 deletions tests/unit_tests/manager/test_pgvector.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import os
import unittest

import numpy as np
import pytest

from gptcache.manager.vector_data import VectorBase
from gptcache.manager.vector_data.base import VectorData


@pytest.mark.tags("L2")
class TestPgvector(unittest.TestCase):
def test_normal(self):
size = 1000
Expand Down

0 comments on commit 485929a

Please sign in to comment.