Add Cohere model
slundberg committed Nov 29, 2023
1 parent a5b5db7 commit 7923cb6
Showing 7 changed files with 93 additions and 20 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -9,4 +9,5 @@ __pycache__/
.ipynb_checkpoints
node_modules
/client
-.eggs/
+.eggs/
+.env
1 change: 1 addition & 0 deletions guidance/models/__init__.py
@@ -5,4 +5,5 @@
from ._llama_cpp import LlamaCpp, LlamaCppChat
from ._mock import Mock, MockChat
from ._lite_llm import LiteLLMChat, LiteLLMInstruct, LiteLLMCompletion
+from ._cohere import CohereCompletion, CohereInstruct
from . import transformers
28 changes: 28 additions & 0 deletions guidance/models/_cohere.py
@@ -0,0 +1,28 @@
from ._lite_llm import LiteLLM, LiteLLMCompletion, LiteLLMInstruct

class Cohere(LiteLLM):
    def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None, api_key=None, custom_llm_provider=None, temperature=0.0, max_streaming_tokens=1000, **kwargs):
        try:
            import tokenizers
        except ImportError:
            raise Exception("Please install the HuggingFace tokenizers package using `pip install tokenizers -U` in order to use guidance.models.Cohere!")

        # get the tokenizer
        if tokenizer is None:
            try:
                tokenizer = tokenizers.Tokenizer.from_pretrained("Cohere/" + model)
            except Exception:
                tokenizer = tokenizers.Tokenizer.from_pretrained("Cohere/command-nightly")

        super().__init__(
            model, tokenizer=tokenizer, echo=echo,
            caching=caching, temperature=temperature,
            max_streaming_tokens=max_streaming_tokens, **kwargs
        )


class CohereCompletion(Cohere, LiteLLMCompletion):
    pass

class CohereInstruct(Cohere, LiteLLMInstruct):
    pass
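
A minimal usage sketch of the new classes (not part of the diff): it assumes LiteLLM reads the Cohere credentials from a COHERE_API_KEY environment variable, which the new .env entry in .gitignore above conveniently keeps out of version control.

import guidance
from guidance import gen

# Assumes COHERE_API_KEY is set in the environment; LiteLLM routes the
# request to Cohere based on the model name.
lm = guidance.models.CohereCompletion("command-nightly")
lm += "Count to 10: 1,2,3,"
lm += gen("rest", max_tokens=10)
print(lm["rest"])

CohereInstruct works the same way but wraps prompts in the instruction() context manager, as the new test file below shows.
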
26 changes: 9 additions & 17 deletions guidance/models/_lite_llm.py
@@ -13,7 +13,6 @@
from ._model import Chat, Instruct
from ._remote import Remote

-# chat_model_pattern = r'^(ft:)?(gpt-3\.5-turbo|gpt-4)((-\w+)+)?(:[\w-]+(?:[:\w-]+)*)?(::\w+)?$'

class LiteLLM(Remote):
    def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None, api_key=None, custom_llm_provider=None, temperature=0.0, max_streaming_tokens=1000, **kwargs):
@@ -26,28 +25,25 @@ def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None
        if self.__class__ is LiteLLM:
            raise Exception("The LiteLLM class is not meant to be used directly! Please use LiteLLMChat, LiteLLMInstruct, or LiteLLMCompletion depending on the model you are using.")

-        # # Configure an AsyncOpenAI Client with user params.
-        # if api_key is None:
-        #     api_key = os.environ.get("OPENAI_API_KEY")
-
-        # if organization is None:
-        #     organization = os.environ.get("OPENAI_ORG_ID")

        self.litellm = litellm

-        # self.client = openai_package.OpenAI(api_key=api_key, organization=organization, base_url=base_url)
        self.model_name = model

-        # self.tokenizer = tiktoken.encoding_for_model(model)
-        # self.eos_token = b"<|endoftext|>"

+        # we pretend it tokenizes like gpt2 if tiktoken does not know about it... TODO: make this better
+        if tokenizer is None:
+            try:
+                tokenizer = tiktoken.encoding_for_model(model)
+            except Exception:
+                tokenizer = tiktoken.get_encoding("gpt2")

        super().__init__(
-            model, tokenizer=tiktoken.encoding_for_model(model), echo=echo,
+            model, tokenizer=tokenizer, echo=echo,
            caching=caching, temperature=temperature,
            max_streaming_tokens=max_streaming_tokens, **kwargs
        )




class LiteLLMCompletion(LiteLLM, Instruct):

@@ -69,8 +65,6 @@ def _generator(self, prompt):
            raise e

        for part in generator:
-            # chunk = part.choices[0].text or ""
-            # yield chunk.encode("utf8")
            chunk = part.choices[0].delta.content or ""
            yield chunk.encode("utf8")

@@ -115,8 +109,6 @@ def _generator(self, prompt):
            raise e

        for part in generator:
-            # chunk = part.choices[0].text or ""
-            # yield chunk.encode("utf8")
            chunk = part.choices[0].delta.content or ""
            yield chunk.encode("utf8")

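The tokenizer fallback added above is worth seeing in isolation: tiktoken only knows OpenAI model names, so a Cohere model name raises and falls through to the gpt2 encoding, a rough stand-in, as the TODO admits. A sketch (the helper name is ours, not from the diff):

import tiktoken

def pick_tokenizer(model_name):
    # Mirrors the fallback in LiteLLM.__init__: ask tiktoken for the
    # model-specific encoding, else pretend the model tokenizes like gpt2.
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        return tiktoken.get_encoding("gpt2")

print(pick_tokenizer("text-curie-001").name)   # a known OpenAI encoding
print(pick_tokenizer("command-nightly").name)  # falls back to "gpt2"
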
15 changes: 15 additions & 0 deletions guidance/models/_remote.py
@@ -51,6 +51,21 @@ def __init__(self, model, tokenizer=None, echo=True, caching=True, temperature=0
            bos_token_id = tokenizer.bos_token_id
            eos_token_id = tokenizer.eos_token_id

+        # a HuggingFace tokenizers tokenizer was given with id_to_token
+        elif hasattr(tokenizer, "id_to_token"):
+            a_token_ids = tokenizer.encode("a").ids
+            if len(a_token_ids) == 3:
+                bos_token_id = a_token_ids[0]
+                a_id = a_token_ids[1]
+                eos_token_id = a_token_ids[2]
+            else:
+                raise Exception("This tokenizer does not seem to have a BOS and EOS; support for this still needs to be implemented.")
+
+            byte_tokens = [bytes(tokenizer.decode([a_id, i])[1:], encoding="utf8") for i in range(tokenizer.get_vocab_size())]
+            for i, b in enumerate(byte_tokens):
+                if b == b'':
+                    byte_tokens[i] = bytes(tokenizer.id_to_token(i), encoding="utf8")
+
        else:
            raise Exception("The tokenizer given was not of a recognized type!")

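The encode("a") probe in the new branch relies on the tokenizer's post-processor wrapping every encoding in special tokens, so a single character comes back as exactly [BOS, "a", EOS]. A sketch of the check (fetching the tokenizer needs network access; the model name mirrors the default in _cohere.py):

from tokenizers import Tokenizer

tok = Tokenizer.from_pretrained("Cohere/command-nightly")
ids = tok.encode("a").ids
if len(ids) == 3:
    # the post-processor added special tokens around the single character
    bos_token_id, a_id, eos_token_id = ids
    print("BOS:", bos_token_id, "EOS:", eos_token_id)

The byte_tokens loop that follows then decodes each vocabulary id next to a_id and strips the leading "a", recovering each token's byte string even when the token would not decode cleanly on its own; ids that decode to nothing fall back to id_to_token.
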
25 changes: 25 additions & 0 deletions tests/models/test_cohere.py
@@ -0,0 +1,25 @@
import pytest
import guidance
from guidance import gen, capture, select, user, system, assistant, instruction

def test_cohere_basic():
    try:
        lm = guidance.models.CohereCompletion("command-nightly")
    except Exception:
        pytest.skip("Skipping Cohere test because we can't load the model!")
    lm += "Count to 20: 1,2,3,4,"
    nl = "\n"
    lm += f"""\
5,6,7"""
    lm += f"""{gen(max_tokens=1, suffix=nl)}aaaaaa"""
    assert str(lm)[-5:] == "aaaaa"

def test_cohere_instruct():
    try:
        lm = guidance.models.CohereInstruct("command-nightly")
    except Exception:
        pytest.skip("Skipping Cohere test because we can't load the model!")
    with instruction():
        lm += "Count to 20."
    lm += gen('val', max_tokens=1)
    assert len(lm['val']) > 0
15 changes: 13 additions & 2 deletions tests/models/test_lite_llm.py
@@ -3,12 +3,23 @@
from guidance import gen, capture, select, user, system, assistant
from ..utils import get_model

-def test_lite_llm_basic():
+def test_lite_llm_basic_openai():
    try:
        lm = guidance.models.LiteLLMCompletion("text-curie-001")
    except Exception:
        pytest.skip("Skipping LiteLLM test because we can't load the model!")
-    # lm = guidance.models.Transformers("gpt2")
    lm += "Count to 20: 1,2,3,4,"
    nl = "\n"
    lm += f"""\
5,6,7"""
    lm += f"""{gen(max_tokens=1, suffix=nl)}aaaaaa"""
    assert str(lm)[-5:] == "aaaaa"

+def test_lite_llm_basic_cohere():
+    try:
+        lm = guidance.models.LiteLLMCompletion("command-nightly")
+    except Exception:
+        pytest.skip("Skipping LiteLLM test because we can't load the model!")
+    lm += "Count to 20: 1,2,3,4,"
+    nl = "\n"
+    lm += f"""\
