Add Cohere model
slundberg committed Nov 29, 2023
1 parent a5b5db7 commit 7923cb6
Showing 7 changed files with 93 additions and 20 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -9,4 +9,5 @@ __pycache__/
.ipynb_checkpoints
node_modules
/client
-.eggs/
+.eggs/
+.env
1 change: 1 addition & 0 deletions guidance/models/__init__.py
@@ -5,4 +5,5 @@
from ._llama_cpp import LlamaCpp, LlamaCppChat
from ._mock import Mock, MockChat
from ._lite_llm import LiteLLMChat, LiteLLMInstruct, LiteLLMCompletion
+from ._cohere import CohereCompletion, CohereInstruct
from . import transformers
28 changes: 28 additions & 0 deletions guidance/models/_cohere.py
@@ -0,0 +1,28 @@
from ._lite_llm import LiteLLM, LiteLLMCompletion, LiteLLMInstruct

class Cohere(LiteLLM):
    def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None, api_key=None, custom_llm_provider=None, temperature=0.0, max_streaming_tokens=1000, **kwargs):
        try:
            import tokenizers
        except ImportError:
            raise Exception("Please install the HuggingFace tokenizers package using `pip install tokenizers -U` in order to use guidance.models.Cohere!")

        # get the tokenizer
        if tokenizer is None:
            try:
                tokenizer = tokenizers.Tokenizer.from_pretrained("Cohere/" + model)
            except Exception:
                tokenizer = tokenizers.Tokenizer.from_pretrained("Cohere/command-nightly")

        super().__init__(
            model, tokenizer=tokenizer, echo=echo,
            caching=caching, temperature=temperature,
            max_streaming_tokens=max_streaming_tokens, **kwargs
        )


class CohereCompletion(Cohere, LiteLLMCompletion):
    pass

class CohereInstruct(Cohere, LiteLLMInstruct):
    pass
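
A minimal usage sketch of the new classes (not part of the diff): it assumes LiteLLM reads the Cohere credentials from a COHERE_API_KEY environment variable, which the new .env entry in .gitignore above conveniently keeps out of version control.

import guidance
from guidance import gen

# Assumes COHERE_API_KEY is set in the environment; LiteLLM routes the
# request to Cohere based on the model name.
lm = guidance.models.CohereCompletion("command-nightly")
lm += "Count to 10: 1,2,3,"
lm += gen("rest", max_tokens=10)
print(lm["rest"])

CohereInstruct works the same way but wraps prompts in the instruction() context manager, as the new test file below shows.
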
26 changes: 9 additions & 17 deletions guidance/models/_lite_llm.py
@@ -13,7 +13,6 @@
from ._model import Chat, Instruct
from ._remote import Remote

-# chat_model_pattern = r'^(ft:)?(gpt-3\.5-turbo|gpt-4)((-\w+)+)?(:[\w-]+(?:[:\w-]+)*)?(::\w+)?$'

class LiteLLM(Remote):
    def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None, api_key=None, custom_llm_provider=None, temperature=0.0, max_streaming_tokens=1000, **kwargs):
@@ -26,28 +25,25 @@ def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None
        if self.__class__ is LiteLLM:
            raise Exception("The LiteLLM class is not meant to be used directly! Please use LiteLLMChat, LiteLLMInstruct, or LiteLLMCompletion depending on the model you are using.")

-        # # Configure an AsyncOpenAI Client with user params.
-        # if api_key is None:
-        #     api_key = os.environ.get("OPENAI_API_KEY")
-
-        # if organization is None:
-        #     organization = os.environ.get("OPENAI_ORG_ID")

        self.litellm = litellm

-        # self.client = openai_package.OpenAI(api_key=api_key, organization=organization, base_url=base_url)
        self.model_name = model

-        # self.tokenizer = tiktoken.encoding_for_model(model)
-        # self.eos_token = b"<|endoftext|>"

+        # we pretend it tokenizes like gpt2 if tiktoken does not know about it... TODO: make this better
+        if tokenizer is None:
+            try:
+                tokenizer = tiktoken.encoding_for_model(model)
+            except Exception:
+                tokenizer = tiktoken.get_encoding("gpt2")

        super().__init__(
-            model, tokenizer=tiktoken.encoding_for_model(model), echo=echo,
+            model, tokenizer=tokenizer, echo=echo,
            caching=caching, temperature=temperature,
            max_streaming_tokens=max_streaming_tokens, **kwargs
        )




class LiteLLMCompletion(LiteLLM, Instruct):

@@ -69,8 +65,6 @@ def _generator(self, prompt):
            raise e

        for part in generator:
-            # chunk = part.choices[0].text or ""
-            # yield chunk.encode("utf8")
            chunk = part.choices[0].delta.content or ""
            yield chunk.encode("utf8")

@@ -115,8 +109,6 @@ def _generator(self, prompt):
            raise e

        for part in generator:
-            # chunk = part.choices[0].text or ""
-            # yield chunk.encode("utf8")
            chunk = part.choices[0].delta.content or ""
            yield chunk.encode("utf8")

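The tokenizer fallback added above is worth seeing in isolation: tiktoken only knows OpenAI model names, so a Cohere model name raises and falls through to the gpt2 encoding, a rough stand-in, as the TODO admits. A sketch (the helper name is ours, not from the diff):

import tiktoken

def pick_tokenizer(model_name):
    # Mirrors the fallback in LiteLLM.__init__: ask tiktoken for the
    # model-specific encoding, else pretend the model tokenizes like gpt2.
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        return tiktoken.get_encoding("gpt2")

print(pick_tokenizer("text-curie-001").name)   # a known OpenAI encoding
print(pick_tokenizer("command-nightly").name)  # falls back to "gpt2"
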
15 changes: 15 additions & 0 deletions guidance/models/_remote.py
@@ -51,6 +51,21 @@ def __init__(self, model, tokenizer=None, echo=True, caching=True, temperature=0
            bos_token_id = tokenizer.bos_token_id
            eos_token_id = tokenizer.eos_token_id

+        # a HuggingFace tokenizers tokenizer was given with id_to_token
+        elif hasattr(tokenizer, "id_to_token"):
+            a_token_ids = tokenizer.encode("a").ids
+            if len(a_token_ids) == 3:
+                bos_token_id = a_token_ids[0]
+                a_id = a_token_ids[1]
+                eos_token_id = a_token_ids[2]
+            else:
+                raise Exception("This tokenizer does not seem to have a BOS and EOS; support for this still needs to be implemented.")
+
+            byte_tokens = [bytes(tokenizer.decode([a_id, i])[1:], encoding="utf8") for i in range(tokenizer.get_vocab_size())]
+            for i, b in enumerate(byte_tokens):
+                if b == b'':
+                    byte_tokens[i] = bytes(tokenizer.id_to_token(i), encoding="utf8")
+
        else:
            raise Exception("The tokenizer given was not of a recognized type!")

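The encode("a") probe in the new branch relies on the tokenizer's post-processor wrapping every encoding in special tokens, so a single character comes back as exactly [BOS, "a", EOS]. A sketch of the check (fetching the tokenizer needs network access; the model name mirrors the default in _cohere.py):

from tokenizers import Tokenizer

tok = Tokenizer.from_pretrained("Cohere/command-nightly")
ids = tok.encode("a").ids
if len(ids) == 3:
    # the post-processor added special tokens around the single character
    bos_token_id, a_id, eos_token_id = ids
    print("BOS:", bos_token_id, "EOS:", eos_token_id)

The byte_tokens loop that follows then decodes each vocabulary id next to a_id and strips the leading "a", recovering each token's byte string even when the token would not decode cleanly on its own; ids that decode to nothing fall back to id_to_token.
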
25 changes: 25 additions & 0 deletions tests/models/test_cohere.py
@@ -0,0 +1,25 @@
import pytest
import guidance
from guidance import gen, capture, select, user, system, assistant, instruction

def test_cohere_basic():
    try:
        lm = guidance.models.CohereCompletion("command-nightly")
    except Exception:
        pytest.skip("Skipping Cohere test because we can't load the model!")
    lm += "Count to 20: 1,2,3,4,"
    nl = "\n"
    lm += f"""\
5,6,7"""
    lm += f"""{gen(max_tokens=1, suffix=nl)}aaaaaa"""
    assert str(lm)[-5:] == "aaaaa"

def test_cohere_instruct():
    try:
        lm = guidance.models.CohereInstruct("command-nightly")
    except Exception:
        pytest.skip("Skipping Cohere test because we can't load the model!")
    with instruction():
        lm += "Count to 20."
    lm += gen('val', max_tokens=1)
    assert len(lm['val']) > 0
15 changes: 13 additions & 2 deletions tests/models/test_lite_llm.py
@@ -3,12 +3,23 @@
from guidance import gen, capture, select, user, system, assistant
from ..utils import get_model

-def test_lite_llm_basic():
+def test_lite_llm_basic_openai():
    try:
        lm = guidance.models.LiteLLMCompletion("text-curie-001")
    except Exception:
        pytest.skip("Skipping LiteLLM test because we can't load the model!")
-    # lm = guidance.models.Transformers("gpt2")
    lm += "Count to 20: 1,2,3,4,"
    nl = "\n"
    lm += f"""\
5,6,7"""
    lm += f"""{gen(max_tokens=1, suffix=nl)}aaaaaa"""
    assert str(lm)[-5:] == "aaaaa"

+def test_lite_llm_basic_cohere():
+    try:
+        lm = guidance.models.LiteLLMCompletion("command-nightly")
+    except Exception:
+        pytest.skip("Skipping LiteLLM test because we can't load the model!")
+    lm += "Count to 20: 1,2,3,4,"
+    nl = "\n"
+    lm += f"""\
