diff --git a/.gitignore b/.gitignore
index ea6588bbb..1988256ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ __pycache__/
 .ipynb_checkpoints
 node_modules
 /client
-.eggs/
\ No newline at end of file
+.eggs/
+.env
\ No newline at end of file
diff --git a/guidance/models/__init__.py b/guidance/models/__init__.py
index e0dacb3f0..8b1a7e619 100644
--- a/guidance/models/__init__.py
+++ b/guidance/models/__init__.py
@@ -5,4 +5,5 @@
 from ._llama_cpp import LlamaCpp, LlamaCppChat
 from ._mock import Mock, MockChat
 from ._lite_llm import LiteLLMChat, LiteLLMInstruct, LiteLLMCompletion
+from ._cohere import CohereCompletion, CohereInstruct
 from . import transformers
\ No newline at end of file
diff --git a/guidance/models/_cohere.py b/guidance/models/_cohere.py
new file mode 100644
index 000000000..6dd8a18ed
--- /dev/null
+++ b/guidance/models/_cohere.py
@@ -0,0 +1,28 @@
+from ._lite_llm import LiteLLM, LiteLLMCompletion, LiteLLMInstruct
+
+class Cohere(LiteLLM):
+    def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None, api_key=None, custom_llm_provider=None, temperature=0.0, max_streaming_tokens=1000, **kwargs):
+        try:
+            import tokenizers
+        except ImportError:
+            raise Exception("Please install the HuggingFace tokenizers package using `pip install tokenizers -U` in order to use guidance.models.Cohere!")
+
+        # get the tokenizer
+        if tokenizer is None:
+            try:
+                tokenizer = tokenizers.Tokenizer.from_pretrained("Cohere/"+model)
+            except:
+                tokenizer = tokenizers.Tokenizer.from_pretrained("Cohere/command-nightly")
+
+        super().__init__(
+            model, tokenizer=tokenizer, echo=echo,
+            caching=caching, temperature=temperature,
+            max_streaming_tokens=max_streaming_tokens, **kwargs
+        )
+
+
+class CohereCompletion(Cohere, LiteLLMCompletion):
+    pass
+
+class CohereInstruct(Cohere, LiteLLMInstruct):
+    pass
\ No newline at end of file
diff --git a/guidance/models/_lite_llm.py b/guidance/models/_lite_llm.py
index 6e3bbb505..c050ceea9 100644
--- a/guidance/models/_lite_llm.py
+++ b/guidance/models/_lite_llm.py
@@ -13,7 +13,6 @@
 from ._model import Chat, Instruct
 from ._remote import Remote
 
-# chat_model_pattern = r'^(ft:)?(gpt-3\.5-turbo|gpt-4)((-\w+)+)?(:[\w-]+(?:[:\w-]+)*)?(::\w+)?$'
 
 class LiteLLM(Remote):
     def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None, api_key=None, custom_llm_provider=None, temperature=0.0, max_streaming_tokens=1000, **kwargs):
@@ -26,28 +25,25 @@ def __init__(self, model, tokenizer=None, echo=True, caching=True, api_base=None
 
         if self.__class__ is LiteLLM:
             raise Exception("The LightLLM class is not meant to be used directly! Please use LiteLLMChat, LiteLLMInstruct, or LiteLLMCompletion depending on the model you are using.")
 
-        # # Configure an AsyncOpenAI Client with user params.
-        # if api_key is None:
-        #     api_key = os.environ.get("OPENAI_API_KEY")
-
-        # if organization is None:
-        #     organization = os.environ.get("OPENAI_ORG_ID")
 
         self.litellm = litellm
         # self.client = openai_package.OpenAI(api_key=api_key, organization=organization, base_url=base_url)
         self.model_name = model
-
-        # self.tokenizer = tiktoken.encoding_for_model(model)
-        # self.eos_token = b"<|endoftext|>"
+
+        # we pretend it tokenizes like gpt2 if tiktoken does not know about it... TODO: make this better
+        if tokenizer is None:
+            try:
+                tokenizer = tiktoken.encoding_for_model(model)
+            except:
+                tokenizer = tiktoken.get_encoding("gpt2")
 
         super().__init__(
-            model, tokenizer=tiktoken.encoding_for_model(model), echo=echo,
+            model, tokenizer=tokenizer, echo=echo,
             caching=caching, temperature=temperature,
             max_streaming_tokens=max_streaming_tokens, **kwargs
         )
-
-
+
 
 class LiteLLMCompletion(LiteLLM, Instruct):
 
@@ -69,8 +65,6 @@ def _generator(self, prompt):
             raise e
 
         for part in generator:
-            # chunk = part.choices[0].text or ""
-            # yield chunk.encode("utf8")
             chunk = part.choices[0].delta.content or ""
             yield chunk.encode("utf8")
 
@@ -115,8 +109,6 @@ def _generator(self, prompt):
             raise e
 
         for part in generator:
-            # chunk = part.choices[0].text or ""
-            # yield chunk.encode("utf8")
             chunk = part.choices[0].delta.content or ""
             yield chunk.encode("utf8")
 
diff --git a/guidance/models/_remote.py b/guidance/models/_remote.py
index 5740b6ebf..33884a630 100644
--- a/guidance/models/_remote.py
+++ b/guidance/models/_remote.py
@@ -51,6 +51,21 @@ def __init__(self, model, tokenizer=None, echo=True, caching=True, temperature=0
             bos_token_id = tokenizer.bos_token_id
             eos_token_id = tokenizer.eos_token_id
 
+        # a HuggingFace tokenizers tokenizer was given with id_to_token
+        elif hasattr(tokenizer, "id_to_token"):
+            a_token_ids = tokenizer.encode("a").ids
+            if len(a_token_ids) == 3:
+                bos_token_id = a_token_ids[0]
+                a_id = a_token_ids[1]
+                eos_token_id = a_token_ids[2]
+            else:
+                raise Exception("This tokenizer does not seem to have a BOS and EOS; support for this still needs to be implemented.")
+
+            byte_tokens = [bytes(tokenizer.decode([a_id, i])[1:], encoding="utf8") for i in range(tokenizer.get_vocab_size())]
+            for i,b in enumerate(byte_tokens):
+                if b == b'':
+                    byte_tokens[i] = bytes(tokenizer.id_to_token(i), encoding="utf8")
+
         else:
             raise Exception("The tokenizer given was not of a recognized type!")
 
diff --git a/tests/models/test_cohere.py b/tests/models/test_cohere.py
new file mode 100644
index 000000000..e2f8e26d0
--- /dev/null
+++ b/tests/models/test_cohere.py
@@ -0,0 +1,25 @@
+import pytest
+import guidance
+from guidance import gen, capture, select, user, system, assistant, instruction
+
+def test_cohere_basic():
+    try:
+        lm = guidance.models.CohereCompletion("command-nightly")
+    except:
+        pytest.skip("Skipping Cohere test because we can't load the model!")
+    lm += "Count to 20: 1,2,3,4,"
+    nl = "\n"
+    lm += f"""\
+    5,6,7"""
+    lm += f"""{gen(max_tokens=1, suffix=nl)}aaaaaa"""
+    assert str(lm)[-5:] == "aaaaa"
+
+def test_cohere_instruct():
+    try:
+        lm = guidance.models.CohereInstruct("command-nightly")
+    except:
+        pytest.skip("Skipping Cohere test because we can't load the model!")
+    with instruction():
+        lm += "Count to 20."
+    lm += gen('val', max_tokens=1)
+    assert len(lm['val']) > 0
\ No newline at end of file
diff --git a/tests/models/test_lite_llm.py b/tests/models/test_lite_llm.py
index 1dca04297..c0e618038 100644
--- a/tests/models/test_lite_llm.py
+++ b/tests/models/test_lite_llm.py
@@ -3,12 +3,23 @@
 from guidance import gen, capture, select, user, system, assistant
 from ..utils import get_model
 
-def test_lite_llm_basic():
+def test_lite_llm_basic_openai():
     try:
         lm = guidance.models.LiteLLMCompletion("text-curie-001")
     except:
         pytest.skip("Skipping LiteLLM test because we can't load the model!")
-    # lm = guidance.models.Transformers("gpt2")
     lm += "Count to 20: 1,2,3,4,"
     nl = "\n"
     lm += f"""\
+    5,6,7"""
+    lm += f"""{gen(max_tokens=1, suffix=nl)}aaaaaa"""
+    assert str(lm)[-5:] == "aaaaa"
+
+def test_lite_llm_basic_cohere():
+    try:
+        lm = guidance.models.LiteLLMCompletion("command-nightly")
+    except:
+        pytest.skip("Skipping LiteLLM test because we can't load the model!")
+    lm += "Count to 20: 1,2,3,4,"
+    nl = "\n"
+    lm += f"""\