diff --git a/README.md b/README.md index 952c07782a..f556b6a6e5 100644 --- a/README.md +++ b/README.md @@ -208,7 +208,7 @@ $ xinference registrations | LLM | code-llama-python | ['en'] | ['generate'] | | LLM | falcon | ['en'] | ['embed', 'generate'] | | LLM | falcon-instruct | ['en'] | ['embed', 'chat'] | -| LLM | glaive-coder | ['en'] | ['chat'] | +| LLM | glaive-coder | ['en'] | ['chat'] | | LLM | gpt-2 | ['en'] | ['generate'] | | LLM | internlm-7b | ['en', 'zh'] | ['embed', 'generate'] | | LLM | internlm-chat-7b | ['en', 'zh'] | ['embed', 'chat'] | @@ -217,6 +217,8 @@ $ xinference registrations | LLM | llama-2-chat | ['en'] | ['embed', 'chat'] | | LLM | opt | ['en'] | ['embed', 'generate'] | | LLM | orca | ['en'] | ['embed', 'chat'] | +| LLM | phi-1 | ['en'] | ['generate'] | +| LLM | phi-1_5 | ['en'] | ['generate'] | | LLM | qwen-chat | ['en', 'zh'] | ['embed', 'chat'] | | LLM | starchat-beta | ['en'] | ['embed', 'chat'] | | LLM | starcoder | ['en'] | ['generate'] | diff --git a/README_ja_JP.md b/README_ja_JP.md index ed9b3ad336..4222c4f376 100644 --- a/README_ja_JP.md +++ b/README_ja_JP.md @@ -196,6 +196,8 @@ $ xinference registrations | LLM | llama-2-chat | ['en'] | ['embed', 'chat'] | | LLM | opt | ['en'] | ['embed', 'generate'] | | LLM | orca | ['en'] | ['embed', 'chat'] | +| LLM | phi-1 | ['en'] | ['generate'] | +| LLM | phi-1_5 | ['en'] | ['generate'] | | LLM | qwen-chat | ['en', 'zh'] | ['embed', 'chat'] | | LLM | starchat-beta | ['en'] | ['embed', 'chat'] | | LLM | starcoder | ['en'] | ['generate'] | diff --git a/README_zh_CN.md b/README_zh_CN.md index 1dea7d9bd2..53531266ea 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -190,7 +190,7 @@ $ xinference registrations | LLM | code-llama-python | ['en'] | ['generate'] | | LLM | falcon | ['en'] | ['embed', 'generate'] | | LLM | falcon-instruct | ['en'] | ['embed', 'chat'] | -| LLM | glaive-coder | ['en'] | ['chat'] | +| LLM | glaive-coder | ['en'] | ['chat'] | | LLM | gpt-2 | ['en'] | ['generate'] | | LLM | 
internlm-7b | ['en', 'zh'] | ['embed', 'generate'] | | LLM | internlm-chat-7b | ['en', 'zh'] | ['embed', 'chat'] | @@ -199,6 +199,8 @@ $ xinference registrations | LLM | llama-2-chat | ['en'] | ['embed', 'chat'] | | LLM | opt | ['en'] | ['embed', 'generate'] | | LLM | orca | ['en'] | ['embed', 'chat'] | +| LLM | phi-1 | ['en'] | ['generate'] | +| LLM | phi-1_5 | ['en'] | ['generate'] | | LLM | qwen-chat | ['en', 'zh'] | ['embed', 'chat'] | | LLM | starchat-beta | ['en'] | ['embed', 'chat'] | | LLM | starcoder | ['en'] | ['generate'] | diff --git a/setup.cfg b/setup.cfg index 74e2148c70..2d98c3ebff 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,7 +65,7 @@ dev = all = ctransformers llama-cpp-python>=0.2.0 - transformers>=4.31.0 + transformers>=4.34.0 torch accelerate>=0.20.3 sentencepiece diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index 957585f18c..f38b64b5f6 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -756,6 +756,56 @@ } ] }, + { + "version": 1, + "context_length": 2048, + "model_name": "phi-1", + "model_lang": [ + "en" + ], + "model_ability": [ + "generate" + ], + "model_description": "Phi-1 is a Transformer based LLM, trained on code and synthetic data for research on model safety.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 1, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "microsoft/phi-1", + "model_revision": "633bca15031ed2150e5badd538ec29d4e2403138" + } + ] + }, + { + "version": 1, + "context_length": 2048, + "model_name": "phi-1_5", + "model_lang": [ + "en" + ], + "model_ability": [ + "generate" + ], + "model_description": "Phi-1.5 is a Transformer based LLM, trained on additional NLP synthetic texts for research on model safety.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 1, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "microsoft/phi-1_5", 
+ "model_revision": "b6a7e2fe15c21f5847279f23e280cc5a0e7049ef" + } + ] + }, { "version": 1, "context_length": 2048,