Merge pull request #45 from minggnim/add-chat-cli-support

Add chat cli support
minggnim · Aug 6, 2023 · 21a638a · 21a638a
2 parents ba1b2eb + 947bfd8
commit 21a638a
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -8,12 +8,19 @@
 
 A repository for building transformer-based NLP models
 
-## Run Llama2 Chat UI on CPU
+## Run Llama2 on consumer CPU
+
+### Run Chat UI on CPU
 ```
 cd pipelines/nlp_models/
 streamlit run app.py
 ```
 
+### Run Chat from the command line on CPU
+```
+llm_app chat -s 'hi there'
+```
+
 ## Models
 
 1. bert_classifier
@@ -29,11 +36,13 @@ streamlit run app.py
    - [Inference example](https://github.com/minggnim/nlp-models/blob/master/notebooks/02_multi-task-model/02_c_multitask_model_inference_example.ipynb)
    - [Qdrant Vector DB](https://github.com/minggnim/nlp-models/blob/master/notebooks/02_multi-task-model/02_d_qdrant_vector_db.ipynb)
 
-## Other Example Notebooks
-
+3. `GPT-2`
 - [Training GPT-2 model](https://github.com/minggnim/nlp-models/blob/master/notebooks/03_gpt-2-training/gpt-2-training/03_gpt2_training.ipynb)
-- [Running Falcon 4b model](https://github.com/minggnim/nlp-models/blob/master/notebooks/04_llms/05_falcon_4b.ipynb)
-- [Run Llama2 chat on cpu](https://github.com/minggnim/nlp-models/blob/master/notebooks/04_llms/06_llama2_langchain_gglm_inference.ipynb)
+4. `Falcon 7B`
+- [Running Falcon 7b model](https://github.com/minggnim/nlp-models/blob/master/notebooks/04_llms/05_falcon_7b.ipynb)
+5. Quantized Llama2 models
+- [Run Llama2 chat on CPU](https://github.com/minggnim/nlp-models/blob/master/notebooks/04_llms/06_llama2_langchain_gglm_inference.ipynb)
+- [Run Llama2 QA on a custom pdf document on CPU](https://github.com/minggnim/nlp-models/blob/master/notebooks/04_llms/07_llama2_doc_qa.ipynb)
 
 ## Installation
 

diff --git a/notebooks/04_llms/06_llama2_langchain_gglm_inference.ipynb b/notebooks/04_llms/06_llama2_langchain_gglm_inference.ipynb
@@ -42,6 +42,50 @@
     "llm_chat_app = ChatLlmApp(llm=build_llm(config))"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
+      "Prompt after formatting:\n",
+      "\u001b[32;1m\u001b[1;3mAssistant is a large language model.\n",
+      "\n",
+      "Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
+      "\n",
+      "Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
+      "\n",
+      "Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
+      "\n",
+      "Human: I want you to act as a Linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply with the terminal output inside one unique code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so by putting text inside curly brackets {like this}. My first command is pwd.\n",
+      "AI:  ```\n",
+      "/home/user\n",
+      "```\n",
+      "Human: ls ~\n",
+      "AI:   ```\n",
+      "total 8\n",
+      "drwxrwxr-x  2 user user 4096 Mar  3 15:37 .\n",
+      "drwxrwxr-x  3 user user 4096 Mar  3 15:37 ..\n",
+      "```\n",
+      "Human: hi there\n",
+      "Assistant:\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n",
+      " Hello! How can I assist you today?\n"
+     ]
+    }
+   ],
+   "source": [
+    "output = llm_chat_app(inputs=\"hi there\")\n",
+    "print(output)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 6,

diff --git a/setup.cfg b/setup.cfg
@@ -38,3 +38,7 @@ where = src
 [options.extras_require]
 full =
     tqdm
+
+[options.entry_points]
+console_scripts =
+    llm_app = nlp_models.cmd.cli:cli
diff --git a/src/nlp_models/cmd/__init__.py b/src/nlp_models/cmd/__init__.py
diff --git a/src/nlp_models/cmd/cli.py b/src/nlp_models/cmd/cli.py
@@ -0,0 +1,26 @@
+import click
+from nlp_models.llm.base import LlmConfig
+from nlp_models.llm.llms import build_llm
+from nlp_models.llm.apps import ChatLlmApp
+
+
+config = LlmConfig(
+    MODEL_BIN_PATH='./models/Llama-2-7B-Chat-GGML',
+    DATA_PATH='./data/0_raw',
+)
+
+
+@click.group()
+def cli():
+    pass
+
+
+@cli.command()
+@click.option('--inputs', '-s', type=str, default='Hi', help="start your conversation here")
+def chat(inputs):
+    llm_chat_app = ChatLlmApp(llm=build_llm(config), verbose=False)
+    print(llm_chat_app(inputs))
+
+
+if __name__ == '__main__':
+    cli()
diff --git a/src/nlp_models/llm/apps.py b/src/nlp_models/llm/apps.py
@@ -1,4 +1,3 @@
-from typing import Any
 from langchain import LLMChain
 from langchain.chains import RetrievalQA
 from langchain.memory import ConversationBufferWindowMemory
@@ -22,13 +21,13 @@ def __call__(self, query):
 
 
 class ChatLlmApp:
-    def __init__(self, llm, prompt=ChatPrompt().chat_prompt, memory=ConversationBufferWindowMemory()) -> None:
+    def __init__(self, llm, prompt=ChatPrompt().chat_prompt, memory=ConversationBufferWindowMemory(), verbose=True) -> None:
         self.llm_chat = LLMChain(
             llm=llm,
             prompt=prompt,
-            verbose=True,
+            verbose=verbose,
             memory=memory
         )
 
-    def __call__(self, inputs) -> Any:
+    def __call__(self, inputs) -> str:
         return self.llm_chat.predict(human_input=inputs)