Llama-2 Model Selection
KillianLucas committed Aug 23, 2023
1 parent a46f97e commit ede5564
Showing 2 changed files with 47 additions and 14 deletions.
6 changes: 2 additions & 4 deletions interpreter/interpreter.py
@@ -43,8 +43,6 @@
# Message for when users don't have an OpenAI API key.
# `---` is at the bottom for aesthetic reasons.
missing_api_key_message = """
- ---
🔑 OpenAI API key not found.
To use `GPT-4` (recommended) please provide an OpenAI API key. You can [get one here](https://platform.openai.com/account/api-keys).
@@ -243,7 +241,7 @@ def verify_api_key(self):
    print('', Markdown("# **Welcome to Open Interpreter.**"), '')
    time.sleep(1)

-   print(Markdown(missing_api_key_message), '')
+   print(Markdown("---"), '', Markdown(missing_api_key_message), '')
    response = input("OpenAI API key: ")

    if response == "":
Expand All @@ -252,7 +250,7 @@ def verify_api_key(self):

print(Markdown("> Switching to `Llama-2`...\n\n**Tip:** Run `interpreter --local` to automatically use `Llama-2`."), '')
time.sleep(2)
print(Markdown("---"), '')
print(Markdown("---"))
return

else:
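For context on the `interpreter.py` change above: `rich` renders Markdown objects directly in the terminal, so the horizontal rule can be printed as its own `Markdown("---")` instead of being embedded in the message string. A minimal sketch of the pattern, assuming only that `rich` is installed (the message body here is an illustrative stand-in, not the full text from the file):

from rich import print
from rich.markdown import Markdown

# Illustrative stand-in for the real message defined in interpreter.py.
missing_api_key_message = """
🔑 OpenAI API key not found.

To use `GPT-4` (recommended) please provide an OpenAI API key.
"""

# rich's print accepts multiple arguments and renders each Markdown object;
# printing the rule separately keeps the message string free of layout text.
print(Markdown("---"), '', Markdown(missing_api_key_message), '')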
55 changes: 45 additions & 10 deletions interpreter/llama_2.py
@@ -1,6 +1,7 @@
import os
import sys
import appdirs
+ import inquirer
import subprocess
import contextlib
from rich import print
@@ -9,8 +10,44 @@

def get_llama_2_instance():

-   # Define the file name
-   file_name = "llama-2-7b-chat.ggmlv3.q2_K.bin"
+   # First, we ask them which model they want to use.
+   print('', Markdown("Please select a `Llama-2` model."), '')
+
+   models_7b = [
+       # Smallest/Fastest
+       {'Name': 'llama-2-7b-chat.ggmlv3.q2_K.bin', 'Param': '7B', 'Bits': 2, 'Size': '2.87 GB', 'RAM': '5.37 GB', 'Description': 'New k-quant method. Uses GGML_TYPE_Q4_K for the attention.vw and feed_forward.w2 tensors, GGML_TYPE_Q2_K for the other tensors.'},
+       # Middle Ground
+       {'Name': 'llama-2-7b-chat.ggmlv3.q4_1.bin', 'Param': '7B', 'Bits': 4, 'Size': '4.21 GB', 'RAM': '6.71 GB', 'Description': 'Original quant method, 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.'},
+       # Middle Ground
+       {'Name': 'llama-2-7b-chat.ggmlv3.q5_0.bin', 'Param': '7B', 'Bits': 5, 'Size': '4.63 GB', 'RAM': '7.13 GB', 'Description': 'Original quant method, 5-bit. Higher accuracy, higher resource usage and slower inference.'},
+       # Best/Slowest
+       {'Name': 'llama-2-7b-chat.ggmlv3.q8_0.bin', 'Param': '7B', 'Bits': 8, 'Size': '7.16 GB', 'RAM': '9.66 GB', 'Description': 'Original quant method, 8-bit. Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.'}
+   ]
+   models_13b = [
+       # Smallest/Fastest
+       {'Name': 'llama-2-13b-chat.ggmlv3.q2_K.bin', 'Param': '13B', 'Bits': 2, 'Size': '5.51 GB', 'RAM': '8.01 GB', 'Description': 'New k-quant method. Uses GGML_TYPE_Q4_K for the attention.vw and feed_forward.w2 tensors, GGML_TYPE_Q2_K for the other tensors.'},
+       # Middle Ground
+       {'Name': 'llama-2-13b-chat.ggmlv3.q3_K_L.bin', 'Param': '13B', 'Bits': 3, 'Size': '6.93 GB', 'RAM': '9.43 GB', 'Description': 'New k-quant method. Uses GGML_TYPE_Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else GGML_TYPE_Q3_K'},
+       # Middle Ground
+       {'Name': 'llama-2-13b-chat.ggmlv3.q4_1.bin', 'Param': '13B', 'Bits': 4, 'Size': '8.14 GB', 'RAM': '10.64 GB', 'Description': 'Original quant method, 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.'},
+       # Best/Slowest
+       {'Name': 'llama-2-13b-chat.ggmlv3.q8_0.bin', 'Param': '13B', 'Bits': 8, 'Size': '13.83 GB', 'RAM': '16.33 GB', 'Description': 'Original quant method, 8-bit. Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.'}
+   ]
+
+   all_models = models_7b + models_13b
+
+   # Function to format the model choice for display
+   def format_choice(model):
+       return f"{model['Param']} Parameter, {model['Bits']}-Bit | Size: {model['Size']}, RAM usage: {model['RAM']}"
+
+   questions = [
+       inquirer.List('model',
+                     choices=[(format_choice(m), m['Name']) for m in all_models])
+   ]
+
+   answers = inquirer.prompt(questions)
+
+   file_name = answers['model']

    # Get user data directory for your application
    user_data_dir = appdirs.user_data_dir("open-interpreter")
@@ -36,12 +73,12 @@ def get_llama_2_instance():
    else:
        # If the file was not found, ask for confirmation to download it
        download_path = os.path.join(default_path, file_name)
-       message = f"Llama-2 not found. Would you like to download the `3GB` file to `{download_path}`?"
+       message = f"This instance of `Llama-2` was not found. Would you like to download it to `{download_path}`?"
        if confirm_action(message):
-           url = "https://huggingface.co/TheBloke/Llama-2-7B-chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin"
+           url = f"https://huggingface.co/TheBloke/Llama-2-7B-chat-GGML/resolve/main/{file_name}"
            subprocess.run(f"curl -L '{url}' -o '{download_path}'", shell=True)
            model_path = download_path
-           print('\n', "Finished downloading Llama-2.", '\n')
+           print('\n', "Finished downloading `Llama-2`.", '\n')
        else:
            print('\n', "Download cancelled. Exiting.", '\n')
            return None
@@ -50,20 +87,18 @@ def get_llama_2_instance():
        from llama_cpp import Llama
    except:
        # Ask for confirmation to install the required pip package
-       message = "Llama-2 interface package not found. Install `llama-cpp-python` package?"
+       message = "`Llama-2` interface package not found. Install `llama-cpp-python` package?"
        if confirm_action(message):
            subprocess.run(["pip", "install", "llama-cpp-python"])
            from llama_cpp import Llama
-           print('', "Finished downloading Llama-2 interface.", '')
+           print('', "Finished downloading `Llama-2` interface.", '')
        else:
            print('', "Installation cancelled. Exiting.", '')
            return None

    # Initialize and return Llama-2
    llama_2 = Llama(model_path=model_path)

-   # print("\n✅ Llama-2 loaded.", '')
-

    return llama_2

def confirm_action(message):
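Taken together, the new `llama_2.py` flow is: present the quantization table with `inquirer` (each choice is a `(label, value)` tuple, so the formatted description is displayed while the bare file name is returned), download the chosen `.bin` with `curl` into the `appdirs` per-user data directory if it is missing, then load it with `llama-cpp-python`. Below is a condensed sketch of that flow under a few stated assumptions: `pick_and_load_llama_2` is a hypothetical helper name, `llama-cpp-python` is assumed to be installed already (so the lazy import and pip prompt are dropped), error handling is omitted, and only 7B file names are used, since the download URL in the diff resolves against TheBloke's 7B repository.

import os
import subprocess

import appdirs
import inquirer
from llama_cpp import Llama

def pick_and_load_llama_2():  # hypothetical helper name, not from the repo
    # Two of the 7B quantizations from the table above.
    models = [
        {'Name': 'llama-2-7b-chat.ggmlv3.q2_K.bin', 'Param': '7B', 'Bits': 2, 'Size': '2.87 GB', 'RAM': '5.37 GB'},
        {'Name': 'llama-2-7b-chat.ggmlv3.q8_0.bin', 'Param': '7B', 'Bits': 8, 'Size': '7.16 GB', 'RAM': '9.66 GB'},
    ]

    # (label, value) tuples: inquirer displays the label and returns the value.
    choices = [(f"{m['Param']} Parameter, {m['Bits']}-Bit | Size: {m['Size']}, RAM usage: {m['RAM']}", m['Name'])
               for m in models]
    answers = inquirer.prompt([inquirer.List('model', choices=choices)])
    file_name = answers['model']

    # Keep models in the per-user data directory; create it on first run
    # (directory creation is added here for self-containment).
    default_path = appdirs.user_data_dir("open-interpreter")
    os.makedirs(default_path, exist_ok=True)
    model_path = os.path.join(default_path, file_name)

    # Download on first use; the URL pattern is the one from the diff.
    if not os.path.exists(model_path):
        url = f"https://huggingface.co/TheBloke/Llama-2-7B-chat-GGML/resolve/main/{file_name}"
        subprocess.run(f"curl -L '{url}' -o '{model_path}'", shell=True)

    return Llama(model_path=model_path)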
