0.1.0 (#1)

* Initial implementation * Include generation script from llama.cpp repo * Include calculator and book examples * Add install instructions to README * Fix package name typo
rhohndorf · Apr 2, 2024 · 72e90d5 · 72e90d5
1 parent 1bd6126
commit 72e90d5
Show file tree

Hide file tree

Showing 6 changed files with 1,714 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -1,2 +1,187 @@
-# pydantic-gbnf-gammar-generator
-Generates llama.cpp compatible grammars from pydantic objects
+# Pydantic GBNF Grammar Generator
+
+Pydantic GBNF Grammar Generator facilitates the conversion of Pydantic data models into GBNF grammars.
+This library was created for use with llama.cpp and is, at this point, just a repackaged version of some Python scripts from the [llama.cpp repository](https://github.com/ggerganov/llama.cpp), published separately to make integration into other projects easier.
+
+## Installation
+The easiest way to install is from PyPI
+```shell
+pip install pydantic-gbnf-grammar-generator
+```
+
+Alternatively, you can install from source
+```shell
+git clone https://github.com/rhohndorf/pydantic-gbnf-gammar-generator.git
+cd pydantic-gbnf-gammar-generator
+pip install -e .
+```
+
+## Usage
+The following examples demonstrate how to use the library. All examples can be found in the examples folder.
+To run the examples, a llama.cpp server listening on port 8080 is required.
+
+### Structured Output
+
+```Python
+from enum import Enum
+import json
+from typing import Optional, List
+
+from pydantic import BaseModel, Field
+import requests
+
+from pydantic_gbnf_grammar_generator import generate_gbnf_grammar_and_documentation
+
+# Function to get completion on the llama.cpp server with grammar. 
+def create_completion(prompt, grammar):
+    headers = {"Content-Type": "application/json"}
+    data = {"prompt": prompt, "grammar": grammar, "stop": ["<|im_end|>"]}
+
+    response = requests.post("http://127.0.0.1:8080/completion", headers=headers, json=data)
+    data = response.json()
+
+    print(data["content"])
+    return data["content"]
+
+# An example of structured output based on pydantic models. The LLM will create an entry for a Book database out of unstructured text.
+class Category(Enum):
+    """
+    The category of the book.
+    """
+    Fiction = "Fiction"
+    NonFiction = "Non-Fiction"
+
+
+class Book(BaseModel):
+    """
+    Represents an entry about a book.
+    """
+    title: str = Field(..., description="Title of the book.")
+    author: str = Field(..., description="Author of the book.")
+    published_year: Optional[int] = Field(..., description="Publishing year of the book.")
+    keywords: List[str] = Field(..., description="A list of keywords.")
+    category: Category = Field(..., description="Category of the book.")
+    summary: str = Field(..., description="Summary of the book.")
+
+
+# We need no additional parameters other than our list of pydantic models.
+gbnf_grammar, documentation = generate_gbnf_grammar_and_documentation([Book])
+print(gbnf_grammar)
+
+system_message = "You are an advanced AI, tasked to create a dataset entry in JSON for a Book. The following is the expected output model:\n\n" + documentation
+
+text = """The Feynman Lectures on Physics is a physics textbook based on some lectures by Richard Feynman, a Nobel laureate who has sometimes been called "The Great Explainer". The lectures were presented before undergraduate students at the California Institute of Technology (Caltech), during 1961–1963. The book's co-authors are Feynman, Robert B. Leighton, and Matthew Sands."""
+prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant"
+
+text = create_completion(prompt=prompt, grammar=gbnf_grammar)
+
+json_data = json.loads(text)
+
+print(Book(**json_data))
+```
+
+### Function Calling
+
+```Python
+from enum import Enum
+import json
+from typing import Union
+
+from pydantic import BaseModel, Field
+import requests
+
+from pydantic_gbnf_grammar_generator import generate_gbnf_grammar_and_documentation
+
+
+# Function to get completion on the llama.cpp server with grammar.
+def create_completion(prompt, grammar):
+    headers = {"Content-Type": "application/json"}
+    data = {"prompt": prompt, "grammar": grammar, "stop": ["<|im_end|>"]}
+
+    response = requests.post("http://127.0.0.1:8080/completion", headers=headers, json=data)
+    data = response.json()
+
+    print(data["content"])
+    return data["content"]
+
+
+# A function for the agent to send a message to the user.
+class SendMessageToUser(BaseModel):
+    """
+    Send a message to the User.
+    """
+
+    chain_of_thought: str = Field(..., description="Your chain of thought while sending the message.")
+    message: str = Field(..., description="Message you want to send to the user.")
+
+    def run(self):
+        print(self.message)
+
+
+# Enum for the calculator tool.
+class MathOperation(Enum):
+    ADD = "add"
+    SUBTRACT = "subtract"
+    MULTIPLY = "multiply"
+    DIVIDE = "divide"
+
+
+# Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. Docstring and description of fields will be used in system prompt.
+class Calculator(BaseModel):
+    """
+    Perform a math operation on two numbers.
+    """
+
+    number_one: Union[int, float] = Field(..., description="First number.")
+    operation: MathOperation = Field(..., description="Math operation to perform.")
+    number_two: Union[int, float] = Field(..., description="Second number.")
+
+    def run(self):
+        if self.operation == MathOperation.ADD:
+            return self.number_one + self.number_two
+        elif self.operation == MathOperation.SUBTRACT:
+            return self.number_one - self.number_two
+        elif self.operation == MathOperation.MULTIPLY:
+            return self.number_one * self.number_two
+        elif self.operation == MathOperation.DIVIDE:
+            return self.number_one / self.number_two
+        else:
+            raise ValueError("Unknown operation.")
+
+
+# Here the grammar is generated by passing the available function models to the generate_gbnf_grammar_and_documentation function. This also generates documentation usable by the LLM.
+# pydantic_model_list is the list of pydantic models
+# outer_object_name is an optional name for an outer object around the actual model object. Like a "function" object with "function_parameters" which contains the actual model object. If None, no outer object will be generated
+# outer_object_content is the name of outer object content.
+# model_prefix is the optional prefix for models in the documentation. (Default="Output Model")
+# fields_prefix is the prefix for the model fields in the documentation. (Default="Output Fields")
+gbnf_grammar, documentation = generate_gbnf_grammar_and_documentation(
+    pydantic_model_list=[SendMessageToUser, Calculator],
+    outer_object_name="function",
+    outer_object_content="function_parameters",
+    model_prefix="Function",
+    fields_prefix="Parameters",
+)
+
+print(gbnf_grammar)
+print(documentation)
+
+system_message = (
+    "You are an advanced AI, tasked to assist the user by calling functions in JSON format. The following are the available functions and their parameters and types:\n\n"
+    + documentation
+)
+
+user_message = "What is 42 * 42?"
+prompt = (
+    f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
+)
+
+text = create_completion(prompt=prompt, grammar=gbnf_grammar)
+function_dictionary = json.loads(text)
+if function_dictionary["function"] == "Calculator":
+    function_parameters = {**function_dictionary["function_parameters"]}
+
+    print(Calculator(**function_parameters).run())
+    # This should output: 1764
+
+```
diff --git a/examples/books.py b/examples/books.py
@@ -0,0 +1,56 @@
+from enum import Enum
+import json
+from typing import Optional, List
+
+from pydantic import BaseModel, Field
+import requests
+
+from pydantic_gbnf_grammar_generator import generate_gbnf_grammar_and_documentation
+
+# Function to get completion on the llama.cpp server with grammar.
+def create_completion(prompt, grammar):
+    headers = {"Content-Type": "application/json"}
+    data = {"prompt": prompt, "grammar": grammar, "stop": ["<|im_end|>"]}
+
+    response = requests.post("http://127.0.0.1:8080/completion", headers=headers, json=data)
+    data = response.json()
+
+    print(data["content"])
+    return data["content"]
+
+# An example of structured output based on pydantic models. The LLM will create an entry for a Book database out of unstructured text.
+class Category(Enum):
+    """
+    The category of the book.
+    """
+    Fiction = "Fiction"
+    NonFiction = "Non-Fiction"
+
+
+class Book(BaseModel):
+    """
+    Represents an entry about a book.
+    """
+    title: str = Field(..., description="Title of the book.")
+    author: str = Field(..., description="Author of the book.")
+    published_year: Optional[int] = Field(..., description="Publishing year of the book.")
+    keywords: List[str] = Field(..., description="A list of keywords.")
+    category: Category = Field(..., description="Category of the book.")
+    summary: str = Field(..., description="Summary of the book.")
+
+
+# We need no additional parameters other than our list of pydantic models.
+gbnf_grammar, documentation = generate_gbnf_grammar_and_documentation([Book])
+print(gbnf_grammar)
+
+system_message = "You are an advanced AI, tasked to create a dataset entry in JSON for a Book. The following is the expected output model:\n\n" + documentation
+
+text = """The Feynman Lectures on Physics is a physics textbook based on some lectures by Richard Feynman, a Nobel laureate who has sometimes been called "The Great Explainer". The lectures were presented before undergraduate students at the California Institute of Technology (Caltech), during 1961–1963. The book's co-authors are Feynman, Robert B. Leighton, and Matthew Sands."""
+prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant"
+
+text = create_completion(prompt=prompt, grammar=gbnf_grammar)
+
+json_data = json.loads(text)
+
+print(Book(**json_data))
+
diff --git a/examples/calculator.py b/examples/calculator.py
@@ -0,0 +1,100 @@
+from enum import Enum
+import json
+from typing import Union
+
+from pydantic import BaseModel, Field
+import requests
+
+from pydantic_gbnf_grammar_generator import generate_gbnf_grammar_and_documentation
+
+
+# Function to get completion on the llama.cpp server with grammar.
+def create_completion(prompt, grammar):
+    headers = {"Content-Type": "application/json"}
+    data = {"prompt": prompt, "grammar": grammar, "stop": ["<|im_end|>"]}
+
+    response = requests.post("http://127.0.0.1:8080/completion", headers=headers, json=data)
+    data = response.json()
+
+    print(data["content"])
+    return data["content"]
+
+
+# A function for the agent to send a message to the user.
+class SendMessageToUser(BaseModel):
+    """
+    Send a message to the User.
+    """
+
+    chain_of_thought: str = Field(..., description="Your chain of thought while sending the message.")
+    message: str = Field(..., description="Message you want to send to the user.")
+
+    def run(self):
+        print(self.message)
+
+
+# Enum for the calculator tool.
+class MathOperation(Enum):
+    ADD = "add"
+    SUBTRACT = "subtract"
+    MULTIPLY = "multiply"
+    DIVIDE = "divide"
+
+
+# Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. Docstring and description of fields will be used in system prompt.
+class Calculator(BaseModel):
+    """
+    Perform a math operation on two numbers.
+    """
+
+    number_one: Union[int, float] = Field(..., description="First number.")
+    operation: MathOperation = Field(..., description="Math operation to perform.")
+    number_two: Union[int, float] = Field(..., description="Second number.")
+
+    def run(self):
+        if self.operation == MathOperation.ADD:
+            return self.number_one + self.number_two
+        elif self.operation == MathOperation.SUBTRACT:
+            return self.number_one - self.number_two
+        elif self.operation == MathOperation.MULTIPLY:
+            return self.number_one * self.number_two
+        elif self.operation == MathOperation.DIVIDE:
+            return self.number_one / self.number_two
+        else:
+            raise ValueError("Unknown operation.")
+
+
+# Here the grammar is generated by passing the available function models to the generate_gbnf_grammar_and_documentation function. This also generates documentation usable by the LLM.
+# pydantic_model_list is the list of pydantic models
+# outer_object_name is an optional name for an outer object around the actual model object. Like a "function" object with "function_parameters" which contains the actual model object. If None, no outer object will be generated
+# outer_object_content is the name of outer object content.
+# model_prefix is the optional prefix for models in the documentation. (Default="Output Model")
+# fields_prefix is the prefix for the model fields in the documentation. (Default="Output Fields")
+gbnf_grammar, documentation = generate_gbnf_grammar_and_documentation(
+    pydantic_model_list=[SendMessageToUser, Calculator],
+    outer_object_name="function",
+    outer_object_content="function_parameters",
+    model_prefix="Function",
+    fields_prefix="Parameters",
+)
+
+print(gbnf_grammar)
+print(documentation)
+
+system_message = (
+    "You are an advanced AI, tasked to assist the user by calling functions in JSON format. The following are the available functions and their parameters and types:\n\n"
+    + documentation
+)
+
+user_message = "What is 42 * 42?"
+prompt = (
+    f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
+)
+
+text = create_completion(prompt=prompt, grammar=gbnf_grammar)
+function_dictionary = json.loads(text)
+if function_dictionary["function"] == "Calculator":
+    function_parameters = {**function_dictionary["function_parameters"]}
+
+    print(Calculator(**function_parameters).run())
+    # This should output: 1764
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,26 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pydantic_gbnf_grammar_generator"
+version = "0.1.0"
+authors = [
+  { name="Ruben Hohndorf", email="[email protected]" },
+]
+description = "Generates GBNF grammars from pydantic models"
+readme = "README.md"
+requires-python = ">=3.10"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "pydantic>=2.5.3",
+    "docstring_parser==0.16",
+]
+
+[project.urls]
+Homepage = "https://github.com/rhohndorf/pydantic-gbnf-gammar-generator"
+Issues = "https://github.com/rhohndorf/pydantic-gbnf-gammar-generator/issues"
diff --git a/src/pydantic_gbnf_grammar_generator/__init__.py b/src/pydantic_gbnf_grammar_generator/__init__.py
@@ -0,0 +1,2 @@
+from pydantic_gbnf_grammar_generator.main import *
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from pydantic_gbnf_grammar_generator.main import *