Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synthesizer with custom criteria #771

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 36 additions & 32 deletions deepeval/synthesizer/template.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,46 @@
class SynthesizerTemplate:
@staticmethod
def generate_synthetic_inputs(context, max_goldens_per_context):
def generate_synthetic_inputs(
context, max_goldens_per_context, example=None
):
if example is None:
example = """Example max goldens per context: 2
Example context: ["Einstein won the Nobel Prize for his discovery of penicillin.", "Einstein won the Nobel Prize in 1968."]
Example JSON:
{{
"data": [
{{
"input": "What was Einstein known for?"
}},
{{
"input": "Einstein was a smart guy huh"
}}
]
}}"""

return f"""I want you act as a copywriter. Based on the given context, which is list of strings, please generate a list of JSON objects with a `input` key.
The `input` can either be a question or a statement that can be addressed by the given context.
The `input` can either be a question or a statement that can be addressed by the given context.

**
IMPORTANT: Please make sure to only return in JSON format, with the 'data' key as a list of JSON objects.
You MUST TRY to generate {max_goldens_per_context} data points, unless the `input` is getting reptitive.

Example context: ["Einstein won the Nobel Prize for his discovery of penicillin.", "Einstein won the Nobel Prize in 1968."]
Example max goldens per context: 2
Example JSON:
{{
"data": [
{{
"input": "What was Einstein known for?"
}},
{{
"input": "Einstein was a smart guy huh"
}}
]
}}


You should NOT incorporate any prior knowledge you have and take each context at face value.
You MUST include at least one statement as the input.
`input` MUST be a STRING.
You MUST TRY to generate {max_goldens_per_context} data points, unless the generated `input` is getting reptitive.
**
**
IMPORTANT: Please make sure to only return in JSON format, with the 'data' key as a list of JSON objects.
You MUST TRY to generate {max_goldens_per_context} data points, unless the `input` is getting reptitive.

Max Goldens Per Context:
{max_goldens_per_context}
{example}

Context:
{context}
You should NOT incorporate any prior knowledge you have and take each context at face value.
You MUST include at least one statement as the input.
`input` MUST be a STRING.
You MUST TRY to generate {max_goldens_per_context} data points, unless the generated `input` is getting reptitive.
**

JSON:
"""
Max Goldens Per Context:
{max_goldens_per_context}

Context:
{context}

JSON:
"""

@staticmethod
def generate_synthetic_expected_output(input, context):
Expand Down
230 changes: 230 additions & 0 deletions tests/test_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,233 @@ def test_synthesizer():
# max_goldens_per_document=2,
# )
# dataset.save_as(file_type="json", directory="./results")


###########################################
######### Custom Text to SQL example ######
###########################################
# JSON text describing a "customers" model: three plain VARCHAR columns
# (City, Id, State), an "orders" relationship column, and six calculated
# columns whose "expression" fields aggregate over `orders`.
# Kept as a raw string (not parsed here). It is not referenced by any
# statement visible in this section of the file.
json_customer_table = """{
"name": "customers",
"refSql": "select * from main.customers",
"columns": [
{
"name": "City",
"type": "VARCHAR",
"isCalculated": false,
"notNull": false,
"properties": {
"description": "The Customer City, where the customer company is located. Also called 'customer segment'."
}
},
{
"name": "Id",
"type": "VARCHAR",
"isCalculated": false,
"notNull": false,
"properties": {
"description": "A unique identifier for each customer in the data model."
}
},
{
"name": "State",
"type": "VARCHAR",
"isCalculated": false,
"notNull": false,
"properties": {
"description": "A field indicating the state where the customer is located."
}
},
{
"name": "orders",
"type": "orders",
"relationship": "CustomersOrders",
"isCalculated": false,
"notNull": false,
"properties": {}
},
{
"name": "LatestRecord",
"type": "DATE",
"isCalculated": true,
"expression": "max(orders.PurchaseTimestamp)",
"notNull": false,
"properties": {}
},
{
"name": "FirstRecord",
"type": "DATE",
"isCalculated": true,
"expression": "min(orders.PurchaseTimestamp)",
"notNull": false,
"properties": {}
},
{
"name": "VIP",
"type": "BOOLEAN",
"isCalculated": true,
"expression": "sum(orders.Size) > 2",
"notNull": false,
"properties": {}
},
{
"name": "OrderCount",
"type": "BIGINT",
"isCalculated": true,
"expression": "count(orders.OrderId)",
"notNull": false,
"properties": {}
},
{
"name": "Debit",
"type": "DOUBLE",
"isCalculated": true,
"expression": "sum(orders.OrderBalance)",
"notNull": false,
"properties": {}
},
{
"name": "ReviewRate",
"type": "DOUBLE",
"isCalculated": true,
"expression": "count(orders.IsReviewed = TRUE) / count(DISTINCT orders.OrderId)",
"notNull": false,
"properties": {}
}
],
"primaryKey": "Id",
"cached": false,
"refreshTime": "30.00m",
"properties": {
"schema": "main",
"catalog": "memory",
"description": "A table of customers who have made purchases, including their city"
}
}
"""

# CREATE TABLE text for a `customers` table. Column descriptions and the
# expressions of calculated fields are embedded as SQL comments. Unlike the
# JSON model text, it has no `orders` relationship column.
# The string is not raw, so the `\'` sequences below resolve to a plain `'`.
# It is not referenced by any statement visible in this section of the file.
schema = """
/* {"schema": "main", "catalog": "memory", "description": "A table of customers who have made purchases, including their city"} */
CREATE TABLE customers (
-- {"description": "The Customer City, where the customer company is located. Also called \'customer segment\'."}
City VARCHAR,
-- {"description": "A unique identifier for each customer in the data model."}
Id VARCHAR PRIMARY KEY,
-- {"description": "A field indicating the state where the customer is located."}
State VARCHAR,
-- This column is a Calculated Field
-- column expression: max(orders.PurchaseTimestamp)
LatestRecord DATE,
-- This column is a Calculated Field
-- column expression: min(orders.PurchaseTimestamp)
FirstRecord DATE,
-- This column is a Calculated Field
-- column expression: sum(orders.Size) > 2
VIP BOOLEAN,
-- This column is a Calculated Field
-- column expression: count(orders.OrderId)
OrderCount BIGINT,
-- This column is a Calculated Field
-- column expression: sum(orders.OrderBalance)
Debit DOUBLE,
-- This column is a Calculated Field
-- column expression: count(orders.IsReviewed = TRUE) / count(DISTINCT orders.OrderId)
ReviewRate DOUBLE
)
"""

# Source text of a small test function, held as a string and never executed
# here. It is the only context handed to `generate_goldens` below.
# The snippet calls `assert_test` without importing it; that is harmless
# because the text is only used as prompt context.
function = """
from deepeval.test_case import LLMTestCase
from deepeval.metrics import AnswerRelevancyMetric

def test_llm():
test_case = LLMTestCase()
metric = AnswerRelevancyMetric()
assert_test(test_case, [metric])
"""

# Generate goldens from the code-snippet context and write them to disk.
# NOTE(review): indentation is not preserved in this view; these statements
# appear to sit at module level, which would run them on import/collection
# rather than inside a test function — confirm.
# `Synthesizer` is imported outside the visible section.
synthesizer = Synthesizer()
synthesizer.generate_goldens(
# One context, which is itself a one-element list holding the snippet text.
contexts=[[function]],
max_goldens_per_context=5,
# Ask for an expected output alongside each generated input.
include_expected_output=True,
)
# Save the generated goldens as JSON under ./results.
synthesizer.save_as(file_type="json", directory="./results")


# For specific use cases, we need the synthesizer to generate inputs that USE
# the context to implicitly elicit a specific form of expected output, rather than asking about
# the context itself (since that will always produce a plain-text expected output).

# example:
# TEXT TO SQL:
# context = """
# /* {"schema": "main", "catalog": "memory", "description": "A table of customers who have made purchases, including their city"} */
# CREATE TABLE customers (
# -- {"description": "The Customer City, where the customer company is located. Also called \'customer segment\'."}
# City VARCHAR,
# -- {"description": "A unique identifier for each customer in the data model."}
# Id VARCHAR PRIMARY KEY,
# -- {"description": "A field indicating the state where the customer is located."}
# State VARCHAR,
# -- This column is a Calculated Field
# -- column expression: max(orders.PurchaseTimestamp)
# LatestRecord DATE,
# -- This column is a Calculated Field
# -- column expression: min(orders.PurchaseTimestamp)
# FirstRecord DATE,
# -- This column is a Calculated Field
# -- column expression: sum(orders.Size) > 2
# VIP BOOLEAN,
# -- This column is a Calculated Field
# -- column expression: count(orders.OrderId)
# OrderCount BIGINT,
# -- This column is a Calculated Field
# -- column expression: sum(orders.OrderBalance)
# Debit DOUBLE,
# -- This column is a Calculated Field
# -- column expression: count(orders.IsReviewed = TRUE) / count(DISTINCT orders.OrderId)
# ReviewRate DOUBLE
# )
# """


# Non-use case specific:
# Input: How is the VIP column calculated?
# Expected Output: It is calculated via sum(orders.Size) > 2
# ^this is non use case specific because for a text-sql use case, we need the
# expected output to be in sql, since we are going to evaluate whether the sql is correct
# but you'll see that the input does not prompt for this. It simply asks about the content of
# the context


# Use case specific:
# Input: How many customers are VIPs?
# Expected Output:
# SELECT COUNT(*) AS VIP_Customers
# FROM customers
# WHERE VIP = TRUE;



# CODING:
# context = """
# from deepeval.test_case import LLMTestCase
# from deepeval.metrics import AnswerRelevancyMetric

# def test_llm():
# test_case = LLMTestCase()
# metric = AnswerRelevancyMetric()
# assert_test(test_case, [metric])
# """


# Non-use case specific:
# input: How does test_llm utilize LLMTestCase and AnswerRelevancyMetric in its assertion process?
# expected output: The `test_llm` function creates instances of `LLMTestCase` and `AnswerRelevancyMetric`,
# then uses these instances in an assertion process through the `assert_test` function.

# Use Case specific:
# input: Implement the LLMTestCase interface for me. Assume the answer relevancy metric will be used to
# measure an LLMTestCase based on its input and actual output.
# expected output:
# code here...
Loading