IBM · vazirim · Feb 5, 2025 · Jan 6, 2025 · Jan 24, 2025 · Jan 24, 2025
diff --git a/examples/gsm8k/README.md b/examples/gsm8k/README.md
@@ -0,0 +1,2 @@
+To run the programs in this directory, you need the GSM8K benchmark, available at:
+https://github.com/openai/grade-school-math
diff --git a/examples/gsm8k/math.pdl b/examples/gsm8k/math.pdl
@@ -0,0 +1,86 @@
+defs:  # definitions used by the `text` program further down
+  RITS_API_KEY:  # API key taken from the environment; the model calls pass it as `api_key` and as a request header
+    lang: python  # the lookup below fails with KeyError if the RITS_API_KEY environment variable is unset
+    code: |
+      import os
+      result = os.environ["RITS_API_KEY"]
+  solve:  # function: sends the question to the model, then asks the model for the final answer as JSON
+    function:
+      question: str  # the problem statement to solve
+    return:
+      lastOf:  # the closing `data` block is the record callers index (e.g. solution["answer"]["result"])
+      - "${ question }\n"
+      - model: openai/ibm-granite/granite-3.1-8b-instruct
+        def: solution  # first model output, bound to `solution`
+        parameters:
+          api_key: ${RITS_API_KEY}
+          api_base: https://inference-3scale-apicast-production.apps.rits.fmaas.res.ibm.com/granite-3-1-8b-instruct/v1/
+          extra_headers:  # the same key is also sent as a header named RITS_API_KEY
+            RITS_API_KEY: ${RITS_API_KEY}
+      - "\nwhat is the final answer from the above solution?\n"
+      - model: openai/ibm-granite/granite-3.1-8b-instruct
+        def: answer  # second model output, parsed as JSON and bound to `answer`
+        spec: {result: float}  # declared shape of the parsed output: an object with a float `result`
+        parser: json
+        parameters:
+          api_key: ${RITS_API_KEY}
+          api_base: https://inference-3scale-apicast-production.apps.rits.fmaas.res.ibm.com/granite-3-1-8b-instruct/v1/
+          extra_headers:
+            RITS_API_KEY: ${RITS_API_KEY}
+      - data:  # record pairing the free-text solution with the parsed answer
+          solution: ${ solution }
+          answer: ${ answer } 
+  truth:  # function: extracts the numeric ground truth from a dataset answer string
+    function: 
+      answer: str  # reference answer text; the number is expected after the last "####"
+    return:
+      lang: python  # takes the text after the last "####", removes "$" and ",", and converts it to float
+      code:
+        |
+        result = float(answer.split("####")[-1].strip().replace("$", "").replace(",", ""))
+  compare:  # function: true when both arguments are equal after conversion to float
+    function:  # NOTE(review): no `call: ${ compare }` appears in this file; the loop in `text` binds its own `compare` variable
+      ans1: float
+      ans2: float
+    return:
+      lang: python  # exact float equality, no tolerance
+      code:
+        |
+        result = float(ans1) == float(ans2) 
+
+
+text:  # main program: load the dataset, then solve and grade every problem
+- read: ../../../grade-school-math/grade_school_math/data/test.jsonl  # NOTE(review): relative path to a grade-school-math checkout; confirm what it is resolved against
+  parser: jsonl  # one JSON record per line
+  contribute: []  # keeps the raw dataset out of the program output (see PDL docs on `contribute`)
+  def: mathdata  # parsed records, iterated by the loop below
+- for:
+    problem: ${ mathdata }  # each record supplies `question` and `answer`
+  repeat:
+    lastOf:  # per-problem steps: solve, extract ground truth, compare, emit a record
+      - call: ${ solve }
+        args:
+          question: ${ problem.question }
+          pdl_context: []  # NOTE(review): presumably starts each call from an empty context; confirm
+        def: solution  # record returned by `solve`; has `solution` and `answer` entries
+      - def: actual  # ground-truth number as a float
+        call: ${ truth }
+        args: 
+          answer: ${ problem.answer }
+      - def: compare  # boolean: parsed model answer equals the ground truth (exact float equality)
+        lang: python
+        code:
+         |
+         result = float(solution["answer"]["result"]) == actual
+      - if: ${ compare }  # both branches emit the solve record; only the `matches` flag differs
+        then:
+          data:
+            solution: ${ solution }
+            matches: true
+        else:
+          data:
+            solution: ${ solution }
+            matches: false
+  join:  # per-problem results are separated by a newline
+    with: "\n"
+
diff --git a/examples/gsm8k/math_no_sd.pdl b/examples/gsm8k/math_no_sd.pdl
@@ -0,0 +1,85 @@
+defs:  # definitions used by the `text` program further down
+  RITS_API_KEY:  # API key taken from the environment; the model call passes it as `api_key` and as a request header
+    lang: python  # the lookup below fails with KeyError if the RITS_API_KEY environment variable is unset
+    code: |
+      import os
+      result = os.environ["RITS_API_KEY"]
+  solve:  # function: sends the question to the model, then parses the final number out of the reply with Python
+    function:
+      question: str  # the problem statement to solve
+    return:
+      lastOf:  # the closing `data` block is the record callers index (e.g. solution["answer"]["result"])
+      - "${ question }\n"
+      - model: openai/ibm-granite/granite-3.1-8b-instruct
+        def: solution  # model output, bound to `solution`
+        parameters:
+          api_key: ${RITS_API_KEY}
+          api_base: https://inference-3scale-apicast-production.apps.rits.fmaas.res.ibm.com/granite-3-1-8b-instruct/v1/
+          extra_headers:  # the same key is also sent as a header named RITS_API_KEY
+            RITS_API_KEY: ${RITS_API_KEY}
+      - def: answer  # number found after the last "The answer is:" marker, or 0 when none can be parsed
+        lang: python
+        code: |
+          try:
+            result = float(solution.split("The answer is:")[-1].strip())  # without the marker this is the whole reply
+          except (AttributeError, TypeError, ValueError):  # non-text or unparsable reply; never swallow KeyboardInterrupt/SystemExit
+            result = 0
+      - data:  # same record shape as the JSON-parsing variant: answer.result holds the number
+          solution: ${ solution }
+          answer: 
+            result: ${ answer } 
+  truth:  # function: extracts the numeric ground truth from a dataset answer string
+    function: 
+      answer: str  # reference answer text; the number is expected after the last "####"
+    return:
+      lang: python  # takes the text after the last "####", removes "$" and ",", and converts it to float
+      code:
+        |
+        result = float(answer.split("####")[-1].strip().replace("$", "").replace(",", ""))
+  compare:  # function: true when both arguments are equal after conversion to float
+    function:  # NOTE(review): no `call: ${ compare }` appears in this file; the loop in `text` binds its own `compare` variable
+      ans1: float
+      ans2: float
+    return:
+      lang: python  # exact float equality, no tolerance
+      code:
+        |
+        result = float(ans1) == float(ans2) 
+
+
+text:  # main program: load the dataset, then solve and grade every problem
+- read: ../../../grade-school-math/grade_school_math/data/test.jsonl  # NOTE(review): relative path to a grade-school-math checkout; confirm what it is resolved against
+  parser: jsonl  # one JSON record per line
+  contribute: []  # keeps the raw dataset out of the program output (see PDL docs on `contribute`)
+  def: mathdata  # parsed records, iterated by the loop below
+- for:
+    problem: ${ mathdata }  # each record supplies `question` and `answer`
+  repeat:
+    lastOf:  # per-problem steps: solve, extract ground truth, compare, emit a record
+      - call: ${ solve }
+        args:
+          question: ${ problem.question }
+          pdl_context: []  # NOTE(review): presumably starts each call from an empty context; confirm
+        def: solution  # record returned by `solve`; has `solution` and `answer.result` entries
+      - def: actual  # ground-truth number as a float
+        call: ${ truth }
+        args: 
+          answer: ${ problem.answer }
+      - def: compare  # boolean: parsed model answer equals the ground truth (exact float equality)
+        lang: python
+        contribute: []  # the comparison result is only used by the `if` below
+        code:
+         |
+         result = float(solution["answer"]["result"]) == actual
+      - if: ${ compare }  # both branches emit the solve record; only the `matches` flag differs
+        then:
+          data:
+            solution: ${ solution }
+            matches: true
+        else:
+          data:
+            solution: ${ solution }
+            matches: false
+  join:  # per-problem results are separated by a newline
+    with: "\n"
+
diff --git a/examples/gsm8k/math_no_sd_out.jsonl b/examples/gsm8k/math_no_sd_out.jsonl
diff --git a/examples/gsm8k/math_out.jsonl b/examples/gsm8k/math_out.jsonl
diff --git a/src/pdl/pdl_dumper.py b/src/pdl/pdl_dumper.py
@@ -282,7 +282,7 @@ def as_json(value: Any) -> JsonType:
 def parser_to_dict(parser: ParserType) -> str | dict[str, Any]:
     p: str | dict[str, Any]
     match parser:
-        case "json" | "yaml":
+        case "json" | "yaml" | "jsonl":
             p = parser
         case RegexParser():
             p = parser.model_dump()

diff --git a/tests/results/examples/chatbot/chatbot.13.result b/tests/results/examples/chatbot/chatbot.13.result
@@ -0,0 +1,2 @@
+What is APR?APR stands for Annual Percentage Rate. It is the annual interest rate charged for borrowing or earned through an investment, and it reflects the cost of a loan on an annual basis, including fees and compounding interest.yes
+
diff --git a/tests/results/examples/demo/4-translator.13.result b/tests/results/examples/demo/4-translator.13.result
@@ -0,0 +1,5 @@
+What is APR?
+APR stands for Annual Percentage Rate. It is the annual interest rate charged for borrowing or earned through an investment, and it represents the actual yearly cost of funds over the term of a loan. It includes any fees or additional costs associated with the transaction.french
+
+Translate the above to french
+"APR" signifie "Taux Annuel Équivalent" en français. Il est le taux d'intérêt annuel chargé pour emprunter ou gagné à travers une investissement, et il représente le coût réel annuer des fonds sur la durée d'un prêt. Il inclut toutes les frais ou coûts supplémentaires associés à la transaction.stop
diff --git a/tests/results/examples/fibonacci/fib.13.result b/tests/results/examples/fibonacci/fib.13.result
@@ -0,0 +1,45 @@
+Here is a simple Python function to compute the Fibonacci sequence:
+
+```python
+def fibonacci(n):
+    if n <= 0:
+        return "Input should be a positive integer."
+    elif n == 1:
+        return 0
+    elif n == 2:
+        return 1
+    else:
+        a, b = 0, 1
+        for _ in range(n - 2):
+            a, b = b, a + b
+        return b
+```
+
+This function takes an integer `n` as input and returns the `n`th number in the Fibonacci sequence. The Fibonacci sequence is a series of numbers in which each number is the sum of the two preceding ones, usually starting with 0 and 1.
+Find a random number between 1 and 20
+15
+Now computing fibonacci(15)
+
+def fibonacci(n):
+    if n <= 0:
+        return "Input should be a positive integer."
+    elif n == 1:
+        return 0
+    elif n == 2:
+        return 1
+    else:
+        a, b = 0, 1
+        for _ in range(n - 2):
+            a, b = b, a + b
+        return b
+The result is: 377
+
+Explain what the above code does and what the result means
+
+The provided code is a Python function named `fibonacci(n)` that calculates the `n`th number in the Fibonacci sequence. The Fibonacci sequence is a series of numbers in which each number is the sum of the two preceding ones, usually starting with 0 and 1.
+
+The function takes an integer `n` as input and returns the `n`th number in the Fibonacci sequence. If the input is not a positive integer, the function returns an error message.
+
+In this case, the function is called with the argument `15`, which means it will calculate the 15th number in the Fibonacci sequence. The result of this calculation is `377`.
+
+So, the number 377 is the 15th number in the Fibonacci sequence.
diff --git a/tests/results/examples/hello/hello-model-chaining.13.result b/tests/results/examples/hello/hello-model-chaining.13.result
@@ -0,0 +1,4 @@
+Hello
+Hello
+Did you say Hello?
+Yes, I did say Hello
diff --git a/tests/results/examples/hello/hello-roles-array.13.result b/tests/results/examples/hello/hello-roles-array.13.result
@@ -0,0 +1,40 @@
+Here is a Python function that implements the merge sort algorithm:
+
+```python
+def merge_sort(arr):
+    # Base case: if the input array has 1 or 0 elements, it's already sorted
+    if len(arr) <= 1:
+        return arr
+
+    # Recursive case: split the array into two halves, sort them, and then merge them
+    mid = len(arr) // 2
+    left_half = arr[:mid]
+    right_half = arr[mid:]
+
+    left_half = merge_sort(left_half)
+    right_half = merge_sort(right_half)
+
+    return merge(left_half, right_half)
+
+def merge(left, right):
+    merged = []
+    left_index = 0
+    right_index = 0
+
+    # Merge the two sorted halves into a single sorted array
+    while left_index < len(left) and right_index < len(right):
+        if left[left_index] < right[right_index]:
+            merged.append(left[left_index])
+            left_index += 1
+        else:
+            merged.append(right[right_index])
+            right_index += 1
+
+    # Add any remaining elements from the left and right halves
+    merged.extend(left[left_index:])
+    merged.extend(right[right_index:])
+
+    return merged
+```
+
+This function first checks if the input array has 1 or 0 elements, in which case it's already sorted. If not, it splits the array into two halves, sorts them recursively, and then merges them back together using the `merge` function. The `merge` function compares elements from the left and right halves and adds them to the `merged` array in sorted order.
diff --git a/tests/results/examples/talk/2-model-chaining.13.result b/tests/results/examples/talk/2-model-chaining.13.result
@@ -0,0 +1,4 @@
+Hello
+Hello
+Did you just say Hello?
+Yes, I did. I'm here to assist you.
diff --git a/tests/results/examples/talk/7-chatbot-roles.13.result b/tests/results/examples/talk/7-chatbot-roles.13.result
@@ -0,0 +1,8 @@
+Type `quit` to exit this chatbot.
+APR stands for Annual Percentage Rate. It is the annual interest rate charged for borrowing or earned through an investment, and it represents the true cost of borrowing or the actual annual return earned from an investment. APR is expressed as a percentage and includes the interest rate, fees, and other charges associated with a loan or investment. It is used to help consumers compare the cost of borrowing or the return on investment between different financial products.
+
+
+
+I'm sorry to see you go. If you have any more questions in the future, don't hesitate to ask. Have a great day!
+
+
diff --git a/tests/results/examples/tutorial/conditionals_loops.13.result b/tests/results/examples/tutorial/conditionals_loops.13.result
@@ -0,0 +1,9 @@
+APR stands for Annual Percentage Rate. It is the annual interest rate charged for borrowing or earned through an investment, and it represents the actual yearly cost of funds over the term of a loan. It includes any fees or additional costs associated with the transaction.Say it as a poemAPR, a number so grand,
+Annual Percentage Rate, in a loan's command.
+It's the yearly cost, fees included,
+In a loan's term, it's the rule, not the exception.
+
+It's the interest rate, in a nutshell,
+For borrowing or investing, it's the tell.
+So, when you see APR, don't be alarmed,
+It's just a number, not a storm.
diff --git a/tests/results/examples/tutorial/grouping_definitions.13.result b/tests/results/examples/tutorial/grouping_definitions.13.result
@@ -0,0 +1,2 @@
+The sentence 'I love Paris!' translates to 'Je t'aime, Paris!' in French.
+The sentence 'I love Madrid!' translates to 'Me encanta Madrid!' in Spanish.
diff --git a/tests/results/examples/tutorial/model_chaining.13.result b/tests/results/examples/tutorial/model_chaining.13.result
@@ -0,0 +1,4 @@
+Hello
+Hello
+Did you just say Hello?
+Yes, I did. I'm here to assist you.
diff --git a/tests/results/examples/tutorial/muting_block_output.13.result b/tests/results/examples/tutorial/muting_block_output.13.result
@@ -0,0 +1 @@
+The french sentence was: The translation of 'I love Paris!' in French is 'Je t'aime, Paris!'
diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py
@@ -13,6 +13,8 @@
 from pdl.pdl_parser import PDLParseError
 
 UPDATE_RESULTS = False
+RESULTS_VERSION = 14
+
 
 TO_SKIP = {
     str(name)
@@ -33,6 +35,10 @@
         pathlib.Path("examples") / "rag" / "rag.pdl",
         pathlib.Path("examples") / "react" / "react_call.pdl",
         pathlib.Path("examples") / "callback" / "repair_prompt.pdl",
+        pathlib.Path("examples") / "gsm8k" / "math.pdl",
+        pathlib.Path("examples") / "gsm8k" / "math_no_sd.pdl",
+        pathlib.Path("pdl-live-react") / "demos" / "error.pdl",
+        pathlib.Path("pdl-live-react") / "demos" / "demo1.pdl",
     ]
 }
 
@@ -183,9 +189,12 @@ def test_valid_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -
                     expected_result = str(result_file.read())
                 if str(result).strip() == expected_result.strip():
                     wrong_result = False
+                    break
             if wrong_result:
                 if UPDATE_RESULTS:
-                    result_file_name_0 = pdl_file_name.stem + ".12.result"
+                    result_file_name_0 = (
+                        pdl_file_name.stem + "." + str(RESULTS_VERSION) + ".result"
+                    )
                     result_dir_name.mkdir(parents=True, exist_ok=True)
                     with open(
                         result_dir_name / result_file_name_0, "w", encoding="utf-8"
@@ -200,6 +209,7 @@ def test_valid_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -
             if str(pdl_file_name) not in set(str(p) for p in EXPECTED_RUNTIME_ERROR):
                 print(exc)  # unexpected error: breakpoint
             actual_runtime_error |= {str(pdl_file_name)}
+            print(exc)
     # Parse errors
     expected_parse_error = set(str(p) for p in EXPECTED_PARSE_ERROR)
     unexpected_parse_error = sorted(list(actual_parse_error - expected_parse_error))
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		To run the programs in this directory, you need the GSM8K benchmark, available at:
		https://github.com/openai/grade-school-math
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		What is APR?APR stands for Annual Percentage Rate. It is the annual interest rate charged for borrowing or earned through an investment, and it reflects the cost of a loan on an annual basis, including fees and compounding interest.yes
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		The sentence 'I love Paris!' translates to 'Je t'aime, Paris!' in French.
		The sentence 'I love Madrid!' translates to 'Me encanta Madrid!' in Spanish.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		The french sentence was: The translation of 'I love Paris!' in French is 'Je t'aime, Paris!'