Skip to content

Commit

Permalink
fix guidance-ai/guidance#1131 - backtracking+prompt healing
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Mar 5, 2025
1 parent 9a9e6c6 commit 3fff351
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 8 deletions.
8 changes: 4 additions & 4 deletions parser/src/earley/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2701,10 +2701,10 @@ impl Parser {
}

/// Shrinks `self.state.byte_to_token_idx` by `n_bytes` trailing entries.
///
/// NOTE(review): this listing is a flattened diff view (the file header reports
/// 4 additions & 4 deletions), so the `assert!` + first `truncate` below look like
/// the removed pre-fix body and the `saturating_sub` + second `truncate` look like
/// its replacement. Confirm against the repository before reading them as
/// sequential statements.
pub(crate) fn additional_backtrack(&mut self, n_bytes: usize) {
// NOTE(review): presumably the pre-fix check; it panics whenever fewer than
// `n_bytes` entries are recorded.
assert!(self.state.byte_to_token_idx.len() >= n_bytes);
self.state
.byte_to_token_idx
.truncate(self.state.byte_to_token_idx.len() - n_bytes);
// we can sometimes be asked to backtrack more than we have,
// in case the prompt was token-healed; see https://github.com/guidance-ai/guidance/issues/1131
// `saturating_sub` clamps the new length at 0 instead of underflowing.
let new_len = self.state.byte_to_token_idx.len().saturating_sub(n_bytes);
self.state.byte_to_token_idx.truncate(new_len);
}

pub fn apply_token(&mut self, tok_bytes: &[u8]) -> Result<usize> {
Expand Down
10 changes: 6 additions & 4 deletions parser/src/tokenparser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -513,13 +513,16 @@ impl TokenParser {
backtrack_tokens += 1;
}
assert!(backtrack_tokens > 0);
let additional_backtrack_bytes: usize = (-backtrack_bytes).try_into().unwrap();
let full_backtrack_bytes = backtrack_bytes0 + additional_backtrack_bytes;

let byte_ptr = self.llm_bytes.len() - backtrack_bytes0;
let byte_ptr = self.llm_bytes.len() - full_backtrack_bytes;
infoln!(
self,
"backtrack: {} tokens / {} bytes (deletes: {:?})",
"backtrack: {} tokens / {}+{} bytes (deletes: {:?})",
backtrack_tokens,
backtrack_bytes0,
additional_backtrack_bytes,
String::from_utf8_lossy(&self.llm_bytes[byte_ptr..])
);
self.llm_bytes.truncate(byte_ptr);
Expand All @@ -536,8 +539,7 @@ impl TokenParser {
} else {
// make sure the parser knows we don't actually have
// the non-backtracked bytes of the backtracked token
self.parser
.additional_backtrack((-backtrack_bytes).try_into().unwrap());
self.parser.additional_backtrack(additional_backtrack_bytes);
}
self.llm_tokens.truncate(token_ptr);
return Ok(backtrack_tokens);
Expand Down
19 changes: 19 additions & 0 deletions sample_parser/tests/test_ll.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
// syntax:
// token separator: ‧
// token disallowed: ✖
// backtrack: 1↶ (one token)
// end of string: ≺EOS≻

use sample_parser::*;
use serde_json::json;

Expand Down Expand Up @@ -406,6 +412,19 @@ fn test_ll_backtrack_stop() {
);
}

#[test]
fn test_ll_stop_heal() {
    // Regression test for backtracking combined with prompt healing:
    // https://github.com/guidance-ai/guidance/issues/1131
    //
    // Grammar: a `gen` rule that stops at a double quote, followed by the literal "foo".
    let grammar = r#"
start: gen "foo"
gen[stop=/"/]: /.*/
"#;

    // The prompt ends with a space, while the first expected step ends at ":".
    let prompt = "Hello, text: ";

    // Expected trace, in the notation from the header of this test file:
    // `‧` separates tokens and `1↶` backtracks one token.
    let expected_steps = ["Hello‧,‧ text‧:", " \"", "1↶ foo"];

    check_lark_grammar_prompt(grammar, prompt, &expected_steps);
}

#[test]
fn test_llparser() {
check_lark_grammar_prompt(
Expand Down

0 comments on commit 3fff351

Please sign in to comment.