From 649b43343aa14449605e24c2161c95ad7684adf6 Mon Sep 17 00:00:00 2001 From: skcd Date: Thu, 19 Oct 2023 16:24:32 +0100 Subject: [PATCH] [sidecar] use proper calcluations for constructing the prompt --- src/.DS_Store | Bin 8196 -> 8196 bytes src/agent/search.rs | 72 ++++++++++++++++++++++++------------------ src/indexes/schema.rs | 2 +- 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/src/.DS_Store b/src/.DS_Store index 64e4d50b69803aa41d5ef2bf41f732bfda810e1b..f922795363630b2a63bed1e54418a8440ea4c110 100644 GIT binary patch delta 50 zcmV-20L}k|K!iY$PXQ^hP`eKSDYFa^y8@H_61=km6$AsZfPewB2N?JSlc5$Dvz8Wd I0kMF60l%^lqyPW_ delta 421 zcmZp1XmOa}RCU^hRb-ew+w-AqQ@45&yyfmrW97%)seEOelr9jGahA(J5wn{q~3B;`OR0~=6vKG^sibfX!&@u^KJFD`&O zV@Fa!PG)h5fx$IKCgyq;RyKAHb`EZi*x-!(^5BxhlG0+Q#G+^rFC;%dCke(*ObW|P zEsqxvan8>xNzBYkEdp!EOi2YQi3!ilOUW;H$}i1JDF*8ehDdO5a&X2ANK{uF8(Qcn z7@1ns>L^rO8X4#)n3$Q=)^d7sh$`z_2gPUS { if let Some(user_selected_context_slice) = user_selected_context { - prompt += "##### SELECTED CODE CONTEXT #####\n"; - for user_selected_context in user_selected_context_slice.iter() { - let snippet = user_selected_context - .data - .lines() - .enumerate() - .map(|(i, line)| { - format!( - "{} {line}\n", - i + user_selected_context.start_line as usize + 1 + let selected_code_context = "##### SELECTED CODE CONTEXT #####\n"; + let selected_code_header_tokens = + bpe.encode_ordinary(&selected_code_context).len(); + if selected_code_header_tokens + >= remaining_prompt_tokens - self.model.prompt_tokens_limit + { + info!("we can't set selected selection because of prompt limit"); + } else { + prompt += "##### SELECTED CODE CONTEXT #####\n"; + remaining_prompt_tokens -= selected_code_header_tokens; + + for user_selected_context in user_selected_context_slice.iter() { + let snippet = user_selected_context + .data + .lines() + .enumerate() + .map(|(i, line)| { + format!( + "{} {line}\n", + i + user_selected_context.start_line as usize + 1 + ) + }) + .collect::(); + + let formatted_string = format!( + "### {} ###\n{snippet}\n\n", + self.get_absolute_path( + self.reporef(), + &user_selected_context.file_path ) - }) - .collect::(); - - let formatted_string = format!( - "### {} ###\n{snippet}\n\n", - self.get_absolute_path( - self.reporef(), - &user_selected_context.file_path - ) - ); - - let snippet_tokens = bpe.encode_ordinary(&formatted_string).len(); - if snippet_tokens - >= remaining_prompt_tokens - self.model.prompt_tokens_limit - { - info!("breaking at {} tokens", remaining_prompt_tokens); - break; - } + ); + + let snippet_tokens = bpe.encode_ordinary(&formatted_string).len(); + if snippet_tokens + >= remaining_prompt_tokens - self.model.prompt_tokens_limit + { + info!("breaking at {} tokens", remaining_prompt_tokens); + break; + } + prompt += &formatted_string; - // Make sure we are always in the context limit - remaining_prompt_tokens -= snippet_tokens; + // Make sure we are always in the context limit + remaining_prompt_tokens -= snippet_tokens; + } } } } diff --git a/src/indexes/schema.rs b/src/indexes/schema.rs index de654dca3..971488dc1 100644 --- a/src/indexes/schema.rs +++ b/src/indexes/schema.rs @@ -192,7 +192,7 @@ impl CodeSnippet { let unique_hash = builder.add_text_field("unique_hash", STRING | STORED); - let repo_disk_path = dbg!(builder.add_text_field("repo_disk_path", STRING)); + let repo_disk_path = builder.add_text_field("repo_disk_path", STRING); let repo_ref = builder.add_text_field("repo_ref", STRING | STORED); let repo_name = builder.add_text_field("repo_name", code_snippet_tokenizer.clone()); let relative_path = builder.add_text_field("relative_path", code_snippet_tokenizer.clone());