Skip to content

Commit

Permalink
Fix(LLMLingua-2): fix wrong special tokens being used (microsoft#181)
Browse files Browse the repository at this point in the history
  • Loading branch information
cornzz committed Sep 13, 2024
1 parent 2dbdbd3 commit c8709e6
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions llmlingua/prompt_compressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2161,7 +2161,7 @@ def __get_context_prob(
chunk_list.append(c)

dataset = TokenClfDataset(
- chunk_list, tokenizer=self.tokenizer, max_len=self.max_seq_len
+ chunk_list, tokenizer=self.tokenizer, max_len=self.max_seq_len, model_name=self.model_name
)
dataloader = DataLoader(
dataset, batch_size=self.max_batch_size, shuffle=False, drop_last=False
Expand Down Expand Up @@ -2339,7 +2339,7 @@ def split_string_to_words(input_string):
chunk_list.append(c)

dataset = TokenClfDataset(
- chunk_list, tokenizer=self.tokenizer, max_len=self.max_seq_len
+ chunk_list, tokenizer=self.tokenizer, max_len=self.max_seq_len, model_name=self.model_name
)
dataloader = DataLoader(
dataset, batch_size=self.max_batch_size, shuffle=False, drop_last=False
Expand Down

0 comments on commit c8709e6

Please sign in to comment.