Skip to content

Commit

Permalink
Fix falcon prompt template (#1696)
Browse files · Browse the repository at this point in the history
  • Loading branch information
rasbt authored Aug 23, 2024
1 parent e712c68 commit ea01fbc
Showing 1 changed file with 3 additions and 7 deletions.
10 changes: 3 additions & 7 deletions litgpt/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,7 @@ def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:

class Falcon(PromptStyle):
def apply(self, prompt: str, **kwargs: str) -> str:
    """Format ``prompt`` for Falcon instruct models.

    Falcon has no fixed system-prompt convention; a minimal template —
    the raw prompt followed by an ``Answer:`` cue — works better than
    elaborate instructions, which made the model ramble (see #1696).
    """
    # The diff residue left two return statements here; the second one
    # (the commit's addition) is the intended template, so only it remains.
    return f"{prompt}\nAnswer:"

def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
return (
Expand Down Expand Up @@ -218,14 +215,14 @@ def apply(self, prompt: Union[str, List[Dict[str, str]]], **kwargs: str) -> str:

def encode_header(role: str) -> List[str]:
    # Wrap the role name in Llama-3 header delimiter tokens, followed by
    # the mandatory blank line that separates header from message body.
    header = f"<|start_header_id|>{role}<|end_header_id|>\n\n"
    return [header]

def encode_message(message: Dict[str, str]) -> List[str]:
    # Render one chat message: role header, stripped body, end-of-turn marker.
    # NOTE: stripping the content follows Meta's reference implementation,
    # even though that choice is debatable.
    body = message["content"].strip()
    return encode_header(message["role"]) + [body, "<|eot_id|>"]

def has_system_prompt(messages: List[Dict[str, str]]) -> bool:
    # An empty conversation trivially carries no system prompt.
    if not messages:
        return False
    # Only the leading message can declare the system role.
    return messages[0].get("role", "") == "system"

Expand Down Expand Up @@ -309,7 +306,6 @@ def apply(self, prompt: str, **kwargs: str) -> str:
return f'<|system|>\nYou are a helpful assistant.<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n'



class TinyLlama(PromptStyle):
def apply(self, prompt: str, **kwargs: str) -> str:
return (
Expand Down

0 comments on commit ea01fbc

Please sign in to comment.