Skip to content

Commit

Permalink
automatically get tokenizer from HF if possible
Browse files Browse the repository at this point in the history
  • Loading branch information
aarora79 committed Oct 1, 2024
1 parent 268c2d7 commit d912923
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 18 deletions.
2 changes: 1 addition & 1 deletion debug.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# 3. Runs fmbench as usual

CONDA_ENV_PATH=$CONDA_PREFIX/lib/python3.11/site-packages
CONFIG_FILE_PATH=src/fmbench/configs/llama3/8b/config-llama3-8b-trn1-32xlarge-triton-djl.yml
CONFIG_FILE_PATH=src/fmbench/configs/llama3/8b/config-ec2-llama3-8b.yml
#src/fmbench/configs/llama3.1/8b/config-llama3.1-8b-trn1-32xl-deploy-tp-8-ec2.yml
#src/fmbench/configs/llama3/8b/config-llama3-8b-trn1-32xlarge-triton-vllm.yml
#src/fmbench/configs/llama3/8b/config-llama3-8b-trn1-32xlarge-triton-vllm.yml
Expand Down
5 changes: 4 additions & 1 deletion src/fmbench/5_model_metric_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1216,13 +1216,16 @@
"# but the facets row col counter starts at bottom left so the col counter is\n",
"# correct but the row counter needs to be inverted\n",
"best_price_perf = df_summary_metrics_dataset_overall.to_dict(orient='records')[0]\n",
"logger.info(f\"best_price_perf={best_price_perf}\")\n",
"best_price_perf_x = best_price_perf['instance_type']\n",
"best_price_perf_y = best_price_perf['price_per_txn']*10000\n",
"best_price_perf_avg_latency = best_price_perf['latency_p95']\n",
"best_price_perf_cost_per_10k = round(best_price_perf['price_per_txn']*10000, 2)\n",
"best_price_perf_instance_type = best_price_perf['instance_type']\n",
"best_price_perf_instance_count = best_price_perf['instance_count']\n",
"best_price_perf_tpm = best_price_perf['transactions_per_minute']\n",
"best_completion_token_count_mean = int(best_price_perf['completion_token_count_mean'])\n",
"best_prompt_token_count_mean = int(best_price_perf['prompt_token_count_mean'])\n",
"concurrencies = np.sort(df_summary_metrics_dataset.concurrency.unique())\n",
"\n",
"concurrencies = list(df_summary_metrics_dataset.concurrency.unique())\n",
Expand Down Expand Up @@ -1267,7 +1270,7 @@
" x=0, \n",
" yref='paper',\n",
" y=-0.15,\n",
" text=f\"*<b>best price|performance</b>: {instance_count_str}{best_price_perf_instance_type}, p95 latency {best_price_perf_avg_latency}s, 10k txn cost ${best_price_perf_cost_per_10k}, transactions/minute {best_price_perf_tpm}.\",\n",
" text=f\"*<b>best price|performance</b>: {instance_count_str}{best_price_perf_instance_type}, p95 latency {best_price_perf_avg_latency}s, 10k txn cost ${best_price_perf_cost_per_10k}, transactions/minute {best_price_perf_tpm}, completion tokens {best_completion_token_count_mean}, prompt tokens {best_prompt_token_count_mean}.\",\n",
")\n",
"fig.show()"
]
Expand Down
9 changes: 1 addition & 8 deletions src/fmbench/configs/llama3/8b/config-ec2-llama3-8b.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,11 @@ datasets:
min_length_in_tokens: 2000
max_length_in_tokens: 3000
payload_file: payload_en_2000-3000.jsonl
- language: en
min_length_in_tokens: 3000
max_length_in_tokens: 4000
payload_file: payload_en_3000-4000.jsonl
- language: en
min_length_in_tokens: 3000
max_length_in_tokens: 3840
payload_file: payload_en_3000-3840.jsonl
- language: en
min_length_in_tokens: 1
max_length_in_tokens: 924
payload_file: payload_en_1-924.jsonl


# While the tests would run on all the datasets
# configured in the experiment entries below but
Expand Down
6 changes: 6 additions & 0 deletions src/fmbench/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,12 @@
# this is for custom tokenizers
TOKENIZER_DIR_S3 = config['s3_read_data']['tokenizer_prefix']
TOKENIZER = 'tokenizer'
# we take the first experiment's model id as the model whose tokenizer is used for
# counting tokens on the dataset. This is done purely for ease of coding; it only
# affects the token-counting logic on the client side (it does not impact the tokenizer the model uses).
# NOTE: if tokenizer files are provided in the tokenizer directory then they take precedence;
# if those files are not present then we load the tokenizer for this model id from Hugging Face
TOKENIZER_MODEL_ID = config['experiments'][0]['model_id']

DEPLOYMENT_SCRIPT_S3 = config['s3_read_data']['scripts_prefix']

Expand Down
2 changes: 1 addition & 1 deletion src/fmbench/prompt_template/prompt_template_llama3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

You are an assistant for question-answering tasks. Use the following pieces of retrieved context in the section demarcated by "```" to answer the question.
The context may contain multiple question answer pairs as an example. Only answer the final question provided in the question section below.
If you dont know the answer just say that you dont know.
If you don't know the answer, just say that you don't know. Be concise with your answer.

```
{context}
Expand Down
28 changes: 21 additions & 7 deletions src/fmbench/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,10 +412,17 @@ class CustomTokenizer:
"""A custom tokenizer class"""
TOKENS: int = 1000
WORDS: int = 750
HF_TOKEN_FNAME: str = os.path.join(os.path.dirname(os.path.realpath(__file__)), "scripts", "hf_token.txt")
if Path(HF_TOKEN_FNAME).is_file() is True:
print(f"{HF_TOKEN_FNAME} file found, going to set HF_TOKEN env var")
HF_TOKEN: str = Path(HF_TOKEN_FNAME).read_text().strip()
os.environ["HF_TOKEN"] = HF_TOKEN
else:
print(f"{HF_TOKEN_FNAME} file not found")

def __init__(self, bucket, prefix, local_dir):
def __init__(self, bucket, prefix, local_dir, model_id):
print(f"CustomTokenizer, based on HF transformers, {bucket} "
f"prefix: {prefix} local_dir: {local_dir}")
f"prefix: {prefix} local_dir: {local_dir}, model_id: {model_id}")
# Check if the tokenizer files exist in s3 and if not, use the autotokenizer
download_multiple_files_from_s3(bucket, prefix, local_dir)
# Load the tokenizer from the local directory
Expand All @@ -426,16 +433,23 @@ def __init__(self, bucket, prefix, local_dir):
if dir_not_empty > 0:
print(f"loading the provided tokenizer from local_dir={local_dir}, abs_path={abs_path}")
self.tokenizer = AutoTokenizer.from_pretrained(local_dir)
print("successfully loaded the tokenizer using AutoTokenizer.from_pretrained")
print(f"successfully loaded the tokenizer using AutoTokenizer.from_pretrained from {local_dir}")
else:
print(f"no tokenizer provided, the {local_dir}, abs_path={abs_path} is empty, "
f"using default tokenizer i.e. {self.WORDS} words = {self.TOKENS} tokens")
self.tokenizer = None
print(f"{local_dir} directory is empty")
try:
print(f"going to download tokenizer from HF for \"{model_id}\"")
self.tokenizer = AutoTokenizer.from_pretrained(model_id)
print(f"successfully loaded the tokenizer using AutoTokenizer.from_pretrained from HF for \"{model_id}\"")
except Exception as e:
print("exception while loading tokenizer from HuggingFace")
print(f"no tokenizer provided, the {local_dir}, abs_path={abs_path} is empty, "
f"using default tokenizer i.e. {self.WORDS} words = {self.TOKENS} tokens")
self.tokenizer = None

def count_tokens(self, text):
    """Return the number of tokens in ``text``.

    When an HF tokenizer was successfully loaded, the exact encoded length is
    returned. Otherwise falls back to a heuristic that scales the whitespace
    word count by the TOKENS-per-WORDS ratio (1000 tokens ~ 750 words).
    """
    if self.tokenizer is None:
        # no tokenizer available: approximate tokens from the word count
        word_count = len(text.split())
        return int(math.ceil((self.TOKENS / self.WORDS) * word_count))
    return len(self.tokenizer.encode(text))

_tokenizer = CustomTokenizer(globals.READ_BUCKET_NAME, globals.TOKENIZER_DIR_S3, globals.TOKENIZER)
_tokenizer = CustomTokenizer(globals.READ_BUCKET_NAME, globals.TOKENIZER_DIR_S3, globals.TOKENIZER, globals.TOKENIZER_MODEL_ID)

0 comments on commit d912923

Please sign in to comment.