Skip to content

Commit

Permalink
Create finetune.py (#35)
Browse files Browse the repository at this point in the history
* Create finetune.py

This is script 1 of 2 accompanying my 30 Aug 2024 article on fine-tuning Llama 3.

* Update finetune.py

Added the Beam command to deploy to Beam as a comment at the top

* Update finetune.py

I added the following comment at the top of the file:

 Deploy to Beam by running `$ beam deploy finetune.py:llama_fine_tune` in the terminal.

* Update finetune.py

I fixed and simplified the dataset processing.

* Update finetune.py

Fix paths for the Beam context.

* Update finetune.py

Fix comments to reflect the Beam context.
  • Loading branch information
hassaanQadir authored Sep 5, 2024
1 parent 36b7afd commit 84e75f6
Showing 1 changed file with 99 additions and 0 deletions.
99 changes: 99 additions & 0 deletions finetuning/llama/finetune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# finetune.py
# Run this fine-tuning job on Beam by running `$ python finetune.py` in the terminal
from beam import Volume, Image, function, env

# Location at which the Beam volume is mounted; everything this script reads
# or writes lives underneath it.
MOUNT_PATH = "./llama-ft"
# Sub-directories of the mount used for the model weights and the dataset.
WEIGHT_PATH = MOUNT_PATH + "/weights"
DATASET_PATH = MOUNT_PATH + "/data"

@function(
    secrets=["HF_TOKEN"],
    volumes=[Volume(name="llama-ft", mount_path=MOUNT_PATH)],
    image=Image(
        python_packages=["transformers", "torch", "datasets", "peft", "bitsandbytes"]
    ),
    gpu="A100-40",
    cpu=4,
)
def llama_fine_tune(max_seq_length=512):
    """Fine-tune the model stored at WEIGHT_PATH with LoRA and save the result.

    The base model and tokenizer are loaded from WEIGHT_PATH, LoRA adapters
    are attached to the ``q_proj`` and ``v_proj`` modules, and the model is
    trained for one epoch on the ``train`` split of the dataset found at
    DATASET_PATH (which must provide a ``text`` column). The adapter and the
    tokenizer are then written to ``./llama-ft/llama-finetuned`` on the
    mounted volume.

    Args:
        max_seq_length: Number of tokens every example is padded or truncated
            to. Defaults to 512.

    Returns:
        The string ``"CUDA is not available"`` when no GPU is visible;
        otherwise ``None`` once training and saving have finished.
    """
    # Heavy dependencies are imported inside the function body, as in the
    # original script, so they are only needed where the function executes.
    import os
    import torch
    from datasets import load_dataset
    from transformers import (
        AutoTokenizer,
        AutoModelForCausalLM,
        TrainingArguments,
        Trainer,
        DataCollatorForLanguageModeling,
    )
    from peft import LoraConfig, get_peft_model, TaskType

    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    if not torch.cuda.is_available():
        return "CUDA is not available"

    torch.set_float32_matmul_precision("high")

    # This output directory is on our mounted volume. It is used both for
    # training checkpoints and for the final adapter/tokenizer.
    output_dir = "./llama-ft/llama-finetuned"

    # Load the Llama3 model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(
        WEIGHT_PATH, device_map="auto", attn_implementation="eager", use_cache=False
    )
    tokenizer = AutoTokenizer.from_pretrained(WEIGHT_PATH, use_fast=False)

    # Set the pad_token to eos_token
    tokenizer.pad_token = tokenizer.eos_token

    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )

    model = get_peft_model(model, lora_config)

    # Load the dataset. Without `split`, load_dataset returns a DatasetDict
    # keyed by split name, which Trainer cannot consume as a train_dataset,
    # so request the train split explicitly.
    train_split = load_dataset(DATASET_PATH, split="train")

    def prepare_dataset(examples):
        # An explicit max_length keeps the padded size independent of
        # whatever model_max_length the tokenizer config happens to carry.
        return tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True,
            max_length=max_seq_length,
        )

    # Drop the raw columns (e.g. the string "text" column): with
    # remove_unused_columns=False they would otherwise reach the data
    # collator, which cannot turn strings into tensors.
    tokenized_dataset = train_split.map(
        prepare_dataset,
        batched=True,
        remove_columns=train_split.column_names,
    )

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=1,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        weight_decay=0.01,
        logging_steps=10,
        save_steps=100,
        save_total_limit=3,
        fp16=True,
        gradient_checkpointing=False,
        remove_unused_columns=False,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
    )

    trainer.train()

    # Saving the LoRA model and tokenizer to our mounted volume so that our
    # inference endpoint can access it.
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)


if __name__ == "__main__":
    # Invoke the decorated function through its `.remote()` entry point when
    # the file is executed as a script.
    llama_fine_tune.remote()

0 comments on commit 84e75f6

Please sign in to comment.