Commit

Remove the add_eos_token (stop token) flag
rparundekar committed Dec 21, 2023
1 parent 3ee9f1a commit d3ccd3c
Showing 3 changed files with 7 additions and 9 deletions.
4 changes: 1 addition & 3 deletions app/sft.py
@@ -190,9 +190,7 @@ def load_model(config: dict[str, Any]) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
             use_cache=False,
             trust_remote_code=True,
         )
-        tokenizer = AutoTokenizer.from_pretrained(
-            config["model"]["base"]["name"], add_eos_token=True, trust_remote_code=True
-        )
+        tokenizer = AutoTokenizer.from_pretrained(config["model"]["base"]["name"], trust_remote_code=True)
     elif config["model"]["base"]["type"] == "s3":
         # TODO : Add s3 support
         raise NotImplementedError("S3 support not implemented yet")
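For context: passing add_eos_token=True to a Llama-style tokenizer makes it append the EOS (stop) token to every encoded sequence, so removing it shifts responsibility for EOS handling to the SFT data pipeline. A minimal sketch of the behavioral difference, assuming access to the gated meta-llama/Llama-2-7b-hf checkpoint named in the configs below:

from transformers import AutoTokenizer

# Illustration of what this commit removes; the model name comes from the
# configs in this commit, and access to the gated checkpoint is assumed.
name = "meta-llama/Llama-2-7b-hf"

# Before: the tokenizer appends EOS to every encoding.
tok_before = AutoTokenizer.from_pretrained(name, add_eos_token=True)
ids = tok_before("hello world").input_ids
print(ids[-1] == tok_before.eos_token_id)  # True

# After: no EOS is appended; the trainer/collator must add it if needed.
tok_after = AutoTokenizer.from_pretrained(name)
ids = tok_after("hello world").input_ids
print(ids[-1] == tok_after.eos_token_id)  # False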
6 changes: 3 additions & 3 deletions k8s/configs/llmos_peft.yaml
@@ -1,5 +1,5 @@
 project:
-  name: "fine_tune_research"
+  name: "llmos"

 dataset:
   type: "s3"
@@ -12,7 +12,7 @@ model:
     name: "meta-llama/Llama-2-7b-hf"
   output:
     type: "hf"
-    name: "rparundekar/llama2-7b-mmlu"
+    name: "rparundekar/llama2-7b-llmos"

 training:
   trainer:
@@ -25,7 +25,7 @@ training:
     learning_rate: 0.0002
     lr_scheduler_type: "cosine"
     warmup_ratio: 0.1
-    num_train_epochs: 1
+    max_steps: 500
     gradient_accumulation_steps: 4
     gradient_checkpointing: True
     gradient_checkpointing_kwargs:
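The swap from num_train_epochs: 1 to max_steps: 500 caps training by optimizer steps rather than dataset passes; in Hugging Face's TrainingArguments, a positive max_steps overrides any epoch count. A sketch of how this training block would map onto TrainingArguments (the actual mapping lives in app/sft.py and is assumed here; output_dir is a placeholder):

from transformers import TrainingArguments

# Hypothetical mapping of the YAML training block above.
args = TrainingArguments(
    output_dir="out",                # placeholder
    learning_rate=0.0002,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    max_steps=500,                   # replaces num_train_epochs: 1; overrides epochs when > 0
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
)

The same sketch applies to the identical change in k8s/configs/llmos_sft.yaml below.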
6 changes: 3 additions & 3 deletions k8s/configs/llmos_sft.yaml
@@ -1,5 +1,5 @@
 project:
-  name: "fine_tune_research"
+  name: "llmos"

 dataset:
   type: "s3"
@@ -12,7 +12,7 @@ model:
     name: "meta-llama/Llama-2-7b-hf"
   output:
     type: "hf"
-    name: "rparundekar/llama2-7b-mmlu"
+    name: "rparundekar/llama2-7b-llmos"

 training:
   trainer:
@@ -25,7 +25,7 @@ training:
     learning_rate: 0.0002
     lr_scheduler_type: "cosine"
     warmup_ratio: 0.1
-    num_train_epochs: 1
+    max_steps: 500
     gradient_accumulation_steps: 4
     gradient_checkpointing: True
     gradient_checkpointing_kwargs:
