From d6c1324424572f1e2f7dc1bbf8cc40496203ddcc Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Thu, 26 Oct 2023 16:46:20 -0700
Subject: [PATCH] chore: update script for ops (#647)

* add llama model converter

* update

* chore: update scripts for ops
---
 experimental/copy-to-modelscope/main.sh        |  18 ++-
 experimental/model-converter/__init__.py       |   0
 experimental/model-converter/args.py           |  27 -----
 experimental/model-converter/main.py           | 106 ------------------
 experimental/model-converter/requirements.txt  |   3 -
 .../model-converter/update-llama-model.sh      |  58 ++++++++++
 6 files changed, 71 insertions(+), 141 deletions(-)
 delete mode 100644 experimental/model-converter/__init__.py
 delete mode 100644 experimental/model-converter/args.py
 delete mode 100644 experimental/model-converter/main.py
 delete mode 100644 experimental/model-converter/requirements.txt
 create mode 100755 experimental/model-converter/update-llama-model.sh

diff --git a/experimental/copy-to-modelscope/main.sh b/experimental/copy-to-modelscope/main.sh
index 5f4e3574806..67f52802d4c 100755
--- a/experimental/copy-to-modelscope/main.sh
+++ b/experimental/copy-to-modelscope/main.sh
@@ -13,8 +13,8 @@ if [ -z "${MODEL_ID}" ]; then
   usage
 fi
 
-git clone https://oauth2:${ACCESS_TOKEN}@www.modelscope.cn/$MODEL_ID.git ms_model --depth 1
-git clone https://huggingface.co/$MODEL_ID hf_model --depth 1
+git clone https://oauth2:${ACCESS_TOKEN}@www.modelscope.cn/$MODEL_ID.git ms_model --depth 1 || true
+git clone https://huggingface.co/$MODEL_ID hf_model --depth 1 || true
 
 echo "Sync directory"
 rsync -avh --exclude '.git' --delete hf_model/ ms_model/
@@ -48,12 +48,20 @@ cat <<EOF >ms_model/configuration.json
 }
 EOF
 
+push_origin() {
+git lfs push origin --all
+git push origin
+}
+
 set -x
 pushd ms_model
 git add .
-git commit -m "sync with upstream"
-git lfs push origin
-git push origin
+git commit -m "sync with upstream" || true
+
+while true; do
+    push_origin && break
+done
+
 popd
 
 echo "Success!"
diff --git a/experimental/model-converter/__init__.py b/experimental/model-converter/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/experimental/model-converter/args.py b/experimental/model-converter/args.py
deleted file mode 100644
index 4b2fadb640a..00000000000
--- a/experimental/model-converter/args.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import argparse
-
-
-def make_parser():
-    parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
-    )
-    parser.add_argument(
-        "--model",
-        required=True,
-        help=(
-            "Name of the pretrained model to download, "
-            "or path to a directory containing the pretrained model."
-        ),
-    )
-    parser.add_argument("--output_dir", required=True, help="Output model directory.")
-    parser.add_argument(
-        "--inference_mode",
-        required=True,
-        choices=["causallm", "seq2seq"],
-        help="Model inference mode.",
", - ) - parser.add_argument( - "--prompt_template", default=None, help="prompt template for fim" - ) - - return parser diff --git a/experimental/model-converter/main.py b/experimental/model-converter/main.py deleted file mode 100644 index 002048aeb3c..00000000000 --- a/experimental/model-converter/main.py +++ /dev/null @@ -1,106 +0,0 @@ -from args import make_parser -import json -import os -import shutil - -from ctranslate2.converters.transformers import TransformersConverter -from huggingface_hub import snapshot_download -from transformers.convert_slow_tokenizers_checkpoints_to_fast import ( - convert_slow_checkpoint_to_fast, -) - - -class InvalidConvertionException(Exception): - def __init__(self, *args: object) -> None: - super().__init__(*args) - - -def convert_tokenizer(): - if os.path.exists("./tokenizer.json"): - print("found tokenizer.json, skipping tokenizer conversion") - return - - # Infer tokenizer name - if not os.path.isfile("tokenizer_config.json"): - raise InvalidConvertionException( - "cannot find tokenizer_config.json, unable to infer tokenizer name" - ) - - data = {} - with open("tokenizer_config.json", "r", encoding="utf-8") as f: - data = json.load(f) - tokenizer_name = data["tokenizer_class"] - - convert_tmp_dir = "./convert_tmp" - - # Start to convert - convert_slow_checkpoint_to_fast( - tokenizer_name=tokenizer_name, - checkpoint_name="./", - dump_path=convert_tmp_dir, - force_download=True, - ) - - # After successful conversion, copy file from ./convert_tmp to ./ - for root, dirs, files in os.walk(convert_tmp_dir): - for f in files: - fpath = os.path.join(root, f) - shutil.copy2(fpath, "./") - for d in dirs: - dpath = os.path.join(root, d) - shutil.copy2(dpath, "./") - shutil.rmtree(convert_tmp_dir) - - -def generate_tabby_json(args): - if os.path.exists("./tabby.json"): - print("found tabby.json, skipping tabby.json generation") - return - - data = {} - data["auto_model"] = ( - "AutoModelForCausalLM" - if args.inference_mode == "causallm" - else "AutoModelForSeq2SeqLM" - ) - if args.prompt_template: - data["prompt_template"] = args.prompt_template - with open("tabby.json", "w", encoding="utf-8") as f: - json.dump(data, f, indent=4) - - -def main(): - # Set up args - parser = make_parser() - - args = parser.parse_args() - - # Check out model - model_path = snapshot_download( - repo_id=args.model, - cache_dir=args.output_dir, - force_download=False, - ) - - os.chdir(model_path) - convert_output_dir = os.path.join(model_path, "ctranslate2") - - # Convert model into ctranslate - converter = TransformersConverter( - model_name_or_path=model_path, - load_as_float16=True, - trust_remote_code=True, - ) - converter.convert( - output_dir=convert_output_dir, vmap=None, quantization="float16", force=True - ) - - # Convert model with fast tokenizer - convert_tokenizer() - - # Generate tabby.json - generate_tabby_json(args) - - -if __name__ == "__main__": - main() diff --git a/experimental/model-converter/requirements.txt b/experimental/model-converter/requirements.txt deleted file mode 100644 index edf36390324..00000000000 --- a/experimental/model-converter/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -ctranslate2 -huggingface_hub -transformers diff --git a/experimental/model-converter/update-llama-model.sh b/experimental/model-converter/update-llama-model.sh new file mode 100755 index 00000000000..e159587ca35 --- /dev/null +++ b/experimental/model-converter/update-llama-model.sh @@ -0,0 +1,58 @@ +#!/bin/bash +set -e + +ACCESS_TOKEN=$1 + +usage() { + echo "Usage: $0 " + 
+  exit 1
+}
+
+if [ -z "${ACCESS_TOKEN}" ]; then
+  usage
+fi
+
+prepare_llama_cpp() {
+  git clone https://github.com/ggerganov/llama.cpp.git
+  pushd llama.cpp
+
+  git checkout 6961c4bd0b5176e10ab03b35394f1e9eab761792
+  mkdir build
+  pushd build
+  cmake ..
+  make quantize
+  popd
+  popd
+}
+
+update_model() {
+  MODEL_ID=$1
+
+  git clone https://${ACCESS_TOKEN}@huggingface.co/$MODEL_ID hf_model --depth 1
+
+  pushd hf_model
+  huggingface-cli lfs-enable-largefiles .
+
+  python ../llama.cpp/convert-starcoder-hf-to-gguf.py . --outfile ./ggml/f16.v2.gguf 1
+  ../llama.cpp/build/bin/quantize ./ggml/f16.v2.gguf ./ggml/q8_0.v2.gguf q8_0
+
+  git add .
+  git commit -m "add ggml model v2"
+  git lfs push origin
+  git push origin
+  popd
+
+  echo "Success!"
+  rm -rf hf_model
+}
+
+set -x
+prepare_llama_cpp || true
+
+# update_model TabbyML/StarCoder-1B
+# update_model TabbyML/StarCoder-3B
+update_model TabbyML/StarCoder-7B
+update_model TabbyML/CodeLlama-7B
+update_model TabbyML/CodeLlama-13B
+update_model TabbyML/Mistral-7B
+update_model TabbyML/WizardCoder-3B
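
Reviewer note on the push loop in copy-to-modelscope/main.sh: the patch retries push_origin in an unbounded "while true" loop, so a persistent failure (for example a revoked token) would spin forever. A minimal bounded variant is sketched below. It reuses the patch's push_origin helper; the attempt count and sleep interval are illustrative assumptions, not part of the patch.

    # Sketch only: bounded retry around the patch's push_origin helper.
    # max_attempts and the 10-second pause are assumed values.
    push_with_retry() {
      local max_attempts=5
      local attempt
      for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        push_origin && return 0
        echo "push failed (attempt ${attempt}/${max_attempts}); retrying in 10s" >&2
        sleep 10
      done
      echo "push failed after ${max_attempts} attempts" >&2
      return 1
    }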
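For reference, invoking the new update-llama-model.sh from the repository root might look like the following. HF_WRITE_TOKEN is a placeholder name for a Hugging Face access token with write access to the TabbyML model repositories; the patch itself only reads the token from $1.

    # Sketch only: HF_WRITE_TOKEN is an assumed variable name, not from the patch.
    ./experimental/model-converter/update-llama-model.sh "${HF_WRITE_TOKEN}"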