Oneflow-Inc · Peterliwenxuan · Mar 26, 2023 · Apr 12, 2023
diff --git a/onebench/codegeex/README.md b/onebench/codegeex/README.md
@@ -0,0 +1,16 @@
+使用说明：
+
+- 在A100机器上运行
+- 在正常terminal中运行bash initialize_docker.sh
+- 在打开的container bash中运行 bash original_script.sh
+- 确保environment.yml文件存在
+
+日志由extract_log.py汇总，示例输出保存在results_table.md中：
+
+| L | OneFlow[Mem(MiB)/Time(s)] | PyTorch[Mem(MiB)/Time(s)] | FasterTransformer[Mem(MiB)/Time(s)] |
+| --- | --- | --- | --- |
+| 128 | 25687/0.039 | 26137/0.056 | 26892/2.832 |
+| 256 | 25987/3.035 | 26231/4.364 | 26892/5.421 |
+| 512 | 26707/9.158 | 27194/9.934 | 26892/11.236 |
+| 1024 | 27763/21.968 | 28654/24.382 | 28932/25.541 |
+| 2048 | 33093/50.033 | 34028/58.842 | 30294/56.203 |
diff --git a/onebench/codegeex/environment.yml b/onebench/codegeex/environment.yml
@@ -0,0 +1,82 @@
+name: py37
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - ca-certificates=2023.01.10=h06a4308_0
+  - certifi=2022.12.7=py37h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.2=h6a678d5_6
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - ncurses=6.4=h6a678d5_0
+  - openssl=1.1.1s=h7f8727e_0
+  - pip=22.3.1=py37h06a4308_0
+  - python=3.7.16=h7a1cb2a_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=65.6.3=py37h06a4308_0
+  - sqlite=3.40.1=h5082296_0
+  - tk=8.6.12=h1ccaba5_0
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - xz=5.2.10=h5eee18b_1
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+    - backcall==0.2.0
+    - charset-normalizer==3.0.1
+    - cpm-kernels==1.0.11
+    - decorator==5.1.1
+    - deepspeed==0.8.0
+    - filelock==3.9.0
+    - fire==0.5.0
+    - hjson==3.1.0
+    - huggingface-hub==0.12.0
+    - idna==3.4
+    - importlib-metadata==6.0.0
+    - ipython==7.34.0
+    - jedi==0.18.2
+    - markdown-it-py==2.1.0
+    - matplotlib-inline==0.1.6
+    - mdurl==0.1.2
+    - ninja==1.11.1
+    - numpy==1.21.6
+    - nvidia-cublas-cu11==11.10.3.66
+    - nvidia-cuda-nvrtc-cu11==11.7.99
+    - nvidia-cuda-runtime-cu11==11.7.99
+    - nvidia-cudnn-cu11==8.5.0.96
+    - oneflow==0.9.1.dev20230311+cu117
+    - packaging==23.0
+    - pandas==1.3.5
+    - parso==0.8.3
+    - pexpect==4.8.0
+    - pickleshare==0.7.5
+    - pillow==9.4.0
+    - prompt-toolkit==3.0.36
+    - protobuf==3.20.3
+    - psutil==5.9.4
+    - ptyprocess==0.7.0
+    - py-cpuinfo==9.0.0
+    - pydantic==1.10.4
+    - pygments==2.14.0
+    - python-dateutil==2.8.2
+    - pytz==2022.7.1
+    - pyyaml==6.0
+    - pyzmq==25.0.0
+    - regex==2022.10.31
+    - requests==2.28.2
+    - rich==13.3.1
+    - six==1.16.0
+    - termcolor==2.2.0
+    - tokenizers==0.11.4
+    - torch==1.13.1
+    - torchaudio==0.13.1
+    - torchvision==0.14.1
+    - tqdm==4.64.1
+    - traitlets==5.9.0
+    - transformers==4.24.0
+    - typing-extensions==4.4.0
+    - urllib3==1.26.14
+    - wcwidth==0.2.6
+    - zipp==3.13.0
+prefix: /home/oyy/miniconda3/envs/py37
diff --git a/onebench/codegeex/extract_log.py b/onebench/codegeex/extract_log.py
@@ -0,0 +1,65 @@
+import os
+import re
+import numpy as np
+import argparse
+
+def process_logs(log_files_prefix, num_runs, is_faster_transformer=False):
+    """Average GPU memory usage and generation time over a series of run logs.
+
+    Reads ``{log_files_prefix}_{i}.log`` for ``i`` in ``1..num_runs``. From
+    each log it takes the first row that looks like ``nvidia-smi`` CSV output
+    (timestamp, GPU name, driver version, two ``%`` columns, three ``MiB``
+    columns) and captures the last ``MiB`` column, plus the first timing line.
+    A run contributes to the averages only when both values are found.
+
+    Args:
+        log_files_prefix: Path prefix of the log files, without ``_{i}.log``.
+        num_runs: Number of runs; logs are numbered starting at 1.
+        is_faster_transformer: When true, read the timing from a
+            ``process_code time used <float>`` line instead of a
+            ``Total generation time: <float>`` line.
+
+    Returns:
+        ``(mean_memory, mean_time)``. Both are ``nan`` when no log yields a
+        complete (memory, time) pair.
+
+    Raises:
+        FileNotFoundError: If one of the expected log files does not exist.
+    """
+    # The GPU model name is matched generically so that logs recorded on any
+    # device are accepted, not only on one specific A100 variant.
+    mem_pattern = re.compile(
+        r"\d+/\d+/\d+ \d+:\d+:\d+\.\d+, [^,\n]+, \d+(?:\.\d+)+, "
+        r"\d+ %, \d+ %, \d+ MiB, \d+ MiB, (\d+) MiB"
+    )
+    time_pattern = re.compile(
+        r"process_code time used (\d+\.\d+)"
+        if is_faster_transformer
+        else r"Total generation time: (\d+\.\d+)"
+    )
+
+    memory_usage = []
+    process_code_time = []
+    for i in range(1, num_runs + 1):
+        with open(f"{log_files_prefix}_{i}.log", "r") as f:
+            content = f.read()
+
+        mem = mem_pattern.search(content)
+        elapsed = time_pattern.search(content)
+        # Keep the two lists aligned: skip runs missing either value.
+        if mem and elapsed:
+            memory_usage.append(int(mem.group(1)))
+            process_code_time.append(float(elapsed.group(1)))
+
+    if not memory_usage:
+        # np.mean([]) emits a RuntimeWarning; return nan explicitly instead.
+        return float("nan"), float("nan")
+    return np.mean(memory_usage), np.mean(process_code_time)
+
+def main(logs_path, framework_list=None):
+    """Summarise the benchmark logs as a Markdown table.
+
+    For every output length and framework, averages the logs named
+    ``{length}_{framework}_run_{i}.log`` found in that framework's log
+    directory, then writes the table to ``results_table.md`` in the current
+    directory and prints it.
+
+    Args:
+        logs_path: Mapping from framework key to the directory containing
+            that framework's log files.
+        framework_list: Framework keys to report, in column order. Defaults
+            to ``["oneflow", "pytorch", "faster_transformer"]``.
+
+    Raises:
+        KeyError: If a framework in ``framework_list`` has no entry in
+            ``logs_path``.
+    """
+    lengths = [128, 256, 512, 1024, 2048]
+    num_runs = 10
+    if framework_list is None:
+        framework_list = ["oneflow", "pytorch", "faster_transformer"]
+    # Column titles for the known frameworks; unknown keys are shown as-is.
+    display_names = {
+        "oneflow": "OneFlow",
+        "pytorch": "PyTorch",
+        "faster_transformer": "FasterTransformer",
+    }
+
+    header_cells = ["L"] + [
+        f"{display_names.get(framework, framework)}[Mem(MiB)/Time(s)]"
+        for framework in framework_list
+    ]
+    table_lines = [
+        "| " + " | ".join(header_cells) + " |",
+        "| " + " | ".join(["---"] * len(header_cells)) + " |",
+    ]
+
+    for length in lengths:
+        cells = [str(length)]
+        for framework in framework_list:
+            # Use the mapping passed in, not a module-level global.
+            log_files_prefix = os.path.join(logs_path[framework], f"{length}_{framework}_run")
+            avg_memory, avg_time = process_logs(
+                log_files_prefix,
+                num_runs,
+                is_faster_transformer=(framework == "faster_transformer"),
+            )
+            cells.append(f"{avg_memory:.2f}/{avg_time:.3f}")
+        table_lines.append("| " + " | ".join(cells) + " |")
+
+    markdown_table = "\n".join(table_lines) + "\n"
+
+    with open("results_table.md", "w") as f:
+        f.write(markdown_table)
+
+    print(markdown_table)
+
+if __name__ == "__main__":
+    # Command-line entry point: one required log directory per framework.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--oneflow_logs_path", type=str, required=True, help="Path to the OneFlow log files")
+    parser.add_argument("--pytorch_logs_path", type=str, required=True, help="Path to the PyTorch log files")
+    parser.add_argument("--faster_transformer_logs_path", type=str, required=True, help="Path to the FasterTransformer log files")
+    args = parser.parse_args()
+
+    # Framework key -> log directory; insertion order is the column order.
+    logs_path_dict = {
+        "oneflow": args.oneflow_logs_path,
+        "pytorch": args.pytorch_logs_path,
+        "faster_transformer": args.faster_transformer_logs_path,
+    }
+
+    # main() takes the framework keys as its second argument; calling it with
+    # the mapping alone raised TypeError.
+    main(logs_path_dict, list(logs_path_dict))
diff --git a/onebench/codegeex/initialize_docker.sh b/onebench/codegeex/initialize_docker.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Start a detached container from the NGC PyTorch image with the current
+# directory mounted at /workspace, copy both model checkpoints into it, and
+# open an interactive shell inside it.
+#
+# Optional environment overrides:
+#   CODEGEEX_CKPT     host path of the checkpoint copied to /workspace/codegeex_13b.pt
+#   CODEGEEX_FT_CKPT  host path of the checkpoint copied to /workspace/codegeex_13b_ft.pt
+
+DOCKER_IMAGE="nvcr.io/nvidia/pytorch:21.11-py3"
+# Random container name and host port so several containers can coexist.
+DOCKER_NAME=$(openssl rand -hex 10)
+PORT=$(shuf -i 8000-9999 -n 1)
+CODEGEEX_CKPT=${CODEGEEX_CKPT:-/data/home/codegeex_13b.pt}
+CODEGEEX_FT_CKPT=${CODEGEEX_FT_CKPT:-/data/home/ouyangyu/codegeex/codegeex-fastertransformer/codegeex_13b_ft.pt}
+
+# Fail early, before any container is created, if a checkpoint is missing.
+for ckpt in "$CODEGEEX_CKPT" "$CODEGEEX_FT_CKPT"; do
+    if [ ! -f "$ckpt" ]; then
+        echo "Checkpoint not found: $ckpt" >&2
+        exit 1
+    fi
+done
+
+# A failed pull is not fatal: a locally cached image can still be used.
+docker pull "$DOCKER_IMAGE" || echo "Warning: docker pull failed; trying a local copy of $DOCKER_IMAGE" >&2
+
+# Host port $PORT is published to container port 5002; only GPU 0 is exposed.
+docker run -p "$PORT:5002" --cpus 12 --gpus '"device=0"' -it -d --ipc=host --name="$DOCKER_NAME" -v "$(pwd):/workspace" "$DOCKER_IMAGE" || {
+    echo "Failed to start container $DOCKER_NAME" >&2
+    exit 1
+}
+
+# Explicit destination names keep the in-container paths fixed even when the
+# host-side file names are overridden.
+docker cp "$CODEGEEX_CKPT" "$DOCKER_NAME:/workspace/codegeex_13b.pt" || exit 1
+docker cp "$CODEGEEX_FT_CKPT" "$DOCKER_NAME:/workspace/codegeex_13b_ft.pt" || exit 1
+docker exec -it "$DOCKER_NAME" /bin/bash
diff --git a/onebench/codegeex/original_script.sh b/onebench/codegeex/original_script.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# Make the 'py37' conda environment available and activate it; it is created
+# from environment.yml only when 'conda env list' does not already show it.
+conda init bash
+source /opt/conda/etc/profile.d/conda.sh
+if ! conda env list | grep -q '^py37\s'; then
+    echo "Environment 'py37' does not exist. Creating it from 'environment.yml'."
+    conda env create -f environment.yml
+else
+    echo "Environment 'py37' exists. Activating it now."
+fi
+conda activate py37
+# GPU index handed to the inference scripts as their first argument.
+GPU_ID=0
+# Fetch the OneFlow port of CodeGeeX; everything up to the matching 'cd ..'
+# runs inside this checkout.
+git clone https://github.com/Oneflow-Inc/one-codegeex.git
+cd one-codegeex
+# Point pip at the Tsinghua PyPI mirror for all following installs.
+python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+# Editable install of the cloned package itself.
+pip install -e .
+pip install torch
+# Pre-release OneFlow build from the staging index (presumably the CUDA 11.7
+# build, going by the 'cu117' path -- confirm).
+pip install --pre oneflow -f https://staging.oneflow.info/branch/master/cu117
+pip install cpm_kernels
+pip install deepspeed
+pip install transformers
+pip install xgboost
+
+# Prepend two lines to each test script. 'echo ... | cat - FILE > temp' writes
+# the echoed line followed by FILE, so the line prepended last ends up first:
+# the final order is 'import sys, os' and then the sys.path.insert(...) call
+# that adds the parent of the tests directory to sys.path.
+echo "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))" | cat - tests/test_inference.py > temp && mv temp tests/test_inference.py
+echo "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))" | cat - tests/test_inference_oneflow.py > temp && mv temp tests/test_inference_oneflow.py
+echo "import sys, os" | cat - tests/test_inference.py > temp && mv temp tests/test_inference.py
+echo "import sys, os" | cat - tests/test_inference_oneflow.py > temp && mv temp tests/test_inference_oneflow.py
+# Overwrite the model configuration. The heredoc delimiter is quoted, so
+# $CHECKPOINT_PATH is written literally and only expands when the generated
+# file is evaluated.
+cat << 'EOF' > configs/codegeex_13b.sh
+# CodeGeeX-13B configuration
+
+CHECKPOINT_PATH="/workspace/codegeex_13b.pt"
+
+MODEL_ARGS="--num-layers 39 \
+            --hidden-size 5120 \
+            --num-attention-heads 40 \
+            --max-position-embeddings 2048 \
+            --attention-softmax-in-fp32 \
+            --load "$CHECKPOINT_PATH" \
+            --layernorm-epsilon 1e-5 \
+            --fp16 \
+            --ws-encoding-start-id 10 \
+            --ws-encoding-length 10 \
+            --make-vocab-size-divisible-by 52224 \
+            --seq-length 2048"
+EOF
+# In-place edits of the OneFlow test script. The line-addressed commands act
+# on the file as it is after the two prepends above.
+# Change every 'default=39,' to 'default=40,'.
+sed -i 's|default=39,|default=40,|g' tests/test_inference_oneflow.py
+# On lines 129-130 replace everything from 'state_dict' to end of line with
+# 'pass', and on line 134 replace the load_state_dict call with 'pass'
+# (presumably to skip loading the checkpoint in this script -- confirm).
+sed -i '129,130s|state_dict.*|pass|g' tests/test_inference_oneflow.py
+sed -i '134s|model.load_state_dict(state_dict)|pass|g' tests/test_inference_oneflow.py
+# Before each 'print(times)' line in both test scripts, insert code that runs
+# an nvidia-smi CSV query via os.system, so the GPU memory columns are printed
+# alongside the script's other output.
+sed -i '/print(times)/i \    import os\n    cmd = "nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv"\n    os.system(cmd)' tests/test_inference_oneflow.py
+sed -i '/print(times)/i \    import os\n    cmd = "nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv"\n    os.system(cmd)' tests/test_inference.py
+# Replace 'break' with 'pass' on line 326 of the OneFlow inference module
+# (presumably so generation is not cut short -- confirm against that file).
+sed -i '326s|break|pass|g' codegeex/oneflow/inference.py
+# Make the output length configurable in both launcher scripts: the fixed
+# '--out-seq-length 1024' becomes '$OUTPUT_LEN', and 'OUTPUT_LEN=$3' is
+# inserted as line 7 so the value comes from the third argument.
+sed -i 's|--out-seq-length 1024|--out-seq-length $OUTPUT_LEN|g' scripts/test_inference_oneflow.sh
+sed -i '7i OUTPUT_LEN=$3' scripts/test_inference_oneflow.sh
+sed -i 's|--out-seq-length 1024|--out-seq-length $OUTPUT_LEN|g' scripts/test_inference.sh
+sed -i '7i OUTPUT_LEN=$3' scripts/test_inference.sh
+
+# run_rounds SCRIPT TAG OUT_LEN
+# Runs ./scripts/SCRIPT ten times for output length OUT_LEN, copying stdout
+# and stderr of run N to OUT_LEN_TAG_run_N.log, then pauses for 60 seconds.
+run_rounds() {
+    local script_name=$1 tag=$2 out_len=$3 round
+    for ((round = 1; round <= 10; round++)); do
+        bash ./scripts/$script_name $GPU_ID ./tests/test_prompt.txt $out_len 2>&1 | tee ${out_len}_${tag}_run_${round}.log
+    done
+    sleep 60
+}
+
+# For each output length: all PyTorch runs first, then all OneFlow runs.
+for length in 128 256 512 1024 2048; do
+    run_rounds test_inference.sh pytorch $length
+    run_rounds test_inference_oneflow.sh oneflow $length
+done
+
+cd ..
+WORK_DIR=$(pwd)
+git clone https://github.com/CodeGeeX/codegeex-fastertransformer.git
+
+# Enter the checkout once, in the main shell, so post.py and the log files
+# resolve on every iteration. (A 'cd' repeated inside the loop only succeeds
+# the first time and silently skips every later request.)
+cd "$WORK_DIR/codegeex-fastertransformer" || exit 1
+
+# Install and build in the foreground: the server must not be started, and no
+# request sent, before the build has finished successfully.
+if ! { python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
+       pip3 install transformers sentencepiece && \
+       sh make_all.sh; }; then
+    echo "codegeex-fastertransformer setup failed; aborting." >&2
+    exit 1
+fi
+
+# Start the inference server in the background. $! is the PID of python3
+# itself, so the final 'kill' really stops the server.
+python3 api.py --output_len 2048 --ckpt_path /workspace/codegeex_13b_ft.pt --lib_path /workspace/codegeex-fastertransformer/build/lib/libth_codegeex.so &
+FLASK_PID=$!
+
+# Bounded wait until the server accepts TCP connections.
+# NOTE(review): assumes api.py listens on port 5002 (the container port that
+# initialize_docker.sh publishes) -- confirm, or override with FT_PORT.
+FT_PORT=${FT_PORT:-5002}
+FT_STARTUP_TIMEOUT=${FT_STARTUP_TIMEOUT:-600}
+waited=0
+until (exec 3<>"/dev/tcp/127.0.0.1/${FT_PORT}") 2>/dev/null; do
+    if ! kill -0 "$FLASK_PID" 2>/dev/null; then
+        echo "api.py exited before accepting connections; aborting." >&2
+        exit 1
+    fi
+    if [ "$waited" -ge "$FT_STARTUP_TIMEOUT" ]; then
+        echo "Warning: no listener on port ${FT_PORT} after ${FT_STARTUP_TIMEOUT}s; continuing anyway." >&2
+        break
+    fi
+    sleep 5
+    waited=$((waited + 5))
+done
+
+for length in 128 256 512 1024 2048
+do
+    echo "Running for output length: $length"
+    for ((i=1; i<=10; i++)); do
+        echo "Iteration: $i"
+        log_file=${length}_faster_transformer_run_${i}.log
+        # Start each log afresh (like the PyTorch/OneFlow logs) so a re-run
+        # does not leave stale results at the top of the file.
+        python3 post.py --output_len $length 2>&1 | tee "$log_file"
+        # Append the GPU query to the same log; the log parser needs the
+        # memory row and the timing line in one file.
+        nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv 2>&1 | tee -a "$log_file"
+        echo "------------------------$length--------------------------"
+    done
+    sleep 20s
+done
+kill $FLASK_PID