Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Codegeex revised #72

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions onebench/codegeex/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
使用说明:

- 在A100机器上运行
- 在正常terminal中运行bash initialize_docker.sh
- 在打开的container bash中运行 bash original_script.sh
- 确保environment.yml文件存在

示例输出(由 extract_log.py 根据各框架的运行日志生成,保存在 results_table.md 中):

| L | OneFlow[Mem(MiB)/Time(s)] | PyTorch[Mem(MiB)/Time(s)] | FasterTransformer[Mem(MiB)/Time(s)] |
| --- | --- | --- | --- |
| 128 | 25687/0.039 | 26137/0.056 | 26892/2.832 |
| 256 | 25987/3.035 | 26231/4.364 | 26892/5.421 |
| 512 | 26707/9.158 | 27194/9.934 | 26892/11.236 |
| 1024 | 27763/21.968 | 28654/24.382 | 28932/25.541 |
| 2048 | 33093/50.033 | 34028/58.842 | 30294/56.203 |
82 changes: 82 additions & 0 deletions onebench/codegeex/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Conda environment specification for the CodeGeeX benchmark.
# original_script.sh creates the 'py37' environment from this file when an
# environment with that name does not already exist.
name: py37
channels:
- defaults
# Conda-managed packages, pinned to exact versions and build strings.
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- ca-certificates=2023.01.10=h06a4308_0
- certifi=2022.12.7=py37h06a4308_0
- ld_impl_linux-64=2.38=h1181459_1
- libffi=3.4.2=h6a678d5_6
- libgcc-ng=11.2.0=h1234567_1
- libgomp=11.2.0=h1234567_1
- libstdcxx-ng=11.2.0=h1234567_1
- ncurses=6.4=h6a678d5_0
- openssl=1.1.1s=h7f8727e_0
- pip=22.3.1=py37h06a4308_0
- python=3.7.16=h7a1cb2a_0
- readline=8.2=h5eee18b_0
- setuptools=65.6.3=py37h06a4308_0
- sqlite=3.40.1=h5082296_0
- tk=8.6.12=h1ccaba5_0
- wheel=0.37.1=pyhd3eb1b0_0
- xz=5.2.10=h5eee18b_1
- zlib=1.2.13=h5eee18b_0
# Packages listed under "pip:" are installed with pip rather than conda.
- pip:
- backcall==0.2.0
- charset-normalizer==3.0.1
- cpm-kernels==1.0.11
- decorator==5.1.1
- deepspeed==0.8.0
- filelock==3.9.0
- fire==0.5.0
- hjson==3.1.0
- huggingface-hub==0.12.0
- idna==3.4
- importlib-metadata==6.0.0
- ipython==7.34.0
- jedi==0.18.2
- markdown-it-py==2.1.0
- matplotlib-inline==0.1.6
- mdurl==0.1.2
- ninja==1.11.1
- numpy==1.21.6
- nvidia-cublas-cu11==11.10.3.66
- nvidia-cuda-nvrtc-cu11==11.7.99
- nvidia-cuda-runtime-cu11==11.7.99
- nvidia-cudnn-cu11==8.5.0.96
# NOTE(review): this is a dev build with a "+cu117" local version tag;
# presumably it needs an extra package index to resolve - confirm.
- oneflow==0.9.1.dev20230311+cu117
- packaging==23.0
- pandas==1.3.5
- parso==0.8.3
- pexpect==4.8.0
- pickleshare==0.7.5
- pillow==9.4.0
- prompt-toolkit==3.0.36
- protobuf==3.20.3
- psutil==5.9.4
- ptyprocess==0.7.0
- py-cpuinfo==9.0.0
- pydantic==1.10.4
- pygments==2.14.0
- python-dateutil==2.8.2
- pytz==2022.7.1
- pyyaml==6.0
- pyzmq==25.0.0
- regex==2022.10.31
- requests==2.28.2
- rich==13.3.1
- six==1.16.0
- termcolor==2.2.0
- tokenizers==0.11.4
- torch==1.13.1
- torchaudio==0.13.1
- torchvision==0.14.1
- tqdm==4.64.1
- traitlets==5.9.0
- transformers==4.24.0
- typing-extensions==4.4.0
- urllib3==1.26.14
- wcwidth==0.2.6
- zipp==3.13.0
# NOTE(review): absolute path from the machine that exported this file;
# presumably unnecessary when the environment is created by name - confirm
# and consider removing.
prefix: /home/oyy/miniconda3/envs/py37
65 changes: 65 additions & 0 deletions onebench/codegeex/extract_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import os
import re
import numpy as np
import argparse

def process_logs(log_files_prefix, num_runs, is_faster_transformer=False):
    """Average GPU memory use and run time over a numbered series of logs.

    Reads ``{log_files_prefix}_{i}.log`` for ``i`` in ``1..num_runs``. From
    each file it takes the first ``nvidia-smi`` CSV data row (the last
    ``MiB`` column of that row is used as the memory figure) and the first
    timing line. A run contributes to the averages only when both values
    are found in its log.

    Args:
        log_files_prefix: Path prefix of the log files, without the
            ``_{i}.log`` suffix.
        num_runs: Number of consecutive runs to read, starting at 1.
        is_faster_transformer: When true, the timing is read from a
            ``process_code time used <seconds>`` line; otherwise from a
            ``Total generation time: <seconds>`` line.

    Returns:
        A ``(mean_memory_mib, mean_time_s)`` tuple. Both values are NaN
        when no log produced a complete sample.

    Raises:
        OSError: If one of the expected log files cannot be opened.
    """
    # Row layout: timestamp, GPU name, driver version, two utilisation
    # percentages, then three MiB columns. The GPU name is matched
    # generically so that logs from any GPU model are accepted, not only
    # "NVIDIA A100-PCIE-40GB".
    mem_pattern = re.compile(
        r"\d+/\d+/\d+ \d+:\d+:\d+\.\d+, [^,\n]+, \d+\.\d+\.\d+, "
        r"\d+ %, \d+ %, \d+ MiB, \d+ MiB, (\d+) MiB"
    )
    if is_faster_transformer:
        time_pattern = re.compile(r"process_code time used (\d+\.\d+)")
    else:
        time_pattern = re.compile(r"Total generation time: (\d+\.\d+)")

    memory_usage = []
    process_code_time = []

    for i in range(1, num_runs + 1):
        with open(f"{log_files_prefix}_{i}.log", "r") as f:
            content = f.read()

        mem_match = mem_pattern.search(content)
        time_match = time_pattern.search(content)

        # Keep the two lists aligned: skip runs with an incomplete log.
        if mem_match and time_match:
            memory_usage.append(int(mem_match.group(1)))
            process_code_time.append(float(time_match.group(1)))

    if not memory_usage:
        # np.mean([]) would also give NaN, but with a RuntimeWarning.
        return float("nan"), float("nan")

    return np.mean(memory_usage), np.mean(process_code_time)

def main(logs_path, framework_list=None):
    """Build the benchmark summary table, save it and print it.

    For every output length in ``[128, 256, 512, 1024, 2048]`` and every
    framework, the logs named ``{length}_{framework}_run_{i}.log`` (ten
    runs each) are averaged with ``process_logs``. The resulting Markdown
    table is written to ``results_table.md`` in the current directory and
    echoed to stdout.

    Args:
        logs_path: Mapping from framework key (``"oneflow"``,
            ``"pytorch"``, ``"faster_transformer"``) to the directory that
            holds that framework's log files.
        framework_list: Framework keys to report, in column order.
            Defaults to all three frameworks.

    Raises:
        KeyError: If a requested framework has no entry in ``logs_path``.
    """
    lengths = [128, 256, 512, 1024, 2048]
    num_runs = 10
    if framework_list is None:
        framework_list = ["oneflow", "pytorch", "faster_transformer"]

    # Column titles for the known frameworks; unknown keys are shown as-is.
    display_names = {
        "oneflow": "OneFlow",
        "pytorch": "PyTorch",
        "faster_transformer": "FasterTransformer",
    }

    results = {}
    for length in lengths:
        results[length] = {}
        for framework in framework_list:
            # Use the mapping passed in rather than a module-level global,
            # so the function also works when imported.
            log_files_prefix = os.path.join(
                logs_path[framework], f"{length}_{framework}_run"
            )
            results[length][framework] = process_logs(
                log_files_prefix,
                num_runs,
                is_faster_transformer=(framework == "faster_transformer"),
            )

    header_cells = ["L"] + [
        f"{display_names.get(framework, framework)}[Mem(MiB)/Time(s)]"
        for framework in framework_list
    ]
    table_lines = [
        "| " + " | ".join(header_cells) + " |",
        "| " + " | ".join(["---"] * len(header_cells)) + " |",
    ]
    for length, framework_results in results.items():
        cells = [str(length)] + [
            f"{framework_results[framework][0]:.2f}"
            f"/{framework_results[framework][1]:.3f}"
            for framework in framework_list
        ]
        table_lines.append("| " + " | ".join(cells) + " |")

    markdown_table = "\n".join(table_lines) + "\n"

    with open("results_table.md", "w") as f:
        f.write(markdown_table)

    print(markdown_table)

if __name__ == "__main__":
    # Command-line entry point: one required log directory per framework.
    parser = argparse.ArgumentParser()
    parser.add_argument("--oneflow_logs_path", type=str, required=True, help="Path to the OneFlow log files")
    parser.add_argument("--pytorch_logs_path", type=str, required=True, help="Path to the PyTorch log files")
    parser.add_argument("--faster_transformer_logs_path", type=str, required=True, help="Path to the FasterTransformer log files")
    args = parser.parse_args()

    # Framework key -> directory containing that framework's run logs.
    logs_path_dict = {
        "oneflow": args.oneflow_logs_path,
        "pytorch": args.pytorch_logs_path,
        "faster_transformer": args.faster_transformer_logs_path,
    }

    # main() takes the path mapping and the list of frameworks; passing
    # only the mapping raised TypeError for the missing second argument.
    main(logs_path_dict, list(logs_path_dict))
11 changes: 11 additions & 0 deletions onebench/codegeex/initialize_docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Start a detached container from the NVIDIA PyTorch image with the current
# directory mounted at /workspace, copy both CodeGeeX checkpoints into it,
# and then open an interactive bash session inside the container.

DOCKER_IMAGE="nvcr.io/nvidia/pytorch:21.11-py3"
# Random container name and random host port in 8000-9999.
DOCKER_NAME=$(openssl rand -hex 10)
PORT=$(shuf -i 8000-9999 -n 1)

docker pull "$DOCKER_IMAGE"

# Expansions are quoted so a working directory containing spaces still
# produces a single, valid -v argument. Host port $PORT is forwarded to
# container port 5002; only GPU 0 is exposed to the container.
docker run -p "$PORT:5002" --cpus 12 --gpus '"device=0"' -it -d --ipc=host \
    --name="$DOCKER_NAME" -v "$(pwd):/workspace" "$DOCKER_IMAGE" || exit 1

# Model checkpoints: the first is loaded by the benchmark config, the second
# by the FasterTransformer server (both are referenced from /workspace by
# original_script.sh).
docker cp /data/home/codegeex_13b.pt "$DOCKER_NAME:/workspace/"
docker cp /data/home/ouyangyu/codegeex/codegeex-fastertransformer/codegeex_13b_ft.pt "$DOCKER_NAME:/workspace/"

docker exec -it "$DOCKER_NAME" /bin/bash
98 changes: 98 additions & 0 deletions onebench/codegeex/original_script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/bin/bash
# Benchmark driver, part 1: prepare the 'py37' conda environment, check out
# and patch one-codegeex, then run the PyTorch and OneFlow inference scripts
# ten times for each output length, saving every run to its own log file.

# Make the conda command usable in this non-interactive shell.
conda init bash
source /opt/conda/etc/profile.d/conda.sh
# Reuse the 'py37' environment if it exists, otherwise create it first.
if conda env list | grep -q '^py37\s'; then
echo "Environment 'py37' exists. Activating it now."
conda activate py37
else
echo "Environment 'py37' does not exist. Creating it from 'environment.yml'."
conda env create -f environment.yml
conda activate py37
fi
# GPU index passed as the first argument to the inference scripts below.
GPU_ID=0
git clone https://github.com/Oneflow-Inc/one-codegeex.git
cd one-codegeex
# Use the Tsinghua PyPI mirror for all following pip installs.
python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
pip install -e .
pip install torch
pip install --pre oneflow -f https://staging.oneflow.info/branch/master/cu117
pip install cpm_kernels
pip install deepspeed
pip install transformers
pip install xgboost

# Prepend two lines to each test script. The sys.path line is prepended
# first and "import sys, os" second, so the import ends up on line 1 and the
# repository root (parent of tests/) is put at the front of sys.path.
echo "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))" | cat - tests/test_inference.py > temp && mv temp tests/test_inference.py
echo "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))" | cat - tests/test_inference_oneflow.py > temp && mv temp tests/test_inference_oneflow.py
echo "import sys, os" | cat - tests/test_inference.py > temp && mv temp tests/test_inference.py
echo "import sys, os" | cat - tests/test_inference_oneflow.py > temp && mv temp tests/test_inference_oneflow.py
# Overwrite the model configuration. The heredoc delimiter is quoted, so the
# text below is written literally, with no variable expansion by this script.
cat << 'EOF' > configs/codegeex_13b.sh
# CodeGeeX-13B configuration

CHECKPOINT_PATH="/workspace/codegeex_13b.pt"

MODEL_ARGS="--num-layers 39 \
--hidden-size 5120 \
--num-attention-heads 40 \
--max-position-embeddings 2048 \
--attention-softmax-in-fp32 \
--load "$CHECKPOINT_PATH" \
--layernorm-epsilon 1e-5 \
--fp16 \
--ws-encoding-start-id 10 \
--ws-encoding-length 10 \
--make-vocab-size-divisible-by 52224 \
--seq-length 2048"
EOF
# In-place patches to the checked-out sources. Several of them address fixed
# line numbers, so they depend on the exact revision that was cloned.
# NOTE(review): presumably these skip state-dict loading in the OneFlow test
# (lines 129-130 and 134) - confirm against the target file.
sed -i 's|default=39,|default=40,|g' tests/test_inference_oneflow.py
sed -i '129,130s|state_dict.*|pass|g' tests/test_inference_oneflow.py
sed -i '134s|model.load_state_dict(state_dict)|pass|g' tests/test_inference_oneflow.py
# Insert an nvidia-smi CSV query before every "print(times)" line in both
# test scripts; its output goes to stdout and therefore into the run log.
sed -i '/print(times)/i \ import os\n cmd = "nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv"\n os.system(cmd)' tests/test_inference_oneflow.py
sed -i '/print(times)/i \ import os\n cmd = "nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv"\n os.system(cmd)' tests/test_inference.py
# NOTE(review): replaces a "break" on line 326 with "pass"; presumably this
# keeps generation running to the full requested length - confirm.
sed -i '326s|break|pass|g' codegeex/oneflow/inference.py
# Make the output length configurable: the hard-coded 1024 becomes
# $OUTPUT_LEN, which is set from the third positional argument on line 7.
sed -i 's|--out-seq-length 1024|--out-seq-length $OUTPUT_LEN|g' scripts/test_inference_oneflow.sh
sed -i '7i OUTPUT_LEN=$3' scripts/test_inference_oneflow.sh
sed -i 's|--out-seq-length 1024|--out-seq-length $OUTPUT_LEN|g' scripts/test_inference.sh
sed -i '7i OUTPUT_LEN=$3' scripts/test_inference.sh

# For each output length: ten PyTorch runs, a 60 s pause, ten OneFlow runs,
# another 60 s pause. stdout and stderr of every run are shown and also saved
# as <length>_<framework>_run_<i>.log in the current directory.
for length in 128 256 512 1024 2048
do
script_name="test_inference.sh"

for i in {1..10}
do
bash ./scripts/$script_name $GPU_ID ./tests/test_prompt.txt $length 2>&1 | tee ${length}_pytorch_run_${i}.log
done
sleep 60
script_name="test_inference_oneflow.sh"

for i in {1..10}
do
bash ./scripts/$script_name $GPU_ID ./tests/test_prompt.txt $length 2>&1 | tee ${length}_oneflow_run_${i}.log
done
sleep 60

done

# Benchmark driver, part 2: build codegeex-fastertransformer, start its API
# server in the background, send ten requests per output length, and save
# each run (request output plus a GPU memory snapshot) to its own log file.
cd ..
WORK_DIR=$(pwd)
git clone https://github.com/CodeGeeX/codegeex-fastertransformer.git

# Enter the checkout in the current shell. Previously the cd was part of a
# backgrounded command list, so it only took effect inside that subshell and
# the per-iteration "cd" in the loop could succeed on the first pass only.
cd codegeex-fastertransformer || exit 1

# Install dependencies and build in the foreground so the server is only
# started once the build has succeeded.
python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
    pip3 install transformers sentencepiece && \
    sh make_all.sh || exit 1

# Launch the server directly as the background job so that $! is the PID of
# the server process itself, not of a wrapping subshell.
python3 api.py --output_len 2048 --ckpt_path /workspace/codegeex_13b_ft.pt --lib_path /workspace/codegeex-fastertransformer/build/lib/libth_codegeex.so &
FLASK_PID=$!
# NOTE(review): nothing waits for the server to finish loading the model
# before the first request is sent - confirm whether post.py retries.

for length in 128 256 512 1024 2048
do
    echo "Running for output length: $length"
    for ((i=1; i<=10; i++)); do
        echo "Iteration: $i"
        log_file="${length}_faster_transformer_run_${i}.log"
        python3 post.py --output_len "$length" 2>&1 | tee -a "$log_file"
        # Append the GPU snapshot to the same log: extract_log.py reads the
        # memory figure and the timing line from one file per run.
        nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv | tee -a "$log_file"
        echo "------------------------$length--------------------------"
    done
    sleep 20s
done
kill "$FLASK_PID"