Skip to content

Commit

Permalink
优化服务停止逻辑
Browse files Browse the repository at this point in the history
  • Loading branch information
shell-nlp committed Dec 27, 2024
1 parent 5fd7fd5 commit 29090cc
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 11 deletions.
17 changes: 7 additions & 10 deletions gpt_server/serving/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import yaml
import os
import sys
import signal
import ray

os.environ["OPENBLAS_NUM_THREADS"] = (
Expand All @@ -20,27 +19,25 @@
from gpt_server.utils import (
start_api_server,
start_model_worker,
stop_server,
delete_log,
)

# Delete logs
delete_log()


def signal_handler(signum, frame):
    """Handle SIGINT: call ``stop_server()`` and then raise ``KeyboardInterrupt``.

    ``signum`` and ``frame`` are the standard signal-handler arguments; neither
    is used here.
    """
    # NOTE(review): this page is a diff view; these lines look like the
    # pre-change side of the commit -- confirm whether they still exist.
    stop_server()
    raise KeyboardInterrupt


# Install the handler above for SIGINT (Ctrl-C).
signal.signal(signal.SIGINT, signal_handler)

# Load the server configuration from gpt_server/script/config.yaml under root_dir.
config_path = os.path.join(root_dir, "gpt_server/script/config.yaml")
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# locale's preferred encoding, which can mis-decode non-ASCII YAML content.
with open(config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)


# print(config)
def main():
    """Start the API server and then the model workers, both from ``config``.

    The two calls run in this fixed order; each receives the module-level
    ``config`` loaded from the YAML file above.
    """
    # Start the Controller and OpenAI API services.
    start_api_server(config=config)
    # Start the Model Worker services.
    start_model_worker(config=config)


if __name__ == "__main__":
    main()
44 changes: 43 additions & 1 deletion gpt_server/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,55 @@
import subprocess
from loguru import logger
import torch
import psutil
from rich import print
import signal

# Add a file sink at logs/gpt_server.log for records of level INFO and above,
# with rotation configured at 100 MB.
logger.add("logs/gpt_server.log", rotation="100 MB", level="INFO")


def kill_child_processes(parent_pid, including_parent=False):
    """Terminate all descendants of a process, and optionally the process itself.

    Each descendant is first asked to stop (SIGTERM via ``Process.terminate``)
    and given up to 5 seconds to exit; if it is still alive after that it is
    force-killed (``Process.kill``).

    Args:
        parent_pid: PID of the process whose descendants should be stopped.
        including_parent: When true, also send a terminate request to
            ``parent_pid`` after its descendants have been handled.
    """
    try:
        parent = psutil.Process(parent_pid)
        children = parent.children(recursive=True)
        for child in children:
            try:
                print(f"终止子进程 {child.pid}...")
                # psutil's terminate() raises NoSuchProcess (handled below)
                # if the child is already gone, unlike a raw os.kill().
                child.terminate()  # graceful stop
                child.wait(5)  # wait at most 5 seconds for the child to exit
            except psutil.NoSuchProcess:
                # The child exited on its own; nothing left to do.
                pass
            except psutil.TimeoutExpired:
                # Catch the exception *class*: instantiating it here would
                # raise TypeError and skip the force-kill entirely.
                print(f"终止子进程 {child.pid} 超时!强制终止...")
                try:
                    child.kill()  # force stop
                except psutil.NoSuchProcess:
                    # The child exited between the timeout and the kill.
                    pass
        if including_parent:
            print(f"终止父进程 {parent_pid}...")
            parent.terminate()
    except psutil.NoSuchProcess:
        print(f"父进程 {parent_pid} 不存在!")


# Record the PID of the process that imports this module (the parent process).
parent_pid = os.getpid()


def signal_handler(signum, frame):
    """Handle SIGINT: stop all child processes, then exit with status 0.

    ``signum`` and ``frame`` are the standard signal-handler arguments; neither
    is used here.

    Raises:
        SystemExit: always, with exit code 0, after the children are handled.
    """
    print("\nCtrl-C detected! Cleaning up...")
    kill_child_processes(parent_pid, including_parent=False)
    # Exit normally. Raise SystemExit directly instead of calling exit():
    # the latter is a helper injected by the `site` module for interactive
    # use and is not guaranteed to exist (e.g. `python -S`, frozen builds).
    raise SystemExit(0)


# Install the handler above for SIGINT (Ctrl-C) at import time.
signal.signal(signal.SIGINT, signal_handler)


def run_cmd(cmd: str, *args, **kwargs):
    """Run a shell command, block until it finishes, and return its PID.

    Args:
        cmd: Command line handed to the shell (``shell=True``), so it must
            not be built from untrusted input.
        *args: Accepted for call compatibility; not used.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        The PID of the shell process that ran ``cmd`` (already exited by the
        time this function returns).
    """
    logger.info(f"执行命令如下:\n{cmd}\n")
    # Launch the command exactly once; a leftover subprocess.run() call ahead
    # of the Popen would execute the same command twice.
    process = subprocess.Popen(cmd, shell=True)
    # Wait for the command to complete.
    process.wait()
    return process.pid


def start_controller(controller_host, controller_port, dispatch_method):
Expand Down

0 comments on commit 29090cc

Please sign in to comment.