From bdde97a28c15eef27cfc7cf779c709845e8ddf66 Mon Sep 17 00:00:00 2001
From: Kedreamix
Date: Wed, 24 Jan 2024 00:37:34 +0800
Subject: [PATCH] Add LLM backends such as Tongyi Qianwen (Qwen) for text chat
 with the digital human
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md     | 39 ++++++++++++++++++++++++---
 app.py        | 23 +++++++++++-----
 src/Gemini.py | 19 ++++++++++++++
 src/LLM.py    | 24 +++++++++++++++++
 src/Linly.py  | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/Qwen.py   | 51 +++++++++++++++++++++++++++++++++++
 6 files changed, 219 insertions(+), 10 deletions(-)
 create mode 100644 src/Gemini.py
 create mode 100644 src/LLM.py
 create mode 100644 src/Linly.py
 create mode 100644 src/Qwen.py

diff --git a/README.md b/README.md
index e84773c..b5ed318 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,39 @@ python app.py
 
 Open http://serverip/echo.html in a browser, type any text into the text box and submit it; the digital human reads the text aloud.
 
+### Use an LLM for digital human dialogue
+
+The dialogue flow follows the digital-human conversation system [LinlyTalker](https://github.com/Kedreamix/Linly-Talker); the supported LLMs are Linly-AI, Qwen and GeminiPro.
+
+Qwen (Tongyi Qianwen) from Alibaba Cloud is recommended; see [https://github.com/QwenLM/Qwen](https://github.com/QwenLM/Qwen)
+
+Download the Qwen model: [https://huggingface.co/Qwen/Qwen-1_8B-Chat](https://huggingface.co/Qwen/Qwen-1_8B-Chat)
+
+You can download it with `git`:
+
+```bash
+git lfs install
+git clone https://huggingface.co/Qwen/Qwen-1_8B-Chat
+```
+
+or with the Hugging Face download tool `huggingface-cli`:
+
+```bash
+pip install -U huggingface_hub
+
+# optional: use a mirror to speed up the download
+# Linux
+export HF_ENDPOINT="https://hf-mirror.com"
+# Windows PowerShell
+$env:HF_ENDPOINT="https://hf-mirror.com"
+
+huggingface-cli download --resume-download Qwen/Qwen-1_8B-Chat --local-dir Qwen/Qwen-1_8B-Chat
+```
+
+Alternatively, the model can be downloaded from the ModelScope community: [https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/files](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/files)
+
 ### Run with docker
 Run srs and nginx first
 Start the digital human
@@ -81,9 +114,9 @@ docker run --gpus all -it --network=host --rm registry.cn-hangzhou.aliyuncs.com
 ```
 
 ## TODO
-- Add chatgpt-based dialogue for the digital human
-- Voice cloning
-- Play a video clip instead while the digital human is silent
+- [x] Add chatgpt-based dialogue for the digital human
+- [ ] Voice cloning
+- [ ] Play a video clip instead while the digital human is silent
 
 If this project helps you, please give it a star. Anyone interested is also welcome to help improve it.
 Email: lipku@foxmail.com
diff --git a/app.py b/app.py
index 7898eb9..3448aac 100644
--- a/app.py
+++ b/app.py
@@ -27,8 +27,18 @@ sockets = Sockets(app)
 
 global nerfreal
 
+from src.LLM import *
+# llm = Gemini(model_path='gemini-pro', api_key=None, proxy_url=None)
+# llm = Linly(mode = 'offline', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf")
+# llm = Linly(mode = 'api', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf")
+llm = Qwen(mode = 'offline', model_path="Qwen/Qwen-1_8B-Chat")
+
+
+def llm_response(question, history = None):
+    # history is accepted for interface compatibility but is not used yet
+    return llm.generate(question)
 
 async def main(voicename: str, text: str, render):
+    # print("text:", text, "voicename:", voicename)
     communicate = edge_tts.Communicate(text, voicename)
 
     #with open(OUTPUT_FILE, "wb") as file:
@@ -42,12 +52,12 @@ async def main(voicename: str, text: str, render):
 
 def txt_to_audio(text_):
     audio_list = []
-    #audio_path = 'data/audio/aud_0.wav'
+    # audio_path = 'data/audio/aud_0.wav'
     voicename = "zh-CN-YunxiaNeural"
     text = text_
     t = time.time()
     asyncio.get_event_loop().run_until_complete(main(voicename,text,nerfreal))
-    print('-------tts time: ',time.time()-t)
+    print(f'-------tts time:{time.time()-t:.4f}s')
 
 @sockets.route('/humanecho')
 def echo_socket(ws):
@@ -65,8 +75,9 @@ def echo_socket(ws):
 
         if len(message)==0:
             return '输入信息为空'
-        else:
-            txt_to_audio(message)
+        else:
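+            # Pass the websocket message through the configured LLM first; the
+            # generated answer, not the raw user text, is what gets synthesized
+            # and rendered by the digital human.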
+            answer = llm_response(message)
+            txt_to_audio(answer)
 
 def render():
     nerfreal.render()
@@ -225,6 +236,4 @@ def render():
     #############################################################################
     print('start websocket server')
     server = pywsgi.WSGIServer(('0.0.0.0', 8000), app, handler_class=WebSocketHandler)
-    server.serve_forever()
-
-    
\ No newline at end of file
+    server.serve_forever()
\ No newline at end of file
diff --git a/src/Gemini.py b/src/Gemini.py
new file mode 100644
index 0000000..9c3e6b7
--- /dev/null
+++ b/src/Gemini.py
@@ -0,0 +1,19 @@
+import os
+import google.generativeai as genai
+
+
+def configure_api(api_key, proxy_url=None):
+    # os.environ values must be strings, so only set the proxy when one is given.
+    if proxy_url:
+        os.environ['https_proxy'] = proxy_url
+        os.environ['http_proxy'] = proxy_url
+    genai.configure(api_key=api_key)
+
+
+class Gemini:
+    def __init__(self, model_path='gemini-pro', api_key=None, proxy_url=None):
+        configure_api(api_key, proxy_url)
+        self.model = genai.GenerativeModel(model_path)
+
+    def generate(self, question):
+        response = self.model.generate_content(question)
+        # Return plain text so the caller can hand the answer straight to TTS.
+        return response.text
\ No newline at end of file
diff --git a/src/LLM.py b/src/LLM.py
new file mode 100644
index 0000000..8cb6227
--- /dev/null
+++ b/src/LLM.py
@@ -0,0 +1,24 @@
+from src.Linly import Linly
+from src.Qwen import Qwen
+from src.Gemini import Gemini
+
+# Smoke tests for each backend; the sample question means "How do I cope with stress?"
+def test_Linly(question = "如何应对压力?", mode='offline', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf"):
+    llm = Linly(mode, model_path)
+    answer = llm.generate(question)
+    print(answer)
+
+def test_Qwen(question = "如何应对压力?", mode='offline', model_path="Qwen/Qwen-1_8B-Chat"):
+    llm = Qwen(mode, model_path)
+    answer = llm.generate(question)
+    print(answer)
+
+def test_Gemini(question = "如何应对压力?", model_path='gemini-pro', api_key=None, proxy_url=None):
+    llm = Gemini(model_path, api_key, proxy_url)
+    answer = llm.generate(question)
+    print(answer)
+
+
+if __name__ == '__main__':
+    test_Linly()
+    # test_Qwen()
+    # test_Gemini()
diff --git a/src/Linly.py b/src/Linly.py
new file mode 100644
index 0000000..67e2a22
--- /dev/null
+++ b/src/Linly.py
@@ -0,0 +1,73 @@
+import os
+import torch
+import requests
+import json
+from transformers import AutoModelForCausalLM, AutoTokenizer
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+
+class Linly:
+    def __init__(self, mode='api', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf") -> None:
+        # self.url is only used when mode == 'api'.
+        # self.url = f"http://ip:{api_port}" # local server: http://ip:port
+        self.url = f"http://172.31.58.8:7871" # local server: http://ip:port
+        self.headers = {
+            "Content-Type": "application/json"
+        }
+        self.data = {
+            "question": "北京有什么好玩的地方?"
+        }
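+        # "question" above is just a placeholder ("What are some fun places in
+        # Beijing?"); generate()/predict() overwrite it before every request.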
+        # Prompt prefix asking the model to answer in fewer than 25 characters.
+        self.prompt = '''请用少于25个字回答以下问题'''
+        self.mode = mode
+        if mode != 'api':
+            self.model, self.tokenizer = self.init_model(model_path)
+
+    def init_model(self, path = "Linly-AI/Chinese-LLaMA-2-7B-hf"):
+        model = AutoModelForCausalLM.from_pretrained(path, device_map="cuda:0",
+                                                     torch_dtype=torch.bfloat16, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(path, use_fast=False, trust_remote_code=True)
+        return model, tokenizer
+
+    def generate(self, question):
+        if self.mode != 'api':
+            self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
+            inputs = self.tokenizer(self.data["question"], return_tensors="pt").to("cuda:0")
+            try:
+                generate_ids = self.model.generate(inputs.input_ids, max_new_tokens=2048, do_sample=True, top_k=20, top_p=0.84,
+                                                   temperature=1, repetition_penalty=1.15, eos_token_id=2, bos_token_id=1,
+                                                   pad_token_id=0)
+                response = self.tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+                print('log:', response)
+                # Keep only the text after the final "### Response:" marker.
+                response = response.split("### Response:")[-1]
+                return response
+            except Exception:
+                return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"
+        else:
+            return self.predict(question)
+
+    def predict(self, question):
+        # FastAPI
+        self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
+        headers = {'Content-Type': 'application/json'}
+        data = {"prompt": question}
+        response = requests.post(url=self.url, headers=headers, data=json.dumps(data))
+        return response.json()['response']
+
+        # response = requests.post(self.url, headers=self.headers, json=self.data)
+        # self.json = response.json()
+        # answer, tag = self.json
+        # if tag == 'success':
+        #     return answer[0]
+        # else:
+        #     return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"
+
+def test():
+    #llm = Linly(mode='api')
+    #answer = llm.predict("如何应对压力?")
+    #print(answer)
+
+    llm = Linly(mode='api',model_path='Linly-AI/Chinese-LLaMA-2-7B-hf')
+    answer = llm.generate("如何应对压力?")
+    print(answer)
+
+if __name__ == '__main__':
+    test()
diff --git a/src/Qwen.py b/src/Qwen.py
new file mode 100644
index 0000000..50e5b69
--- /dev/null
+++ b/src/Qwen.py
@@ -0,0 +1,51 @@
+import os
+import torch
+import requests
+from transformers import AutoModelForCausalLM, AutoTokenizer
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+
+class Qwen:
+    def __init__(self, mode='api', model_path="Qwen/Qwen-1_8B-Chat") -> None:
+        '''The API mode is not implemented yet; it would be similar to the Linly API client. Feel free to add it.'''
+        self.url = "http://ip:port" # local server: http://ip:port
+        self.headers = {
+            "Content-Type": "application/json"
+        }
+        self.data = {
+            "question": "北京有什么好玩的地方?"
+        }
+        # Prompt prefix asking the model to answer in fewer than 25 characters.
+        self.prompt = '''请用少于25个字回答以下问题'''
+        self.mode = mode
+
+        self.model, self.tokenizer = self.init_model(model_path)
+
+    def init_model(self, path = "Qwen/Qwen-1_8B-Chat"):
+        model = AutoModelForCausalLM.from_pretrained(path,
+                                                     device_map="auto",
+                                                     trust_remote_code=True).eval()
+        tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+
+        return model, tokenizer
+
+    def generate(self, question):
+        if self.mode != 'api':
+            self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
+            try:
+                # Qwen chat checkpoints expose chat() via trust_remote_code; it
+                # applies the chat template and returns (response, history).
+                response, history = self.model.chat(self.tokenizer, self.data["question"], history=None)
+                print(history)
+                return response
+            except Exception:
+                return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"
+        else:
+            return self.predict(question)
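+
+    # An API mode for Qwen is not implemented. A minimal sketch, assuming a
+    # server with the same contract as the Linly one, would POST
+    # {"prompt": question} to self.url and return response.json()['response'].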
+    def predict(self, question):
+        '''The API version is not implemented yet; it would be similar to the Linly API client. Feel free to implement it.'''
+        pass
+
+def test():
+    llm = Qwen(mode='offline',model_path="Qwen/Qwen-1_8B-Chat")
+    answer = llm.generate("如何应对压力?")
+    print(answer)
+
+if __name__ == '__main__':
+    test()