chenjianfei committed · Commit 81d00e1 · 1 Parent(s): 73512c3
app.py CHANGED
@@ -1,4 +1,3 @@
-import ollama
 import gradio as gr
 import numpy as np
 import json
@@ -6,7 +5,7 @@ from tts_api import TTSapi, DEFAULT_TTS_MODEL_NAME
 from config import *
 from utils import *
 from knowledge_base import LocalRAG, CosPlayer
-
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 def handle_retry(history, thinking_history, config, section_state, retry_data: gr.RetryData):
     # Get the user's previous message
@@ -94,7 +93,7 @@ def predict(message, chat_history, thinking_history, config, section_state):
     input_message = section_state["chat_history"] + [{"role": "user", "content": message}]
 
     # Disable the Qwen3 series' default thinking mode
-    if config['llm_model'].startswith('qwen3'):
+    if config['llm_model'].startswith('Qwen3'):
         input_message[-1]['content'] += '/no_think'
         # input_message[-1]['content'] += '/no_think'
@@ -114,15 +113,38 @@ def predict(message, chat_history, thinking_history, config, section_state):
         gr.Warning("当前对话已经超出模型上下文长度,请开启新会话...")
     try:
         # Call the model
-
-
-
-
-
-
+        if not LLM_LOADED:
+            core_llm = AutoModelForCausalLM.from_pretrained(
+                config['llm_model'],
+                torch_dtype="auto",
+                device_map="auto"
+            )
+            core_tokenizer = AutoTokenizer.from_pretrained(config['llm_model'])
+            LLM_LOADED = True
 
-
-
+        text = core_tokenizer.apply_chat_template(
+            input_message,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        model_inputs = core_tokenizer([text], return_tensors="pt").to(core_llm.device)
+        # conduct text completion
+        generated_ids = core_llm.generate(
+            **model_inputs,
+            max_new_tokens=32768
+        )
+        output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+
+        # parsing thinking content
+        # try:
+        #     # rindex finding 151668 (</think>)
+        #     index = len(output_ids) - output_ids[::-1].index(151668)
+        # except ValueError:
+        #     index = 0
+        index = 0
+        # thinking_content = core_tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
+        thinking = None
+        response_content = core_tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
 
         # Update the conversation history
         chat_history.append({'role': 'user', 'content': message})
@@ -190,11 +212,21 @@ def predict(message, chat_history, thinking_history, config, section_state):
     return "", chat_history, thinking_history, (synthesiser.sr if synthesiser else 16000, audio_output)
 
 
-def init_model(init_llm=False, init_rag=False, init_tts=False):
+def init_model(init_llm=True, init_rag=False, init_tts=False):
     if init_llm:
         print(f'正在加载LLM:{DEFAULT_MODEL_NAME}...')
-
-
+        core_llm = AutoModelForCausalLM.from_pretrained(
+            DEFAULT_MODEL_NAME,
+            torch_dtype="auto",
+            device_map="auto"
+        )
+        print('device:', core_llm.device)
+        core_tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL_NAME)
+        LLM_LOADED = True
+    else:
+        core_llm, core_tokenizer = None, None
+        LLM_LOADED = False
+
     if init_rag:
         gr.Info("正在加载知识库,请稍候...")
         local_rag = LocalRAG(rag_top_k=RAG_TOP_K)
@@ -208,14 +240,14 @@ def init_model(init_llm=False, init_rag=False, init_tts=False):
     else:
         synthesiser = None
         TTS_LOADED = False
-    return local_rag, synthesiser, TTS_LOADED
+    return local_rag, synthesiser, core_llm, core_tokenizer, TTS_LOADED, LLM_LOADED
 
 
 if __name__ == "__main__":
     import time
     st = time.time()
     print('********************模型加载中************************')
-    local_rag, synthesiser, TTS_LOADED = init_model()
+    local_rag, synthesiser, core_llm, core_tokenizer, TTS_LOADED, LLM_LOADED = init_model()
     print('********************模型加载完成************************')
     print('耗时:',time.time() - st)
 
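Note: the rewritten predict() path hard-codes index = 0 and returns thinking = None, leaving the Qwen3 thinking-content split commented out. For reference, a minimal sketch of how that commented-out parsing works, assuming (as the commented code states) that 151668 is the </think> token id in the Qwen3 tokenizer; the helper name split_thinking is ours, not the app's:

def split_thinking(output_ids, tokenizer, think_end_id=151668):
    # Search the reversed id list: the first hit there is the LAST </think>
    # in the output; converting back gives a forward index one past the tag.
    try:
        index = len(output_ids) - output_ids[::-1].index(think_end_id)
    except ValueError:
        index = 0  # no </think> emitted: treat the whole output as the answer
    thinking = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    response = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
    return thinking, response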
config.py CHANGED
@@ -1,36 +1,13 @@
 from pathlib import Path
 import os
 
-DEFAULT_MODEL_NAME = "
+DEFAULT_MODEL_NAME = "Qwen/Qwen3-30B-A3B"
 DEFAULT_MODE = "角色扮演"
 DEFAULT_C_SETTING_MODE = "by system"
 DEFAULT_COSPLAY_SETTING = 'rag/characters/周杰伦.txt'
 AVALIABLE_MODELS = [
-    "
-    "
-    "deepseek-r1:32b",
-    "qwq",
-    "qwen2.5:0.5b-instruct",
-    "qwen2.5:0.5b",
-    # "qwen:1.8b",
-    # "qwen2.5:7b",
-    # "qwen2.5:14b",
-    "qwen2.5:32b",
-    "qwen2.5:32b-instruct",
-    "qwen7B_jaychou_f16",
-    "qwen0.5B_jaychou13",
-    "qwen2.5:14b-instruct",
-    "qwen2.5:7b-instruct",
-    "qwen2.5:3b-instruct",
-    "qwen14B_jaychou_q8_newdata_add_template",
-    "qwen2.5_32B_jaychou",
-    "qwen2.5_0.5B_jaychou_lora",
-    # "qwen2.5_32B_jaychou_tq1"
-    "qwen3:4b",
-    "qwen3:8b",
-    "qwen3:14b",
-    "qwen3:32b",
-    "qwen3:30b-a3b"
+    "Qwen/Qwen3-30B-A3B",
+    "Qwen/Qwen2.5-32B-Instruct"
 ]
 BASE_MODEL_TABLE = {"qwen7B_jaychou_f16": "qwen2.5:7b-instruct", "qwen0.5B_jaychou13": "qwen2.5:0.5b-instruct",
                     "qwen14B_jaychou_q8_newdata_add_template": "qwen2.5:14b-instruct",
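Taken together, the config.py change swaps the Ollama tags (e.g. "qwen3:30b-a3b") for Hugging Face repo ids (e.g. "Qwen/Qwen3-30B-A3B") that transformers can load directly. A minimal, self-contained sketch of the generation flow the new app.py code follows; the 0.5B checkpoint is only an illustrative stand-in for DEFAULT_MODEL_NAME, and the variable names are ours:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # stand-in; the app loads DEFAULT_MODEL_NAME
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

messages = [{"role": "user", "content": "Hello"}]
# Render the chat into the model's prompt template, then tokenize.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate, then strip the prompt tokens from the front of the output.
generated = model.generate(**inputs, max_new_tokens=128)
new_ids = generated[0][len(inputs.input_ids[0]):]
print(tokenizer.decode(new_ids, skip_special_tokens=True))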