Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -13,9 +13,8 @@ from huggingface_hub import hf_hub_download
 llm = None
 llm_model = None
 
-# Define the model name and path
+# Define the model name and path
 MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
-LLAMA_MODEL_NAME = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"
 
 # Download the model
 model_path = hf_hub_download(
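The hf_hub_download call itself is cut off in this view, so its arguments are not part of the diff. As a rough, self-contained sketch of the pattern this block relies on, with a placeholder repo_id (the real repository id is not visible here) and local_dir="./models" matching the path that respond() checks later in the diff:

from huggingface_hub import hf_hub_download

MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Sketch only: repo_id is a hypothetical placeholder, not taken from the commit.
model_path = hf_hub_download(
    repo_id="owner/repo",        # hypothetical
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models",
)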
@@ -78,11 +77,7 @@ css = """
 """
 
 def get_messages_formatter_type(model_name):
-    if "
-        return MessagesFormatterType.LLAMA_3
-    elif "unsloth" in model_name:
-        return MessagesFormatterType.CHATML
-    elif "Mistral" in model_name or "BitSix" in model_name:
+    if "Mistral" in model_name or "BitSix" in model_name:
         return MessagesFormatterType.CHATML  # Mistral-family models use the ChatML format
     else:
         raise ValueError(f"Unsupported model: {model_name}")
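To make the simplified helper concrete, this is how it now resolves for the only remaining model; the MessagesFormatterType import path is the usual llama-cpp-agent one and is an assumption, since the import block is not part of this diff:

from llama_cpp_agent import MessagesFormatterType  # assumed import location

# With the Llama-3 and unsloth branches removed, only Mistral/BitSix names resolve;
# any other model name now raises ValueError("Unsupported model: ...").
fmt = get_messages_formatter_type("Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf")
assert fmt == MessagesFormatterType.CHATML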
@@ -91,7 +86,6 @@ def get_messages_formatter_type(model_name):
 def respond(
     message,
     history: list[tuple[str, str]],
-    model_choice,
     system_message,
     max_tokens,
     temperature,
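Only the head of the signature appears in this hunk. Judging from the Gradio inputs wired up further down (msg, chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty), the full signature after this change is presumably:

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    ...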
@@ -102,22 +96,18 @@ def respond(
     global llm
     global llm_model
 
-    chat_template = get_messages_formatter_type(model_choice)
+    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)
 
     # Check the model file path
-
-        model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
-    else:
-        model_path = os.path.join("./models", model_choice)
+    model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
 
-    print(f"Selected model: {model_choice}")
     print(f"Model path: {model_path}")
 
     if not os.path.exists(model_path):
        print(f"Warning: Model file not found at {model_path}")
        print(f"Available files in ./models: {os.listdir('./models')}")
 
-    if llm is None or llm_model != model_choice:
+    if llm is None or llm_model != MISTRAL_MODEL_NAME:
         llm = Llama(
             model_path=model_path,
             flash_attn=True,
@@ -125,7 +115,7 @@ def respond(
             n_batch=1024,
             n_ctx=8192,
         )
-        llm_model = model_choice
+        llm_model = MISTRAL_MODEL_NAME
 
     provider = LlamaCppPythonProvider(llm)
 
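Taken together with the previous hunk, the loading logic now targets a single hard-coded model and caches it in module-level globals. A self-contained sketch of that cache-and-reuse pattern, using only the constructor arguments visible in the diff; the line between the two hunks is not shown, so the n_gpu_layers value below is an assumption:

import os
from llama_cpp import Llama
from llama_cpp_agent.providers import LlamaCppPythonProvider

MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
llm = None
llm_model = None

def get_provider():
    """Load the GGUF once and reuse it across requests (module-level cache)."""
    global llm, llm_model
    model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        llm = Llama(
            model_path=model_path,
            flash_attn=True,
            n_gpu_layers=-1,   # assumption: hidden by the elided line; -1 offloads all layers
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = MISTRAL_MODEL_NAME
    return LlamaCppPythonProvider(llm)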
@@ -232,13 +222,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet"
             )
             submit_btn = gr.Button("전송", variant="primary")
 
-        with gr.Column(scale=1):
-            model_choice = gr.Dropdown(
-                [MISTRAL_MODEL_NAME, LLAMA_MODEL_NAME],
-                value=MISTRAL_MODEL_NAME,
-                label="모델"
-            )
-
+        with gr.Column(scale=1):
             system_message = gr.Textbox(
                 value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.",
                 label="시스템 메시지",
@@ -253,12 +237,11 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet"
 
         with gr.Row():
             clear_btn = gr.Button("대화 초기화")
-
-
+
     # Event handlers
     submit_btn.click(
         fn=respond,
-        inputs=[msg, chatbot, model_choice, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
+        inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
         outputs=[chatbot],
         queue=True
     ).then(
@@ -269,7 +252,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet"
 
     msg.submit(
         fn=respond,
-        inputs=[msg, chatbot, model_choice, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
+        inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
         outputs=[chatbot],
         queue=True
     ).then(
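Both wiring hunks follow the same click/submit pattern. A minimal, runnable sketch of that pattern with dummy components; the .then() continuation is not visible in this view, so a hypothetical textbox-clearing step stands in for it, and respond() here is a stub rather than the Space's streaming implementation:

import gradio as gr

def respond(message, history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
    # Stub: the real respond() streams tokens from llama.cpp into the chat history.
    return (history or []) + [(message, f"echo: {message}")]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    system_message = gr.Textbox(value="You are a helpful assistant.", label="System message")
    max_tokens = gr.Slider(1, 8192, value=2048, label="Max tokens")
    temperature = gr.Slider(0.0, 2.0, value=0.7, label="Temperature")
    top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
    top_k = gr.Slider(1, 100, value=40, step=1, label="Top-k")
    repeat_penalty = gr.Slider(0.0, 2.0, value=1.1, label="Repeat penalty")
    submit_btn = gr.Button("Send")

    # msg.submit is wired identically in the Space; only the button path is shown here.
    submit_btn.click(
        fn=respond,
        inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
        outputs=[chatbot],
        queue=True,
    ).then(
        fn=lambda: "",        # hypothetical follow-up step: clear the input box
        inputs=None,
        outputs=[msg],
    )

if __name__ == "__main__":
    demo.launch()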
@@ -279,18 +262,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet"
     )
 
     clear_btn.click(lambda: None, None, chatbot, queue=False)
-
-    retry_btn.click(
-        fn=lambda x: x[:-1],
-        inputs=[chatbot],
-        outputs=[chatbot],
-        queue=False
-    ).then(
-        fn=respond,
-        inputs=[chatbot[-1][0] if len(chatbot) > 0 else "", chatbot, model_choice, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
-        outputs=[chatbot],
-        queue=True
-    )
 
 if __name__ == "__main__":
     demo.launch()