Update app.py
app.py
CHANGED
@@ -1,6 +1,7 @@
 import spaces
 import json
 import subprocess
+import os
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
@@ -12,12 +13,19 @@ from huggingface_hub import hf_hub_download
 llm = None
 llm_model = None
 
-
+# Define the model names and paths (used as global variables)
+MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
+LLAMA_MODEL_NAME = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"
+
+# Download the model
+model_path = hf_hub_download(
     repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
-    filename=
-    local_dir
+    filename=MISTRAL_MODEL_NAME,
+    local_dir="./models"
 )
 
+print(f"Downloaded model path: {model_path}")
+
 css = """
 .bubble-wrap {
     padding-top: calc(var(--spacing-xl) * 3) !important;
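The hunk above pins both GGUF filenames as module-level constants and fetches the Mistral file at startup. A minimal sketch of that download step, assuming huggingface_hub is installed and reusing the repo and file names from the diff; hf_hub_download returns the resolved local path, and local_dir places the file under ./models instead of the default cache:

import os
from huggingface_hub import hf_hub_download

MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Fetch the GGUF file into ./models and capture its local path.
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models",
)

print(model_path)                  # e.g. models/Private-BitSix-...gguf
print(os.path.exists(model_path))  # True once the file is on disk

Note that only the Mistral file is downloaded here; the Meta-Llama-3-70B GGUF offered in the dropdown later in this diff is not fetched by this hunk and would need to be present in ./models already.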
@@ -83,7 +91,7 @@ def get_messages_formatter_type(model_name):
 def respond(
     message,
     history: list[tuple[str, str]],
-
+    model_choice,
     system_message,
     max_tokens,
     temperature,
@@ -94,17 +102,30 @@ def respond(
     global llm
     global llm_model
 
-    chat_template = get_messages_formatter_type(
+    chat_template = get_messages_formatter_type(model_choice)
+
+    # Check the model file path
+    if model_choice == MISTRAL_MODEL_NAME:
+        model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
+    else:
+        model_path = os.path.join("./models", model_choice)
+
+    print(f"Selected model: {model_choice}")
+    print(f"Model path: {model_path}")
+
+    if not os.path.exists(model_path):
+        print(f"Warning: Model file not found at {model_path}")
+        print(f"Available files in ./models: {os.listdir('./models')}")
 
-    if llm is None or llm_model !=
+    if llm is None or llm_model != model_choice:
         llm = Llama(
-            model_path=
+            model_path=model_path,
             flash_attn=True,
             n_gpu_layers=81,
             n_batch=1024,
             n_ctx=8192,
         )
-        llm_model =
+        llm_model = model_choice
 
     provider = LlamaCppPythonProvider(llm)
 
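The respond() changes above resolve the selected file under ./models, warn if it is missing, and rebuild the Llama instance only when the dropdown selection differs from the cached one. A standalone sketch of that reload-on-change pattern, with llama_cpp.Llama swapped for a stub so it runs without a model file (the stub class and paths are illustrative):

llm = None
llm_model = None

class StubLlama:
    """Stand-in for llama_cpp.Llama so the sketch runs without a GGUF file."""
    def __init__(self, model_path, **kwargs):
        self.model_path = model_path

def get_llm(model_choice):
    global llm, llm_model
    # Rebuild only when the requested model differs from the cached one.
    if llm is None or llm_model != model_choice:
        llm = StubLlama(model_path=f"./models/{model_choice}")
        llm_model = model_choice
    return llm

first = get_llm("Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf")
second = get_llm("Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf")
print(first is second)   # True: cached instance reused for the same choice
print(get_llm("Meta-Llama-3-70B-Instruct-Q3_K_M.gguf") is first)  # False: reloaded

Keying the cache on the selection avoids re-reading a multi-gigabyte GGUF on every chat turn; only switching models pays the reload cost.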
@@ -161,7 +182,7 @@ PLACEHOLDER = """
 <div style="display: flex; justify-content: space-between; align-items: center;">
 <div style="display: flex; flex-flow: column; justify-content: space-between;">
 <span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(229, 70, 77, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #f88181; margin-bottom: 2.5px;">
-Mistral Small 24B Instruct
+Private BitSix Mistral Small 3.1 24B Instruct
 </span>
 <span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(79, 70, 229, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #60a5fa; margin-top: 2.5px;">
 Meta Llama 3 70B Instruct
@@ -190,10 +211,10 @@ demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Dropdown([
-
-
+            MISTRAL_MODEL_NAME,
+            LLAMA_MODEL_NAME
         ],
-            value=
+            value=MISTRAL_MODEL_NAME,
             label="Model"
         ),
         gr.Textbox(value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.", label="System message"),
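The dropdown wired in above reaches respond() because gr.ChatInterface forwards each component in additional_inputs to the chat function, in order, after message and history; that is why model_choice was inserted as the third parameter earlier in this diff. A reduced sketch of the wiring, with a stub respond() in place of the llama_cpp_agent pipeline:

import gradio as gr

MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
LLAMA_MODEL_NAME = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"

def respond(message, history, model_choice, system_message):
    # Echo stub: the real app streams a llama_cpp_agent response here.
    return f"[{model_choice}] {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown([MISTRAL_MODEL_NAME, LLAMA_MODEL_NAME],
                    value=MISTRAL_MODEL_NAME, label="Model"),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
    ],
)

if __name__ == "__main__":
    demo.launch()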