Commit bbc84bc (verified) · Parent(s): 3779abf
ginipick committed

Update app.py

Files changed (1): app.py (+33 -12)

app.py CHANGED
@@ -1,6 +1,7 @@
 import spaces
 import json
 import subprocess
+import os
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
@@ -12,12 +13,19 @@ from huggingface_hub import hf_hub_download
 llm = None
 llm_model = None
 
-hf_hub_download(
+# Define the model names and paths (used as globals)
+MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
+LLAMA_MODEL_NAME = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"
+
+# Download the model
+model_path = hf_hub_download(
     repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
-    filename="Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf",
-    local_dir = "./models"
+    filename=MISTRAL_MODEL_NAME,
+    local_dir="./models"
 )
 
+print(f"Downloaded model path: {model_path}")
+
 css = """
 .bubble-wrap {
     padding-top: calc(var(--spacing-xl) * 3) !important;
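Note that only the Mistral GGUF is fetched at startup, while the model dropdown further down also lists the Llama 3 70B GGUF. Below is a minimal sketch of pre-fetching every dropdown option so each selection resolves to a local file; the Llama repo_id is a hypothetical placeholder, not taken from this commit.

```python
from huggingface_hub import hf_hub_download

# Map each GGUF filename to its source repo.
# The Llama entry is a hypothetical placeholder; only the Mistral repo is named in app.py.
GGUF_SOURCES = {
    "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf":
        "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf": "your-org/Meta-Llama-3-70B-Instruct-GGUF",
}

for filename, repo_id in GGUF_SOURCES.items():
    # hf_hub_download returns the resolved local path and reuses the cache when the file is already present
    local_path = hf_hub_download(repo_id=repo_id, filename=filename, local_dir="./models")
    print(f"Ready: {local_path}")
```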
@@ -83,7 +91,7 @@ def get_messages_formatter_type(model_name):
 def respond(
     message,
     history: list[tuple[str, str]],
-    model,
+    model_choice,
     system_message,
     max_tokens,
     temperature,
@@ -94,17 +102,30 @@
     global llm
     global llm_model
 
-    chat_template = get_messages_formatter_type(model)
+    chat_template = get_messages_formatter_type(model_choice)
+
+    # Check the model file path
+    if model_choice == MISTRAL_MODEL_NAME:
+        model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
+    else:
+        model_path = os.path.join("./models", model_choice)
+
+    print(f"Selected model: {model_choice}")
+    print(f"Model path: {model_path}")
+
+    if not os.path.exists(model_path):
+        print(f"Warning: Model file not found at {model_path}")
+        print(f"Available files in ./models: {os.listdir('./models')}")
 
-    if llm is None or llm_model != model:
+    if llm is None or llm_model != model_choice:
         llm = Llama(
-            model_path=f"models/{model}",
+            model_path=model_path,
             flash_attn=True,
             n_gpu_layers=81,
             n_batch=1024,
             n_ctx=8192,
         )
-        llm_model = model
+        llm_model = model_choice
 
     provider = LlamaCppPythonProvider(llm)
 
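The reload guard above keeps a single Llama instance alive and only re-creates it when the dropdown selection changes. A minimal sketch of the same pattern pulled into a helper, assuming the settings shown in the diff (the function name and module layout are ours, not part of the commit):

```python
import os
from llama_cpp import Llama

_llm = None
_llm_name = None

def get_llm(model_name: str, models_dir: str = "./models") -> Llama:
    """Return a cached Llama instance, reloading only when the selected file changes."""
    global _llm, _llm_name
    model_path = os.path.join(models_dir, model_name)
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")
    if _llm is None or _llm_name != model_name:
        _llm = Llama(
            model_path=model_path,
            flash_attn=True,
            n_gpu_layers=81,  # same settings as in the commit
            n_batch=1024,
            n_ctx=8192,
        )
        _llm_name = model_name
    return _llm
```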
 
@@ -161,7 +182,7 @@ PLACEHOLDER = """
     <div style="display: flex; justify-content: space-between; align-items: center;">
         <div style="display: flex; flex-flow: column; justify-content: space-between;">
             <span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(229, 70, 77, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #f88181; margin-bottom: 2.5px;">
-                Mistral Small 24B Instruct 2501
+                Private BitSix Mistral Small 3.1 24B Instruct
             </span>
             <span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(79, 70, 229, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #60a5fa; margin-top: 2.5px;">
                 Meta Llama 3 70B Instruct
@@ -190,10 +211,10 @@ demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Dropdown([
-                'Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf',
-                'Meta-Llama-3-70B-Instruct-Q3_K_M.gguf'
+                MISTRAL_MODEL_NAME,
+                LLAMA_MODEL_NAME
             ],
-            value="Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf",
+            value=MISTRAL_MODEL_NAME,
             label="Model"
         ),
         gr.Textbox(value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.", label="System message"),
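For context, gr.ChatInterface passes each entry of additional_inputs to the callback positionally after message and history, which is why model_choice is the third parameter of respond. A stripped-down sketch of that wiring with a stub handler (values simplified; not the Space's actual configuration):

```python
import gradio as gr

MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
LLAMA_MODEL_NAME = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"

def respond(message, history, model_choice, system_message, max_tokens):
    # Stub handler: echoes the selection instead of running inference
    return f"[{model_choice}] {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown([MISTRAL_MODEL_NAME, LLAMA_MODEL_NAME],
                    value=MISTRAL_MODEL_NAME, label="Model"),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=8192, value=2048, label="Max tokens"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```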
 