Update app.py
app.py
CHANGED
@@ -1,6 +1,7 @@
 import spaces
 import json
 import subprocess
+import os
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
@@ -12,12 +13,19 @@ from huggingface_hub import hf_hub_download
 llm = None
 llm_model = None
 
-
+# Define the model names and paths (used as global variables)
+MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
+LLAMA_MODEL_NAME = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"
+
+# Download the model
+model_path = hf_hub_download(
     repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
-    filename=
-    local_dir
+    filename=MISTRAL_MODEL_NAME,
+    local_dir="./models"
 )
 
+print(f"Downloaded model path: {model_path}")
+
 css = """
 .bubble-wrap {
     padding-top: calc(var(--spacing-xl) * 3) !important;
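The hunk above pins both GGUF filenames as module-level constants and fetches the Mistral file at startup. A minimal sketch of that download step, assuming huggingface_hub is installed and reusing the repo and file names from the diff; hf_hub_download returns the resolved local path, and local_dir places the file under ./models instead of the default cache:

import os
from huggingface_hub import hf_hub_download

MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Fetch the GGUF file into ./models and capture its local path.
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models",
)

print(model_path)                  # e.g. models/Private-BitSix-...gguf
print(os.path.exists(model_path))  # True once the file is on disk

Note that only the Mistral file is downloaded here; the Meta-Llama-3-70B GGUF offered in the dropdown later in this diff is not fetched by this hunk and would need to be present in ./models already.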
@@ -83,7 +91,7 @@ def get_messages_formatter_type(model_name):
 def respond(
     message,
     history: list[tuple[str, str]],
-
+    model_choice,
     system_message,
     max_tokens,
     temperature,
@@ -94,17 +102,30 @@ def respond(
     global llm
     global llm_model
 
-    chat_template = get_messages_formatter_type(
+    chat_template = get_messages_formatter_type(model_choice)
+
+    # Check the model file path
+    if model_choice == MISTRAL_MODEL_NAME:
+        model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
+    else:
+        model_path = os.path.join("./models", model_choice)
+
+    print(f"Selected model: {model_choice}")
+    print(f"Model path: {model_path}")
+
+    if not os.path.exists(model_path):
+        print(f"Warning: Model file not found at {model_path}")
+        print(f"Available files in ./models: {os.listdir('./models')}")
 
-    if llm is None or llm_model !=
+    if llm is None or llm_model != model_choice:
         llm = Llama(
-            model_path=
+            model_path=model_path,
             flash_attn=True,
             n_gpu_layers=81,
             n_batch=1024,
             n_ctx=8192,
         )
-        llm_model =
+        llm_model = model_choice
 
     provider = LlamaCppPythonProvider(llm)
 
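The respond() changes above resolve the selected file under ./models, warn if it is missing, and rebuild the Llama instance only when the dropdown selection differs from the cached one. A standalone sketch of that reload-on-change pattern, with llama_cpp.Llama swapped for a stub so it runs without a model file (the stub class and paths are illustrative):

llm = None
llm_model = None

class StubLlama:
    """Stand-in for llama_cpp.Llama so the sketch runs without a GGUF file."""
    def __init__(self, model_path, **kwargs):
        self.model_path = model_path

def get_llm(model_choice):
    global llm, llm_model
    # Rebuild only when the requested model differs from the cached one.
    if llm is None or llm_model != model_choice:
        llm = StubLlama(model_path=f"./models/{model_choice}")
        llm_model = model_choice
    return llm

first = get_llm("Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf")
second = get_llm("Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf")
print(first is second)   # True: cached instance reused for the same choice
print(get_llm("Meta-Llama-3-70B-Instruct-Q3_K_M.gguf") is first)  # False: reloaded

Keying the cache on the selection avoids re-reading a multi-gigabyte GGUF on every chat turn; only switching models pays the reload cost.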
@@ -161,7 +182,7 @@ PLACEHOLDER = """
 <div style="display: flex; justify-content: space-between; align-items: center;">
 <div style="display: flex; flex-flow: column; justify-content: space-between;">
 <span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(229, 70, 77, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #f88181; margin-bottom: 2.5px;">
-Mistral Small 24B Instruct
+Private BitSix Mistral Small 3.1 24B Instruct
 </span>
 <span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(79, 70, 229, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #60a5fa; margin-top: 2.5px;">
 Meta Llama 3 70B Instruct
@@ -190,10 +211,10 @@ demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Dropdown([
-
-
+            MISTRAL_MODEL_NAME,
+            LLAMA_MODEL_NAME
         ],
-            value=
+            value=MISTRAL_MODEL_NAME,
             label="Model"
         ),
         gr.Textbox(value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.", label="System message"),
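The dropdown wired in above reaches respond() because gr.ChatInterface forwards each component in additional_inputs to the chat function, in order, after message and history; that is why model_choice was inserted as the third parameter earlier in this diff. A reduced sketch of the wiring, with a stub respond() in place of the llama_cpp_agent pipeline:

import gradio as gr

MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
LLAMA_MODEL_NAME = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"

def respond(message, history, model_choice, system_message):
    # Echo stub: the real app streams a llama_cpp_agent response here.
    return f"[{model_choice}] {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown([MISTRAL_MODEL_NAME, LLAMA_MODEL_NAME],
                    value=MISTRAL_MODEL_NAME, label="Model"),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
    ],
)

if __name__ == "__main__":
    demo.launch()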