Spaces:

ginigen
/

Mistral-Perflexity

Running on Zero

App Files Files Community

ginipick committed on Mar 31

Commit

632d6e5

verified ·

1 Parent(s): 7a7a507

Create app-backup.py

Browse files

Files changed (1) hide show

app-backup.py +230 -0

app-backup.py ADDED Viewed

	@@ -0,0 +1,230 @@

+import spaces
+import json
+import subprocess
+import os
+from llama_cpp import Llama
+from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
+from llama_cpp_agent.providers import LlamaCppPythonProvider
+from llama_cpp_agent.chat_history import BasicChatHistory
+from llama_cpp_agent.chat_history.messages import Roles
+import gradio as gr
+from huggingface_hub import hf_hub_download
+# Cached Llama instance and the model file name it was created for.
+# Both start as None and are assigned inside respond() on first use.
+llm = None
+llm_model = None
+# Define the model file name (also used to build the local path in respond()).
+MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
+# Download the model file; this runs at import time, with ./models as the target directory.
+model_path = hf_hub_download(
+    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
+    filename=MISTRAL_MODEL_NAME,
+    local_dir="./models"
+)
+print(f"Downloaded model path: {model_path}")
+# Custom stylesheet handed to gr.ChatInterface(css=css): adjusts chat bubble
+# spacing/alignment, message button placement and dark-mode colors.
+# NOTE(review): selectors such as `.dark.message-bubble-border` and `.dark.user`
+# have no space, so they only match an element carrying BOTH classes; if the
+# intent is "inside a .dark ancestor", a descendant selector is needed - confirm.
+css = """
+.bubble-wrap {
+    padding-top: calc(var(--spacing-xl) * 3) !important;
+}
+.message-row {
+    justify-content: space-evenly !important;
+    width: 100% !important;
+    max-width: 100% !important;
+    margin: calc(var(--spacing-xl)) 0 !important;
+    padding: 0 calc(var(--spacing-xl) * 3) !important;
+}
+.flex-wrap.user {
+    border-bottom-right-radius: var(--radius-lg) !important;
+}
+.flex-wrap.bot {
+    border-bottom-left-radius: var(--radius-lg) !important;
+}
+.message.user{
+    padding: 10px;
+}
+.message.bot{
+    text-align: right;
+    width: 100%;
+    padding: 10px;
+    border-radius: 10px;
+}
+.message-bubble-border {
+    border-radius: 6px !important;
+}
+.message-buttons {
+    justify-content: flex-end !important;
+}
+.message-buttons-left {
+    align-self: end !important;
+}
+.message-buttons-bot, .message-buttons-user {
+    right: 10px !important;
+    left: auto !important;
+    bottom: 2px !important;
+}
+.dark.message-bubble-border {
+    border-color: #343140 !important;
+}
+.dark.user {
+    background: #1e1c26 !important;
+}
+.dark.assistant.dark, .dark.pending.dark {
+    background: #16141c !important;
+}
+"""
+def get_messages_formatter_type(model_name):
+    if "Mistral" in model_name or "BitSix" in model_name:
+        return MessagesFormatterType.CHATML  # Mistral 계열 모델은 ChatML 형식 사용
+    else:
+        raise ValueError(f"Unsupported model: {model_name}")
+@spaces.GPU(duration=120)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    top_k,
+    repeat_penalty,
+):
+    global llm
+    global llm_model
+    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)
+    # 모델 파일 경로 확인
+    model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
+    print(f"Model path: {model_path}")
+    if not os.path.exists(model_path):
+        print(f"Warning: Model file not found at {model_path}")
+        print(f"Available files in ./models: {os.listdir('./models')}")
+    if llm is None or llm_model != MISTRAL_MODEL_NAME:
+        llm = Llama(
+            model_path=model_path,
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=8192,
+        )
+        llm_model = MISTRAL_MODEL_NAME
+    provider = LlamaCppPythonProvider(llm)
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True
+    )
+    settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    settings.stream = True
+    messages = BasicChatHistory()
+    for msn in history:
+        user = {
+            'role': Roles.user,
+            'content': msn[0]
+        }
+        assistant = {
+            'role': Roles.assistant,
+            'content': msn[1]
+        }
+        messages.add_message(user)
+        messages.add_message(assistant)
+    stream = agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False
+    )
+    outputs = ""
+    for output in stream:
+        outputs += output
+        yield outputs
+# HTML card passed to gr.Chatbot(placeholder=PLACEHOLDER): a logo image, a short
+# product blurb, a model-name badge and a Discord link icon.
+PLACEHOLDER = """
+<div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 6px; border-width: 1px; border-color: #e5e7eb; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
+    <figure style="margin: 0;max-width: 200px;min-height: 300px;">
+        <img src="https://huggingface.co/spaces/ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503/resolve/main/llama.jpg" alt="Logo" style="width: 100%; height: 100%; border-radius: 8px;">
+    </figure>
+    <div style="padding: .5rem 1.5rem;display: flex;flex-direction: column;justify-content: space-evenly;">
+        <h2 style="text-align: left; font-size: 1.5rem; font-weight: 700; margin-bottom: 0.5rem;">Ginigen Private-BitSix</h2>
+        <p style="text-align: left; font-size: 16px; line-height: 1.5; margin-bottom: 15px;">The Ginigen Private-BitSix framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval augmented generation, and processing text using agentic chains with tools.</p>
+        <div style="display: flex; justify-content: space-between; align-items: center;">
+            <div style="display: flex; flex-flow: column; justify-content: space-between;">
+                <span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(229, 70, 77, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #f88181; margin-bottom: 2.5px;">
+                    Private BitSix Mistral Small 3.1 24B Instruct
+                </span>
+            </div>
+            <div style="display: flex; justify-content: flex-end; align-items: center;">
+                <a href="https://discord.gg/openfreeai" target="_blank" rel="noreferrer" style="padding: .5rem;">
+                    <svg width="24" height="24" fill="currentColor" xmlns="http://www.w3.org/2000/svg" viewBox="0 5 30.67 23.25">
+                        <title>Discord</title>
+                        <path d="M26.0015 6.9529C24.0021 6.03845 21.8787 5.37198 19.6623 5C19.3833 5.48048 19.0733 6.13144 18.8563 6.64292C16.4989 6.30193 14.1585 6.30193 11.8336 6.64292C11.6166 6.13144 11.2911 5.48048 11.0276 5C8.79575 5.37198 6.67235 6.03845 4.6869 6.9529C0.672601 12.8736 -0.41235 18.6548 0.130124 24.3585C2.79599 26.2959 5.36889 27.4739 7.89682 28.2489C8.51679 27.4119 9.07477 26.5129 9.55525 25.5675C8.64079 25.2265 7.77283 24.808 6.93587 24.312C7.15286 24.1571 7.36986 23.9866 7.57135 23.8161C12.6241 26.1255 18.0969 26.1255 23.0876 23.8161C23.3046 23.9866 23.5061 24.1571 23.7231 24.312C22.8861 24.808 22.0182 25.2265 21.1037 25.5675C21.5842 26.5129 22.1422 27.4119 22.7621 28.2489C25.2885 27.4739 27.8769 26.2959 30.5288 24.3585C31.1952 17.7559 29.4733 12.0212 26.0015 6.9529ZM10.2527 20.8402C8.73376 20.8402 7.49382 19.4608 7.49382 17.7714C7.49382 16.082 8.70276 14.7025 10.2527 14.7025C11.7871 14.7025 13.0425 16.082 13.0115 17.7714C13.0115 19.4608 11.7871 20.8402 10.2527 20.8402ZM20.4373 20.8402C18.9183 20.8402 17.6768 19.4608 17.6768 17.7714C17.6768 16.082 18.8873 14.7025 20.4373 14.7025C21.9717 14.7025 23.2271 16.082 23.1961 17.7714C23.1961 19.4608 21.9872 20.8402 20.4373 20.8402Z"></path>
+                    </svg>
+                </a>
+            </div>
+        </div>
+    </div>
+</div>
+"""
+# Build the chat UI: respond() is the streaming handler, and the widgets in
+# additional_inputs are passed to it positionally after (message, history).
+demo = gr.ChatInterface(
+    fn=respond,
+    title="Ginigen Private AI",
+    description="6BIT 양자화로 모델 크기는 줄이고 성능은 유지하는 프라이버시 중심 AI 솔루션.",
+    # Soft theme with violet accents and explicit dark-mode color overrides.
+    theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
+        body_background_fill_dark="#16141c",
+        block_background_fill_dark="#16141c",
+        block_border_width="1px",
+        block_title_background_fill_dark="#1e1c26",
+        input_background_fill_dark="#292733",
+        button_secondary_background_fill_dark="#24212b",
+        border_color_accent_dark="#343140",
+        border_color_primary_dark="#343140",
+        background_fill_secondary_dark="#16141c",
+        color_accent_soft_dark="transparent",
+        code_background_fill_dark="#292733",
+    ),
+    css=css,
+    # Each example supplies only the message; the additional inputs keep their defaults.
+    examples=[
+        ["안녕하세요, 저는 AI에 관심이 많습니다. 양자화란 무엇인가요?"],
+        ["미스트랄 모델의 특징은 무엇인가요?"],
+        ["긴 컨텍스트(context)를 처리하는 방법을 설명해 주세요."]
+    ],
+    # Order here must match respond()'s parameters: system_message, max_tokens,
+    # temperature, top_p, top_k, repeat_penalty.
+    additional_inputs=[
+        # NOTE(review): the default prompt says "inside tags" without naming the
+        # tags; the tag names may have been lost from this text - confirm.
+        gr.Textbox(
+            value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.",
+            label="시스템 메시지",
+            lines=5
+        ),
+        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="최대 토큰 수"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
+        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
+    ],
+    # NOTE(review): with type="messages" Gradio is expected to supply history as
+    # role/content dicts - verify respond() handles that shape.
+    chatbot=gr.Chatbot(placeholder=PLACEHOLDER, type="messages")
+)
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()