Spaces: Running on Zero
import spaces
import os
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
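
# Module-level cache: the Llama instance is created once and reused across
# requests, so the model is not re-initialized on every call to respond().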
llm = None
llm_model = None
# Define the model name and path
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Download the model from the Hugging Face Hub (cached locally after the first run)
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models"
)

print(f"Downloaded model path: {model_path}")
css = """ | |
.bubble-wrap { | |
padding-top: calc(var(--spacing-xl) * 3) !important; | |
} | |
.message-row { | |
justify-content: space-evenly !important; | |
width: 100% !important; | |
max-width: 100% !important; | |
margin: calc(var(--spacing-xl)) 0 !important; | |
padding: 0 calc(var(--spacing-xl) * 3) !important; | |
} | |
.flex-wrap.user { | |
border-bottom-right-radius: var(--radius-lg) !important; | |
} | |
.flex-wrap.bot { | |
border-bottom-left-radius: var(--radius-lg) !important; | |
} | |
.message.user{ | |
padding: 10px; | |
} | |
.message.bot{ | |
text-align: right; | |
width: 100%; | |
padding: 10px; | |
border-radius: 10px; | |
} | |
.message-bubble-border { | |
border-radius: 6px !important; | |
} | |
.message-buttons { | |
justify-content: flex-end !important; | |
} | |
.message-buttons-left { | |
align-self: end !important; | |
} | |
.message-buttons-bot, .message-buttons-user { | |
right: 10px !important; | |
left: auto !important; | |
bottom: 2px !important; | |
} | |
.dark.message-bubble-border { | |
border-color: #343140 !important; | |
} | |
.dark.user { | |
background: #1e1c26 !important; | |
} | |
.dark.assistant.dark, .dark.pending.dark { | |
background: #16141c !important; | |
} | |
""" | |
def get_messages_formatter_type(model_name):
    if "Mistral" in model_name or "BitSix" in model_name:
        return MessagesFormatterType.CHATML  # Mistral-family models here use the ChatML format
    else:
        raise ValueError(f"Unsupported model: {model_name}")
@spaces.GPU  # required on ZeroGPU Spaces so this function is allocated a GPU
def respond(
    message,
    history: list[dict],  # history entries arrive as dicts, not tuples
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    global llm
    global llm_model

    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

    # Sanity-check the model file path
    model_path_local = os.path.join("./models", MISTRAL_MODEL_NAME)
    print(f"Model path: {model_path_local}")

    if not os.path.exists(model_path_local):
        print(f"Warning: Model file not found at {model_path_local}")
        print(f"Available files in ./models: {os.listdir('./models')}")
    # Load the model once and reuse it on subsequent calls
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        llm = Llama(
            model_path=model_path_local,
            flash_attn=True,
            n_gpu_layers=81,  # offload all layers to the GPU
            n_batch=1024,
            n_ctx=8192,  # context window size
        )
        llm_model = MISTRAL_MODEL_NAME
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True
    messages = BasicChatHistory()

    # With gr.Chatbot(type="messages"), each history entry is a dict of the
    # form {'role': 'user' | 'assistant', 'content': <text>}, not a
    # {'user': ..., 'assistant': ...} pair.
    for msg in history:
        role = Roles.user if msg.get('role') == 'user' else Roles.assistant
        messages.add_message({
            'role': role,
            'content': msg.get('content', '')
        })
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )

    # Accumulate tokens and yield the growing string so Gradio streams the reply
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
demo = gr.ChatInterface(
    fn=respond,
    title="Ginigen Private AI",
    description="A privacy-focused AI solution that uses 6-bit quantization to shrink the model while preserving performance: The Ginigen Private-BitSix framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval augmented generation, and processing text using agentic chains with tools.",
    theme=gr.themes.Soft(
        primary_hue="violet",
        secondary_hue="violet",
        neutral_hue="gray",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
    ).set(
        body_background_fill_dark="#16141c",
        block_background_fill_dark="#16141c",
        block_border_width="1px",
        block_title_background_fill_dark="#1e1c26",
        input_background_fill_dark="#292733",
        button_secondary_background_fill_dark="#24212b",
        border_color_accent_dark="#343140",
        border_color_primary_dark="#343140",
        background_fill_secondary_dark="#16141c",
        color_accent_soft_dark="transparent",
        code_background_fill_dark="#292733",
    ),
    css=css,
    examples=[
        ["Hello, I'm very interested in AI. What is quantization?"],
        ["What are the main features of the Mistral model?"],
        ["Please explain how to handle long contexts."]
    ],
    additional_inputs=[
        gr.Textbox(
            value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.",
            label="System message",
            lines=5
        ),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
    ],
chatbot=gr.Chatbot(type="messages") | |
) | |
if __name__ == "__main__": | |
demo.launch() | |
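
# --- Minimal client sketch (illustrative, not part of the app) ---
# gr.ChatInterface exposes a "/chat" endpoint through the Gradio API, so a
# remote caller could use gradio_client roughly as below. The Space id here
# is hypothetical; substitute the actual deployed Space.
#
#   from gradio_client import Client
#   client = Client("ginigen/Private-BitSix-Chat")  # hypothetical Space id
#   reply = client.predict(
#       message="What is quantization?",
#       api_name="/chat",
#   )
#   print(reply)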