import spaces
import os
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
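# Global cache so the model is loaded once and reused across requests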
llm = None
llm_model = None
# ๋ชจ๋ธ ์ด๋ฆ„๊ณผ ๊ฒฝ๋กœ๋ฅผ ์ •์˜
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
# ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models"
)
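# hf_hub_download caches the file locally and returns its path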
print(f"Downloaded model path: {model_path}")
css = """
.bubble-wrap {
    padding-top: calc(var(--spacing-xl) * 3) !important;
}

.message-row {
    justify-content: space-evenly !important;
    width: 100% !important;
    max-width: 100% !important;
    margin: calc(var(--spacing-xl)) 0 !important;
    padding: 0 calc(var(--spacing-xl) * 3) !important;
}

.flex-wrap.user {
    border-bottom-right-radius: var(--radius-lg) !important;
}

.flex-wrap.bot {
    border-bottom-left-radius: var(--radius-lg) !important;
}

.message.user {
    padding: 10px;
}

.message.bot {
    text-align: right;
    width: 100%;
    padding: 10px;
    border-radius: 10px;
}

.message-bubble-border {
    border-radius: 6px !important;
}

.message-buttons {
    justify-content: flex-end !important;
}

.message-buttons-left {
    align-self: end !important;
}

.message-buttons-bot, .message-buttons-user {
    right: 10px !important;
    left: auto !important;
    bottom: 2px !important;
}

.dark.message-bubble-border {
    border-color: #343140 !important;
}

.dark.user {
    background: #1e1c26 !important;
}

.dark.assistant.dark, .dark.pending.dark {
    background: #16141c !important;
}
"""
def get_messages_formatter_type(model_name):
    if "Mistral" in model_name or "BitSix" in model_name:
        return MessagesFormatterType.CHATML  # Mistral-family models here use the ChatML format
    else:
        raise ValueError(f"Unsupported model: {model_name}")
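# Allocate a GPU (HF Spaces ZeroGPU) for up to 120 seconds per call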
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[dict],  # history entries arrive as dicts rather than tuples
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    global llm
    global llm_model
    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)
    # Check the local model file path
    model_path_local = os.path.join("./models", MISTRAL_MODEL_NAME)
    print(f"Model path: {model_path_local}")
    if not os.path.exists(model_path_local):
        print(f"Warning: Model file not found at {model_path_local}")
        print(f"Available files in ./models: {os.listdir('./models')}")
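    # (Re)load the model only when it is absent or a different model is requested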
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        llm = Llama(
            model_path=model_path_local,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = MISTRAL_MODEL_NAME
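    # Expose the loaded model through llama-cpp-agent's provider interface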
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True
    messages = BasicChatHistory()
    # With gr.Chatbot(type="messages"), each history entry is a dict of the
    # form {'role': 'user' | 'assistant', 'content': <text>}
    for msn in history:
        role = Roles.user if msn.get('role') == 'user' else Roles.assistant
        messages.add_message({
            'role': role,
            'content': msn.get('content', '')
        })
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
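# Gradio chat UI; the sampling controls below are passed to respond() as additional inputs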
demo = gr.ChatInterface(
    fn=respond,
    title="Ginigen Private AI",
    description="A privacy-focused AI solution that uses 6-bit quantization to shrink the model while preserving performance: The Ginigen Private-BitSix framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval augmented generation, and processing text using agentic chains with tools.",
    theme=gr.themes.Soft(
        primary_hue="violet",
        secondary_hue="violet",
        neutral_hue="gray",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
    ).set(
        body_background_fill_dark="#16141c",
        block_background_fill_dark="#16141c",
        block_border_width="1px",
        block_title_background_fill_dark="#1e1c26",
        input_background_fill_dark="#292733",
        button_secondary_background_fill_dark="#24212b",
        border_color_accent_dark="#343140",
        border_color_primary_dark="#343140",
        background_fill_secondary_dark="#16141c",
        color_accent_soft_dark="transparent",
        code_background_fill_dark="#292733",
    ),
    css=css,
    examples=[
        ["Hello, I'm very interested in AI. What is quantization?"],
        ["What are the characteristics of Mistral models?"],
        ["Please explain how to handle long contexts."]
    ],
    additional_inputs=[
        gr.Textbox(
            value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.",
            label="System message",
            lines=5
        ),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
    ],
    chatbot=gr.Chatbot(type="messages")
)
if __name__ == "__main__":
    demo.launch()