import spaces
import os
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download

# Lazily initialized llama.cpp handle and the name of the currently loaded model.
llm = None
llm_model = None

# Define the model name and path.
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Download the model.
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models"
)

print(f"Downloaded model path: {model_path}")

css = """
.bubble-wrap {
    padding-top: calc(var(--spacing-xl) * 3) !important;
}

.message-row {
    justify-content: space-evenly !important;
    width: 100% !important;
    max-width: 100% !important;
    margin: calc(var(--spacing-xl)) 0 !important;
    padding: 0 calc(var(--spacing-xl) * 3) !important;
}

.flex-wrap.user {
    border-bottom-right-radius: var(--radius-lg) !important;
}

.flex-wrap.bot {
    border-bottom-left-radius: var(--radius-lg) !important;
}

.message.user {
    padding: 10px;
}

.message.bot {
    text-align: right;
    width: 100%;
    padding: 10px;
    border-radius: 10px;
}

.message-bubble-border {
    border-radius: 6px !important;
}

.message-buttons {
    justify-content: flex-end !important;
}

.message-buttons-left {
    align-self: end !important;
}

.message-buttons-bot, .message-buttons-user {
    right: 10px !important;
    left: auto !important;
    bottom: 2px !important;
}

.dark.message-bubble-border {
    border-color: #343140 !important;
}

.dark.user {
    background: #1e1c26 !important;
}

.dark.assistant.dark, .dark.pending.dark {
    background: #16141c !important;
}
"""


def get_messages_formatter_type(model_name):
    # Mistral-family models are formatted with the ChatML template here.
    if "Mistral" in model_name or "BitSix" in model_name:
        return MessagesFormatterType.CHATML
    else:
        raise ValueError(f"Unsupported model: {model_name}")


@spaces.GPU(duration=120)
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    global llm
    global llm_model

    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

    # Verify the model file path.
    model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
    print(f"Model path: {model_path}")

    if not os.path.exists(model_path):
        print(f"Warning: Model file not found at {model_path}")
        print(f"Available files in ./models: {os.listdir('./models')}")

    # Load the model once and reuse it across requests.
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        llm = Llama(
            model_path=model_path,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = MISTRAL_MODEL_NAME

    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    messages = BasicChatHistory()

    # The Chatbot below is created with type="messages", so Gradio passes the
    # history as a flat list of {"role": ..., "content": ...} dicts rather
    # than (user, assistant) tuples; rebuild the agent's history accordingly.
    for msg in history:
        role = Roles.user if msg["role"] == "user" else Roles.assistant
        messages.add_message({"role": role, "content": msg["content"]})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )

    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
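
# --- Illustrative only (not part of the original app) ----------------------
# A minimal smoke test for the streaming generator above. The prompt strings
# and the trailing sampling values are placeholders; the argument order
# follows respond()'s signature (message, history, system_message, max_tokens,
# temperature, top_p, top_k, repeat_penalty). Left commented out so importing
# this module never triggers a generation:
#
#   if os.path.exists(os.path.join("./models", MISTRAL_MODEL_NAME)):
#       for partial in respond(
#           "What is quantization?",          # user message
#           [],                               # empty "messages"-format history
#           "You are a helpful assistant.",   # system message
#           256, 0.7, 0.95, 40, 1.1,          # max_tokens, temperature, top_p, top_k, repeat_penalty
#       ):
#           print(partial)
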
PLACEHOLDER = """

Ginigen Private-BitSix

The Ginigen Private-BitSix framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval-augmented generation, and processing text using agentic chains with tools.

Private BitSix Mistral Small 3.1 24B Instruct
""" demo = gr.ChatInterface( fn=respond, title="Ginigen Private AI", description="6BIT 양자화로 모델 크기는 줄이고 성능은 유지하는 프라이버시 중심 AI 솔루션.", theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set( body_background_fill_dark="#16141c", block_background_fill_dark="#16141c", block_border_width="1px", block_title_background_fill_dark="#1e1c26", input_background_fill_dark="#292733", button_secondary_background_fill_dark="#24212b", border_color_accent_dark="#343140", border_color_primary_dark="#343140", background_fill_secondary_dark="#16141c", color_accent_soft_dark="transparent", code_background_fill_dark="#292733", ), css=css, examples=[ ["안녕하세요, 저는 AI에 관심이 많습니다. 양자화란 무엇인가요?"], ["미스트랄 모델의 특징은 무엇인가요?"], ["긴 컨텍스트(context)를 처리하는 방법을 설명해 주세요."] ], additional_inputs=[ gr.Textbox( value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.", label="시스템 메시지", lines=5 ), gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="최대 토큰 수"), gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"), gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"), gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"), ], chatbot=gr.Chatbot(placeholder=PLACEHOLDER, type="messages") ) if __name__ == "__main__": demo.launch()