import spaces
import json
import subprocess
import os
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download

llm = None
llm_model = None

# Model names and paths (used as globals)
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
LLAMA_MODEL_NAME = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"

# Download the model
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models"
)
print(f"Downloaded model path: {model_path}")

css = """
.bubble-wrap {
    padding-top: calc(var(--spacing-xl) * 3) !important;
}
.message-row {
    justify-content: space-evenly !important;
    width: 100% !important;
    max-width: 100% !important;
    margin: calc(var(--spacing-xl)) 0 !important;
    padding: 0 calc(var(--spacing-xl) * 3) !important;
}
.flex-wrap.user {
    border-bottom-right-radius: var(--radius-lg) !important;
}
.flex-wrap.bot {
    border-bottom-left-radius: var(--radius-lg) !important;
}
.message.user {
    padding: 10px;
}
.message.bot {
    text-align: right;
    width: 100%;
    padding: 10px;
    border-radius: 10px;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.message-buttons {
    justify-content: flex-end !important;
}
.message-buttons-left {
    align-self: end !important;
}
.message-buttons-bot, .message-buttons-user {
    right: 10px !important;
    left: auto !important;
    bottom: 2px !important;
}
.dark.message-bubble-border {
    border-color: #343140 !important;
}
.dark.user {
    background: #1e1c26 !important;
}
.dark.assistant.dark, .dark.pending.dark {
    background: #16141c !important;
}
"""


def get_messages_formatter_type(model_name):
    if "Llama" in model_name:
        return MessagesFormatterType.LLAMA_3
    elif "unsloth" in model_name:
        return MessagesFormatterType.CHATML
    elif "Mistral" in model_name or "BitSix" in model_name:
        # Mistral-family models use the ChatML format
        return MessagesFormatterType.CHATML
    else:
        raise ValueError(f"Unsupported model: {model_name}")


@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    model_choice,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    global llm
    global llm_model

    chat_template = get_messages_formatter_type(model_choice)

    # Resolve the model file path
    if model_choice == MISTRAL_MODEL_NAME:
        model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
    else:
        model_path = os.path.join("./models", model_choice)

    print(f"Selected model: {model_choice}")
    print(f"Model path: {model_path}")

    if not os.path.exists(model_path):
        print(f"Warning: Model file not found at {model_path}")
        print(f"Available files in ./models: {os.listdir('./models')}")

    # (Re)load the model only when the selection changes
    if llm is None or llm_model != model_choice:
        llm = Llama(
            model_path=model_path,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = model_choice

    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    messages = BasicChatHistory()

    # Rebuild the chat history from previous (user, assistant) turns
    for msn in history:
        user = {
            'role': Roles.user,
            'content': msn[0]
        }
        assistant = {
            'role': Roles.assistant,
            'content': msn[1]
        }
        messages.add_message(user)
        messages.add_message(assistant)

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )

    outputs = ""
    for output in stream:
        outputs += output
        yield outputs


PLACEHOLDER = """

llama-cpp-agent

The llama-cpp-agent framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval augmented generation, and processing text using agentic chains with tools.

Private BitSix Mistral Small 3.1 24B Instruct · Meta Llama 3 70B Instruct
""" demo = gr.ChatInterface( respond, additional_inputs=[ gr.Dropdown([ MISTRAL_MODEL_NAME, LLAMA_MODEL_NAME ], value=MISTRAL_MODEL_NAME, label="Model" ), gr.Textbox(value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.", label="System message"), gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"), gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), gr.Slider( minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p", ), gr.Slider( minimum=0, maximum=100, value=40, step=1, label="Top-k", ), gr.Slider( minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty", ), ], theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set( body_background_fill_dark="#16141c", block_background_fill_dark="#16141c", block_border_width="1px", block_title_background_fill_dark="#1e1c26", input_background_fill_dark="#292733", button_secondary_background_fill_dark="#24212b", border_color_accent_dark="#343140", border_color_primary_dark="#343140", background_fill_secondary_dark="#16141c", color_accent_soft_dark="transparent", code_background_fill_dark="#292733", ), css=css, retry_btn="Retry", undo_btn="Undo", clear_btn="Clear", submit_btn="Send", description="Llama-cpp-agent: Chat multi llm selection", chatbot=gr.Chatbot( scale=1, placeholder=PLACEHOLDER, likeable=False, show_copy_button=True ) ) if __name__ == "__main__": demo.launch()