"""Gradio chat demo that serves Dolphin GGUF models through llama-cpp-python.

At import time this script installs its llama.cpp dependencies with pip,
downloads the GGUF model files into ``./models`` and builds the
``gr.ChatInterface`` exposed as ``demo``.
"""

import json
import subprocess
import sys

import gradio as gr
import spaces
from huggingface_hub import hf_hub_download


def _pip_install(*pip_args: str) -> None:
    """Run ``pip install`` with *pip_args* using the current interpreter.

    The command is passed as an argument list (no shell) and goes through
    ``sys.executable -m pip`` so the packages land in the environment this
    script is running in. A failing install is reported on stderr instead of
    being silently ignored; it does not raise, so start-up continues exactly
    as before.
    """
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", *pip_args],
        check=False,
    )
    if completed.returncode != 0:
        print(
            f"WARNING: 'pip install {' '.join(pip_args)}' exited with "
            f"code {completed.returncode}",
            file=sys.stderr,
        )


# The llama.cpp bindings are installed at start-up; `respond` imports them
# lazily, so they only need to exist by the time the first chat request runs.
_pip_install(
    "llama-cpp-python==0.2.75",
    "--extra-index-url",
    "https://abetlen.github.io/llama-cpp-python/whl/cu124",
)
_pip_install("llama-cpp-agent==0.2.10")

# Model files offered in the "Model" dropdown below.
hf_hub_download(
    repo_id="bartowski/dolphin-2.9.1-yi-1.5-34b-GGUF",
    filename="dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf",
    local_dir="./models",
)
hf_hub_download(
    repo_id="crusoeai/dolphin-2.9.1-llama-3-70b-GGUF",
    filename="dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf",
    local_dir="./models",
)
# Alternative models, currently disabled:
# hf_hub_download(repo_id="bartowski/dolphin-2.9.1-yi-1.5-9b-GGUF", filename="dolphin-2.9.1-yi-1.5-9b-f32.gguf", local_dir = "./models")
# hf_hub_download(repo_id="crusoeai/dolphin-2.9.1-llama-3-8b-GGUF", filename="dolphin-2.9.1-llama-3-8b.Q6_K.gguf", local_dir = "./models")

# Custom CSS handed to gr.ChatInterface. Built from adjacent literals so each
# rule sits on its own source line; the resulting string is a single line.
css = (
    " .message-row { justify-content: space-evenly !important; }"
    " .message-bubble-border { border-radius: 6px !important; }"
    " .dark.message-bubble-border { border-color: #21293b !important; }"
    " .dark.user { background: #0a1120 !important; }"
    " .dark.assistant { background: transparent !important; } "
)

# Text shown by the chatbot component while the conversation is empty.
# NOTE(review): this reads like markup-stripped text (e.g. "Logo",
# "Discord GitHub") -- confirm against the intended placeholder content.
PLACEHOLDER = """
Logo

Cognitive Computations

Cognitive Computations aims to be at the forefront of the AI revolution. We collaborate with brilliant minds from academia and industry, leveraging their expertise to develop groundbreaking solutions that address real-world challenges and unlock new possibilities.

Discord GitHub
"""


@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
    model,
):
    """Stream a chat reply to *message* from the selected GGUF model.

    Args:
        message: The new user message to answer.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        max_tokens: Value assigned to the sampling settings' ``max_tokens``.
        temperature: Value assigned to the sampling settings' ``temperature``.
        top_p: Value assigned to the sampling settings' ``top_p``.
        top_k: Value assigned to the sampling settings' ``top_k``.
        repeat_penalty: Value assigned to the sampling settings'
            ``repeat_penalty``.
        model: File name of the model to load from the ``models`` directory.

    Yields:
        The reply accumulated so far, once per chunk received from the
        streaming generator.
    """
    # Imported here rather than at module level: these packages are installed
    # by the pip calls that run when this module is first imported.
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent
    from llama_cpp_agent import MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles

    print(message)
    print(history)

    # A fresh model instance is created for every request.
    llm = Llama(
        model_path=f"models/{model}",
        flash_attn=True,
        n_threads=40,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt="You are Dolphin an AI assistant that helps humanity.",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True,
    )

    # Start from the provider defaults and override with the UI values.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay the earlier turns into the agent's chat-history container.
    messages = BasicChatHistory()
    for turn in history:
        messages.add_message({'role': Roles.user, 'content': turn[0]})
        messages.add_message({'role': Roles.assistant, 'content': turn[1]})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the cumulative text so the UI can redraw the growing reply.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs


demo = gr.ChatInterface(
    respond,
    # Order must match respond()'s parameters after (message, history).
    additional_inputs=[
        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=40,
            step=1,
            label="Top-k",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=1.1,
            step=0.1,
            label="Repetition penalty",
        ),
        gr.Dropdown(
            [
                'dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf',
                'dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf',
            ],
            value="dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf",
            label="Model",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="gray",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
    ).set(
        body_background_fill_dark="#0f172a",
        block_background_fill_dark="#0f172a",
        block_border_width="1px",
        block_title_background_fill_dark="#070d1b",
        input_background_fill_dark="#0c1425",
        button_secondary_background_fill_dark="#070d1b",
        border_color_primary_dark="#21293b",
        background_fill_secondary_dark="#0f172a",
        color_accent_soft_dark="transparent",
    ),
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="Cognitive Computation: Chat Dolphin 🐬 2.9.1-llama-3-70b & 2.9.1-yi-1.5-34b",
    chatbot=gr.Chatbot(scale=1, placeholder=PLACEHOLDER),
)

if __name__ == "__main__":
    demo.launch()