"""Gradio chat demo serving Dolphin 2.9.1 GGUF models via llama-cpp-python.

Importing this module has side effects: it pip-installs the llama.cpp Python
bindings, downloads the GGUF model files into ``./models`` and builds the
module-level ``demo`` interface. Running it as a script launches the demo.
"""
# `spaces` is deliberately kept as the first import, as in the original file.
import spaces
import json
import subprocess
import sys
from collections.abc import Iterator

import gradio as gr
from huggingface_hub import hf_hub_download


def _pip_install(*args: str) -> None:
    """Install packages with the pip that belongs to the running interpreter.

    The command is passed as an argument list (no shell involved) and
    ``check=True`` makes a failed install raise ``CalledProcessError`` here,
    instead of surfacing later as an ImportError inside ``respond``.
    """
    subprocess.run([sys.executable, "-m", "pip", "install", *args], check=True)


_pip_install(
    "llama-cpp-python==0.2.75",
    "--extra-index-url",
    "https://abetlen.github.io/llama-cpp-python/whl/cu124",
)
_pip_install("llama-cpp-agent==0.2.10")

# (repo_id, filename) of every model offered in the UI, in dropdown order.
# Smaller alternatives that were previously tried and left disabled:
#   bartowski/dolphin-2.9.1-yi-1.5-9b-GGUF   dolphin-2.9.1-yi-1.5-9b-f32.gguf
#   crusoeai/dolphin-2.9.1-llama-3-8b-GGUF   dolphin-2.9.1-llama-3-8b.Q6_K.gguf
MODEL_SOURCES = [
    (
        "bartowski/dolphin-2.9.1-yi-1.5-34b-GGUF",
        "dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf",
    ),
    (
        "crusoeai/dolphin-2.9.1-llama-3-70b-GGUF",
        "dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf",
    ),
]

for _repo_id, _filename in MODEL_SOURCES:
    hf_hub_download(repo_id=_repo_id, filename=_filename, local_dir="./models")

# Custom CSS for the chat bubbles. Written as adjacent literals so the
# resulting string is identical to the original single-line value.
css = (
    " .message-row { justify-content: space-evenly !important; }"
    " .message-bubble-border { border-radius: 6px !important; }"
    " .dark.message-bubble-border { border-color: #21293b !important; }"
    " .dark.user { background: #0a1120 !important; }"
    " .dark.assistant { background: transparent !important; }"
    " "
)

# Text shown in the empty chatbot.
# NOTE(review): this reads like the text residue of an HTML snippet (logo,
# heading, Discord/GitHub links) whose markup was lost -- confirm against the
# deployed app before relying on it.
PLACEHOLDER = """
Logo

Cognitive Computations

we are dedicated to embracing innovation, exploring cutting-edge ideas, and fearlessly pushing the boundaries of what is possible in the realm of artificial intelligence. Our mission is to drive transformative progress that will reshape industries, enhance human experiences, and unlock new frontiers of knowledge.

Discord GitHub
"""


@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
    model,
) -> Iterator[str]:
    """Stream the assistant's reply to ``message``.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        max_tokens: Value assigned to the sampling setting ``max_tokens``.
        temperature: Value assigned to the sampling setting ``temperature``.
        top_p: Value assigned to the sampling setting ``top_p``.
        top_k: Value assigned to the sampling setting ``top_k``.
        repeat_penalty: Value assigned to the setting ``repeat_penalty``.
        model: File name of the GGUF model to load from ``models/``.

    Yields:
        The reply accumulated so far, once per streamed chunk, so the last
        yielded value is the complete reply.
    """
    # Imported here rather than at module level: these packages are only
    # installed by the pip calls above, at import time of this module.
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent
    from llama_cpp_agent import MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles

    print(message)
    print(history)

    # A fresh model instance is created on every call.
    llm = Llama(
        model_path=f"models/{model}",
        flash_attn=True,
        n_threads=40,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt="You are Dolphin an AI assistant that helps humanity.",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True,
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay the previous turns into the agent's chat history format.
    messages = BasicChatHistory()
    for user_text, assistant_text in history:
        messages.add_message({'role': Roles.user, 'content': user_text})
        messages.add_message(
            {'role': Roles.assistant, 'content': assistant_text}
        )

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the growing reply rather than the individual chunks.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs


demo = gr.ChatInterface(
    respond,
    # Order must match the extra parameters of `respond` after `history`.
    additional_inputs=[
        gr.Slider(
            minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"
        ),
        gr.Slider(
            minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=40,
            step=1,
            label="Top-k",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=1.1,
            step=0.1,
            label="Repetition penalty",
        ),
        gr.Dropdown(
            [filename for _, filename in MODEL_SOURCES],
            value="dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf",
            label="Model",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="gray",
        font=[
            gr.themes.GoogleFont("Exo"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        ],
    ).set(
        body_background_fill_dark="#0f172a",
        block_background_fill_dark="#0f172a",
        block_border_width="1px",
        block_title_background_fill_dark="#070d1b",
        input_background_fill_dark="#0c1425",
        button_secondary_background_fill_dark="#070d1b",
        border_color_primary_dark="#21293b",
        background_fill_secondary_dark="#0f172a",
        color_accent_soft_dark="transparent",
    ),
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="Cognitive Computation: Chat Dolphin 🐬 2.9.1-llama-3-70b & 2.9.1-yi-1.5-34b",
    chatbot=gr.Chatbot(scale=1, placeholder=PLACEHOLDER),
)

if __name__ == "__main__":
    demo.launch()