import os
import gradio as gr
from llama_cpp import Llama

# Get environment variables (set in the Space's settings)
model_id = os.getenv('MODEL')
quant = os.getenv('QUANT')
chat_template = os.getenv('CHAT_TEMPLATE')

# Interface variables
model_name = model_id.split('/')[-1]
title = f"🇩🇪 {model_name}"
description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})!"

print("loading model")

# Initialize the LLM; expand '~' explicitly, since llama_cpp does not do so itself
llm = Llama(
    model_path=os.path.expanduser(
        "~/.cache/huggingface/hub/models--LSX-UniWue--LLaMmlein_1B_alternative_formats/snapshots/7d97b69ae6910b5f317be2dbd5b4820d848c66b4/LLaMmlein_1B_chat_selected.gguf"
    ),
    n_ctx=32768,
    n_threads=2,
    chat_format=chat_template,
)

# Stream chat completions token by token
def chat_stream_completion(message, history):
    # history arrives as a list of (user message, assistant reply) pairs
    # messages_prompts = [{"role": "system", "content": system_prompt}]
    messages_prompts = []
    for human, assistant in history:
        messages_prompts.append({"role": "user", "content": human})
        messages_prompts.append({"role": "assistant", "content": assistant})
    messages_prompts.append({"role": "user", "content": message})
    response = llm.create_chat_completion(
        messages=messages_prompts,
        stream=True,
        stop=["<|im_end|>"],
    )
    message_repl = ""
    for chunk in response:
        delta = chunk['choices'][0]['delta']
        if len(delta) != 0 and "content" in delta:
            message_repl = message_repl + delta["content"]
            yield message_repl

print("starting gradio")

# Gradio chat interface
gr.ChatInterface(
    fn=chat_stream_completion,
    title=title,
    description=description,
    # additional_inputs=[gr.Textbox("Du bist ein hilfreicher Assistent.")],
    # additional_inputs_accordion="📝 System prompt",
    examples=[
        ["Was ist ein Large Language Model?"],
        ["Was ist 9+2-1?"],
        ["Schreibe Python-Code, um die Fibonacci-Folge auszugeben."]
    ],
).queue().launch()
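The hardcoded snapshot path above breaks as soon as the cached revision changes. A minimal sketch of an alternative, assuming the file lives in the LSX-UniWue/LLaMmlein_1B_alternative_formats repo as the cache path suggests: resolve the GGUF through huggingface_hub instead, which downloads the file on first run and reuses the cache afterwards.

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Resolve the GGUF via the hub cache instead of a hardcoded snapshot path.
# repo_id and filename are inferred from the path above; verify before use.
gguf_path = hf_hub_download(
    repo_id="LSX-UniWue/LLaMmlein_1B_alternative_formats",
    filename="LLaMmlein_1B_chat_selected.gguf",
)

llm = Llama(
    model_path=gguf_path,
    n_ctx=32768,
    n_threads=2,
    chat_format=os.getenv('CHAT_TEMPLATE'),
)

Since hf_hub_download returns the resolved local path, the script no longer depends on a specific snapshot hash being present in the cache.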