Spaces:

Futuresony
/

FutureX

Sleeping

File size: 923 Bytes

a4e4083
c2c7eb4
b7f8793
c2c7eb4
0621ae2
ed0ccfa
c2c7eb4
 
ed0ccfa
c2c7eb4
 
 
 
ed0ccfa
6d9c19c
 
c2c7eb4
 
 
 
 
 
 
6d9c19c
c2c7eb4
 
 
 
 
 
6d9c19c
c2c7eb4
6d9c19c
c2c7eb4
a4e4083

from pathlib import Path

import gradio as gr
from llama_cpp import Llama

# Location of the GGUF weights: either a model file on disk or a Hugging Face
# Hub repository id of the form "owner/name".
MODEL_PATH = "Futuresony/gemma2-2b-gguf-q4_k_m"

# Inference settings shared by both loading paths below.
_LLAMA_KWARGS = {"n_ctx": 2048, "n_threads": 4, "verbose": True}

# Load model.
# Llama(model_path=...) only accepts a file that exists on disk, so a Hub
# repository id cannot be passed to it directly. When MODEL_PATH is not a
# local file, resolve it through Llama.from_pretrained, which fetches the
# matching GGUF file from the Hub (it needs exactly one file to match the
# pattern) and then loads it with the same settings.
if Path(MODEL_PATH).is_file():
    llm = Llama(model_path=MODEL_PATH, **_LLAMA_KWARGS)
else:
    llm = Llama.from_pretrained(
        repo_id=MODEL_PATH,
        filename="*.gguf",
        **_LLAMA_KWARGS,
    )

# Function to format the prompt
def format_prompt(user_message):
    """Wrap *user_message* in the instruction/response prompt template.

    The result is an ``### Instruction:`` header, the message on the next
    line, a blank line, and a trailing ``### Response:`` header with nothing
    after it.
    """
    instruction_header = "### Instruction:"
    response_header = "### Response:"
    return "{}\n{}\n\n{}".format(
        instruction_header, user_message, response_header
    )

# Chat handler
def respond(user_message, chat_history):
    """Generate a reply to *user_message* and record the turn in the history.

    Args:
        user_message: Text submitted by the user.
        chat_history: List of ``(user_message, response)`` tuples for the
            conversation so far. It is extended in place. ``None`` is
            treated as an empty history.

    Returns:
        A ``("", chat_history)`` tuple. The empty string is the new value
        for the input box and the list is the updated conversation.
    """
    if chat_history is None:
        chat_history = []

    # A blank submission carries no instruction; skip the model call instead
    # of generating from an empty prompt and adding an empty turn.
    if not user_message or not user_message.strip():
        return "", chat_history

    prompt = format_prompt(user_message)
    # "###" starts the next section of the prompt template, so generation is
    # cut off there to keep the model from writing further sections.
    output = llm(prompt, max_tokens=300, stop=["###"])
    response = output["choices"][0]["text"].strip()
    chat_history.append((user_message, response))
    return "", chat_history

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 DStv AI Assistant (Offline - GGUF)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")

    # The Chatbot is both the source of the conversation history and the
    # place the updated history is written back to, so what the handler
    # receives always matches what is displayed. The first output clears the
    # textbox after each submission.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()