"""Streamlit chat UI for a GGUF-quantized Unsloth model served via llama-cpp-python.

Downloads the model weights from the Hugging Face Hub (cached on disk by
`hf_hub_download`) and answers free-form user questions with a single
non-streaming completion per submission.
"""

import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hugging Face model repo details
HF_MODEL_REPO = "anush76/unsloth-model"
MODEL_FILENAME = "unsloth.Q4_K_M.gguf"


@st.cache_resource(show_spinner=False)
def load_model() -> Llama:
    """Download (if needed) and load the GGUF model exactly once per server process.

    Streamlit re-executes this script on every user interaction; without
    `st.cache_resource` the multi-gigabyte model would be re-downloaded and
    re-loaded on each rerun. The decorator memoizes the `Llama` instance
    across reruns and sessions.
    """
    st.sidebar.write("📥 Downloading model from Hugging Face...")
    model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=MODEL_FILENAME)

    st.sidebar.write("🔄 Loading model...")
    # n_gpu_layers=20 offloads part of the network to GPU when one is
    # available; llama.cpp silently falls back to CPU otherwise.
    return Llama(model_path=model_path, n_threads=8, n_batch=512, n_gpu_layers=20)


llm = load_model()

# Streamlit UI
st.title("🦥 Unsloth Chatbot")
st.write("💬 Ask me anything!")

user_input = st.text_input("You:")

if user_input:
    try:
        response = llm.create_completion(
            prompt=f"Answer in a clear paragraph format:\n\n{user_input}",
            max_tokens=300,  # Ensures a complete response
            temperature=0.6,
            top_p=0.9,
            stream=False,  # Disables word-by-word output
        )
        # Non-streaming responses follow the OpenAI-style completion schema:
        # {"choices": [{"text": ...}, ...], ...}
        full_response = response["choices"][0]["text"].strip()
    except (KeyError, IndexError):
        # Defensive: surface a readable error instead of a raw traceback
        # if the backend returns an unexpected payload shape.
        st.error("⚠️ The model returned an unexpected response. Please try again.")
    else:
        # Format response into a paragraph
        st.write("🤖 Chatbot:\n\n", full_response)