import os

# Set the cache directory to a writable location.
# This has to happen before `transformers` is imported: the library reads
# TRANSFORMERS_CACHE while it is being imported, so assigning it afterwards
# does not change where models are cached.
# NOTE(review): recent transformers releases prefer HF_HOME over
# TRANSFORMERS_CACHE -- confirm against the installed version.
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

import gradio as gr  # noqa: E402  (must follow the cache setup above)
from transformers import pipeline  # noqa: E402

# Initialize the Qwen model pipeline once at import time so every request
# reuses the same loaded model.
pipe = pipeline("text-generation", model="Qwen/Qwen1.5-0.5B")

def _format_chat(history):
    """Render a list of role/content message dicts as a Markdown transcript."""
    parts = []
    for msg in history:
        speaker = "You" if msg["role"] == "user" else "Assistant"
        parts.append(f"**{speaker}**: {msg['content']}\n\n")
    return "".join(parts)


def chatbot(user_input, history=None):
    """Append the user's message to the conversation and generate a reply.

    Args:
        user_input: The text the user submitted.
        history: The conversation so far, as a list of
            ``{"role": ..., "content": ...}`` dicts. ``None`` (the default)
            means an empty conversation.

    Returns:
        A ``(chat_display, history)`` tuple: the Markdown transcript of the
        whole conversation and the updated list of message dicts. When
        ``user_input`` is empty or only whitespace, the model is not called
        and the conversation is returned unchanged.
    """
    # A None default avoids sharing one list object across calls.
    history = [] if history is None else history

    # Nothing to send: skip generation rather than adding an empty user turn.
    if not user_input or not user_input.strip():
        return _format_chat(history), history

    # Build a new list so the caller's history object is never mutated.
    messages = history + [{"role": "user", "content": user_input}]

    # Generate response from the model
    generated = pipe(
        messages, max_new_tokens=150, do_sample=True, temperature=0.7
    )[0]["generated_text"]

    # A non-empty list result is treated as the full message list with the
    # reply last; any other result is used as the reply text directly.
    if isinstance(generated, list) and len(generated) > 0:
        assistant_response = generated[-1]["content"]
    else:
        assistant_response = generated

    # Update history with user input and assistant response
    history = messages + [{"role": "assistant", "content": assistant_response}]

    return _format_chat(history), history

# Assemble the Gradio UI: a title, the rendered transcript, and a message box.
with gr.Blocks() as demo:
    gr.Markdown("# AI Chatbot with Qwen1.5-0.5B")
    chatbot_output = gr.Markdown()
    user_input = gr.Textbox(
        placeholder="Type your message here...",
        label="Your Message",
    )
    # Holds the conversation history that is passed to and returned from `chatbot`.
    state = gr.State([])

    # When the textbox is submitted, call `chatbot` with the message and the
    # stored history; write the transcript and the new history back.
    user_input.submit(chatbot, [user_input, state], [chatbot_output, state])