from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch
import spaces

# Initialize the model and tokenizer
model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# System instruction
SYSTEM_INSTRUCTION = (
    "You are a math tutor providing hints and guidance. "
    "Do not reveal final answers. Offer step-by-step assistance only."
)


def apply_chat_template(messages):
    """
    Prepares the messages for the model using the tokenizer's chat template.
    """
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )


@spaces.GPU
def generate_response(history, user_input):
    """
    Generates a response from the model based on the chat history and user input.
    """
    # Append user input to the chat history
    history.append({"role": "user", "content": user_input})

    # Build messages for the model
    messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}] + history

    # Tokenize input for the model
    text = apply_chat_template(messages)
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    # Generate response
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512
    )
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Append the assistant's response to history
    history.append({"role": "assistant", "content": response})

    # Format the conversation for display
    formatted_history = format_chat_history(history)
    return formatted_history, history


def format_chat_history(history):
    """
    Formats the conversation history for a user-friendly chat display.
    """
    chat_display = ""
    for message in history:
        if message["role"] == "user":
            chat_display += f"**User:** {message['content']}\n\n"
        elif message["role"] == "assistant":
            chat_display += f"**MathTutor:** {message['content']}\n\n"
    return chat_display


# Gradio chat interface
def create_chat_interface():
    """
    Creates the Gradio interface for the chat application.
    """
    with gr.Blocks() as chat_app:
        gr.Markdown("## Math Hint Chat")
        gr.Markdown(
            "This chat application helps with math problems by providing hints and guidance. "
            "It keeps a history of your conversation and ensures no direct answers are given."
        )

        with gr.Row():
            with gr.Column():
                user_input = gr.Textbox(
                    label="Your Math Query",
                    placeholder="Ask about a math problem (e.g., Solve for x: 4x + 5 = 6x + 7)",
                    lines=2
                )
                send_button = gr.Button("Send")
            with gr.Column():
                chat_history = gr.Textbox(
                    label="Chat History",
                    placeholder="Chat history will appear here.",
                    lines=20,
                    interactive=False
                )

        # Hidden state for storing conversation history
        history_state = gr.State([])

        # Button interaction
        send_button.click(
            fn=generate_response,
            inputs=[history_state, user_input],
            outputs=[chat_history, history_state]
        )

    return chat_app


app = create_chat_interface()
app.launch(debug=True)
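
# --- Optional local smoke test (a sketch, not part of the Spaces app) ---
# This commented-out snippet shows one way to exercise generate_response
# directly, without going through the Gradio UI. It assumes the model fits
# on your local hardware and that @spaces.GPU degrades to a no-op outside
# a ZeroGPU environment; the sample question is purely illustrative.
# Because app.launch() above blocks, this would only run if you moved the
# launch call behind an `if __name__ == "__main__":` guard.
#
# demo_history = []
# rendered, demo_history = generate_response(
#     demo_history, "How do I start solving 4x + 5 = 6x + 7?"
# )
# print(rendered)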