import gradio as gr
import spaces
from transformers import pipeline
import torch
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Predefined models to compare (can be expanded)
model_options = {
    "Foundation-Sec-8B": pipeline("text-generation", model="fdtn-ai/Foundation-Sec-8B"),
}


# Define the response function
@spaces.GPU
def generate_text_local(model_pipeline, prompt):
    """Run text generation locally and return only the newly generated text."""
    try:
        model_name = model_pipeline.model.name_or_path
        logger.info(f"Running local text generation with {model_name}")

        # Move the model to GPU if one is available (entire pipeline)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model_pipeline.model = model_pipeline.model.to(device)

        # Point the rest of the pipeline at the same device
        if hasattr(model_pipeline, "device"):
            model_pipeline.device = device

        # Record device information
        device_info = next(model_pipeline.model.parameters()).device
        logger.info(f"Model {model_name} is running on device: {device_info}")

        outputs = model_pipeline(
            prompt,
            max_new_tokens=3,  # forwarded to model.generate(max_new_tokens=3, …)
            do_sample=True,
            temperature=0.1,
            top_p=0.9,
            clean_up_tokenization_spaces=True,  # tidy up the echoed prompt portion
        )

        # Move the model back to CPU to free GPU memory
        model_pipeline.model = model_pipeline.model.to("cpu")
        if hasattr(model_pipeline, "device"):
            model_pipeline.device = torch.device("cpu")

        # Strip the echoed prompt so only the completion is returned
        return outputs[0]["generated_text"].replace(prompt, "").strip()
    except Exception as e:
        logger.error(f"Error in local text generation with {model_pipeline.model.name_or_path}: {str(e)}")
        return f"Error: {str(e)}"


# Build Gradio app
def create_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# AI Model Comparison Tool 🌟")
        gr.Markdown(
            """
            Compare responses from two AI models side-by-side.
            Select two models, ask a question, and compare their responses in real time!
            """
        )

        # Input Section
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a helpful assistant providing answers for technical and customer support queries.",
                label="System message",
            )
            user_message = gr.Textbox(label="Your question", placeholder="Type your question here...")

        with gr.Row():
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
            )

        # Model Selection Section
        selected_models = gr.CheckboxGroup(
            choices=list(model_options.keys()),
            label="Select exactly two models to compare",
            value=["Foundation-Sec-8B"],  # Default model
        )

        # Dynamic Response Section
        response_box1 = gr.Textbox(label="Response from Model 1", interactive=False)
        # response_box2 = gr.Textbox(label="Response from Model 2", interactive=False)

        # Function to generate responses
        def generate_responses(
            message, system_message, max_tokens, temperature, top_p, selected_models
        ):
            # if len(selected_models) != 2:
            #     return "Error: Please select exactly two models to compare.", ""
            responses = generate_text_local(
                # message, [], system_message, max_tokens, temperature, top_p, selected_models
                model_options[selected_models[0]], message
            )
            # return responses.get(selected_models[0], ""), responses.get(selected_models[1], "")
            return responses

        # Add a button for generating responses
        submit_button = gr.Button("Generate Responses")
        submit_button.click(
            generate_responses,
            inputs=[user_message, system_message, max_tokens, temperature, top_p, selected_models],
            # outputs=[response_box1, response_box2],  # Link to response boxes
            outputs=[response_box1],
        )

    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.launch()