import time

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"],
]


# Load the model with error handling
def load_model():
    try:
        print("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Configure the pipeline based on available resources;
        # Hugging Face Spaces typically have a GPU available, and
        # device_map="auto" places the model on it automatically.
        pipe = transformers.pipeline(
            "text-generation",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("Model loaded successfully!")
        return tokenizer, pipe
    except Exception as e:
        print(f"Error loading model: {e}")
        # Return None so generate_code can report the failure.
        return None, None


# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check that the model loaded successfully
        if tokenizer is None or pipe is None:
            return "Error: Model failed to load. Please check the logs."

        # Record the start time so we can report generation latency
        start_time = time.time()

        # Generate the completion. Gradio sliders can return floats,
        # so cast the integer-valued parameters explicitly. Note that
        # max_length counts prompt tokens plus generated tokens.
        sequences = pipe(
            prompt,
            do_sample=True,
            top_k=int(top_k),
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=int(max_length),
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]["generated_text"]
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {e}"


# Load the model and tokenizer once at startup
print("Initializing CodeLlama...")
tokenizer, pipe = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5,
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50, maximum=500, value=200, step=10, label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.1, step=0.1, label="Temperature"
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5, maximum=1.0, value=0.95, step=0.05, label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1, maximum=50, value=10, step=1, label="Top-k"
                )
            generate_btn = gr.Button("Generate Code")

        with gr.Column():
            output = gr.Textbox(label="Generated Code", lines=20)

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output,
    )

    # Add examples
    gr.Examples(examples=EXAMPLES, inputs=prompt)

    # Add information about the model
    gr.Markdown("""
    ## About
    This demo uses the CodeLlama-7b model to generate code completions from your prompts.

    - **Max Length**: Maximum total length in tokens (prompt plus completion)
    - **Temperature**: Controls randomness (lower = more deterministic)
    - **Top-p**: Controls diversity via nucleus sampling
    - **Top-k**: Controls diversity via top-k sampling

    Created by DheepLearning
    """)

# Launch the app
demo.launch()
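# ---------------------------------------------------------------------------
# Deployment note (assumptions, not part of the app logic): device_map="auto"
# requires the `accelerate` package, so a Spaces requirements.txt would need
# at least gradio, transformers, torch, and accelerate. The CodeLlama
# checkpoints on the Hugging Face Hub are gated behind a license acceptance,
# so the Space typically also needs an access token (e.g. an HF_TOKEN secret)
# from an account that has accepted the model license. At float16, the 7B
# model needs roughly 14 GB of GPU memory.
# ---------------------------------------------------------------------------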