import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"  # Using CodeLlama as requested

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"]
]

# Load model with error handling and authentication
def load_model():
    try:
        print("Loading model and tokenizer...")

        # Get Hugging Face token from environment variable
        # This will be set in the Hugging Face Space settings
        hf_token = os.environ.get("HF_TOKEN")

        # If running locally and token is not set, try to use the token from git config
        if not hf_token:
            try:
                # Extract token from git config if available
                import subprocess
                git_url = subprocess.check_output(["git", "config", "--get", "remote.origin.url"]).decode().strip()
                if "@huggingface.co" in git_url:
                    # Extract the token from URLs of the form https://username:token@huggingface.co/...
                    credentials = git_url.split("://")[-1].split("@")[0]
                    hf_token = credentials.split(":")[1] if ":" in credentials else None
                    if hf_token:
                        print("Using token from git config")
            except Exception as e:
                print(f"Could not extract token from git config: {str(e)}")

        # Load tokenizer with token if available
        if hf_token:
            print("Using Hugging Face token for authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)

            # Configure the pipeline with token
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token
            )
        else:
            # Try without token (will only work if the model is public or the user is logged in)
            print("No Hugging Face token found, trying without authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
            )

        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Try to provide a more helpful error message
        if "gated repo" in str(e) or "401" in str(e):
            print("\nIMPORTANT: CodeLlama is a gated model that requires authentication.")
            print("To use this model, you need to:")
            print("1. Accept the model's license at https://huggingface.co/meta-llama/CodeLlama-7b-hf")
            print("2. Set your Hugging Face token in the Space's settings")
            print("   (Settings > Repository Secrets > Add > HF_TOKEN)")
        # Return None to indicate failure
        return None, None

# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check if model is loaded
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."
        # Record the start time so the generation duration can be reported
        start_time = time.time()

        # Generate the code (cast slider values to int, since Gradio sliders may return floats)
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=int(top_k),
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=int(max_length),
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]['generated_text']
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"

# Load the model and tokenizer
print("Initializing CodeLlama-7b...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    # Add a note about authentication if needed
    if tokenizer is None or pipeline is None:
        gr.Markdown("""
## ⚠️ Authentication Required

This demo requires authentication to access the CodeLlama model.

To use this model, you need to:
1. Accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
2. Set your Hugging Face token in the Space's settings (Settings > Repository Secrets > Add > HF_TOKEN)

The demo will show a limited interface until authentication is set up.
""")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature"
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k"
                )
            generate_btn = gr.Button("Generate Code")
        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20
            )

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output
    )

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt
    )

    # Add information about the model
    gr.Markdown("""
## About

This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

- **Max Length**: Controls the maximum length of the generated text
- **Temperature**: Controls randomness (lower = more deterministic)
- **Top-p**: Controls diversity via nucleus sampling
- **Top-k**: Controls diversity via top-k sampling

**Note**: CodeLlama is a gated model that requires authentication. If you're seeing authentication errors, please follow the instructions at the top of the page.

Created by DheepLearning
""")

# Launch the app
demo.launch()
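
# --- Local usage (assumption, not part of the original Space runtime) ---
# A minimal sketch for running this demo outside Hugging Face Spaces, assuming the
# file is saved as app.py and the CodeLlama license has been accepted on the Hub:
#   pip install gradio transformers torch accelerate
#   export HF_TOKEN=<your Hugging Face access token>
#   python app.py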