import os
import re

import gradio as gr
from huggingface_hub import InferenceClient

API_TOKEN = os.getenv("HF_TOKEN")
MODEL = "Qwen/Qwen3-32B"

try:
    print(f"Initializing Inference Client for model: {MODEL}")
    # token=None falls back to anonymous access or a cached `huggingface-cli login`.
    client = InferenceClient(model=MODEL, token=API_TOKEN)
except Exception as e:
    # gr.Error only renders inside event handlers; at import time a plain
    # RuntimeError is the appropriate failure mode.
    raise RuntimeError(
        f"Failed to initialize model client for {MODEL}. Error: {e}. "
        "Check HF_TOKEN and model availability."
    ) from e

LANGUAGE_BY_EXTENSION = {".py": "python", ".js": "javascript", ".html": "html", ".css": "css"}

# Parse all ```filename.ext\n<code>\n``` blocks in the model response.
def parse_code_blocks(response: str) -> list:
    pattern = r"```([^\n]+)\n(.*?)```"
    blocks = re.findall(pattern, response, re.DOTALL)
    files = []
    for filename, code in blocks:
        filename = filename.strip()
        lang = LANGUAGE_BY_EXTENSION.get(os.path.splitext(filename)[1])
        files.append({
            "filename": filename,
            "language": lang,
            "code": code.strip(),
        })
    return files

def strip_think_tags(text: str) -> str:
    # Remove complete <think>...</think> blocks, plus a trailing block that is
    # still open while the model streams its reasoning.
    return re.sub(r"<think>.*?(?:</think>|$)", "", text, flags=re.DOTALL)

def extract_thoughts(text: str) -> str:
    matches = re.findall(r"<think>(.*?)(?:</think>|$)", text, flags=re.DOTALL)
    return "\n".join(m.strip() for m in matches).strip()

system_message = (
    "You are an AI that generates website code using markdown-style code blocks. "
    "Each file should be output as: ```filename.ext\\n<code>\\n``` with no explanation."
)

def generate_code(prompt, backend_choice, max_tokens, temperature, top_p):
    user_prompt = f"USER_PROMPT = {prompt}\nUSER_BACKEND = {backend_choice}"
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
    ]
    full_response = ""
    current_thoughts = ""
    current_files = []

    # Reset outputs: clear the code file tabs and show an empty thinking box.
    yield [], gr.update(visible=True, value="")

    try:
        stream = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for message in stream:
            if not message.choices:
                continue
            token = message.choices[0].delta.content
            if not isinstance(token, str):
                continue
            full_response += token

            thoughts = extract_thoughts(full_response)
            files = parse_code_blocks(strip_think_tags(full_response))

            # Only push an update when something actually changed, so the
            # dynamically rendered tabs are not rebuilt on every token.
            if thoughts != current_thoughts or files != current_files:
                current_thoughts = thoughts
                current_files = files
                yield current_files, gr.update(visible=True, value=current_thoughts)

        # Final clean state: parsed files only, thinking box hidden.
        yield parse_code_blocks(strip_think_tags(full_response)), gr.update(visible=False)
    except Exception as e:
        print(f"ERROR during code generation stream: {e}")
        yield [], gr.update(visible=False)
        raise gr.Error(f"Code generation failed: {e}")

with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo:
    gr.Markdown("# ✨ Website Code Generator ✨")
    gr.Markdown("Describe the website you want. Multiple file tabs will be created automatically.")

    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(label="Website Description", lines=6)
            backend_radio = gr.Radio(["Static", "Flask", "Node.js"], label="Backend Context", value="Static")
            generate_button = gr.Button("✨ Generate Website Code", variant="primary")
        with gr.Column(scale=3):
            # Parsed files live in session state; gr.render rebuilds the code
            # tabs below whenever this state changes. (A gr.Group cannot
            # receive a list of new components as an event output.)
            files_state = gr.State([])

            @gr.render(inputs=files_state)
            def render_files(files):
                with gr.Group():
                    for f in files:
                        gr.Code(value=f["code"], label=f["filename"], language=f["language"])

            thinking_box = gr.Textbox(label="Model is thinking...", visible=False, interactive=False)

    with gr.Accordion("Advanced Settings", open=False):
        max_tokens_slider = gr.Slider(512, 4096, value=3072, step=256, label="Max New Tokens")
        temperature_slider = gr.Slider(0.1, 1.2, value=0.7, step=0.1, label="Temperature")
        top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")

    generate_button.click(
        fn=generate_code,
        inputs=[prompt_input, backend_radio, max_tokens_slider, temperature_slider, top_p_slider],
        outputs=[files_state, thinking_box],
    )

if __name__ == "__main__":
    if not API_TOKEN:
        print("Warning: HF_TOKEN environment variable not set. Using anonymous access.")
    demo.queue().launch()
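# ---------------------------------------------------------------------------
# Usage sketch (assumes this file is saved as app.py with `gradio` and
# `huggingface_hub` installed; `hf_xxx` is a placeholder token):
#
#   HF_TOKEN=hf_xxx python app.py   # authenticated inference
#   python app.py                   # anonymous access (may be rate-limited)
#
# Example of the (hypothetical) model output format parse_code_blocks() handles:
#
#   >>> parse_code_blocks("```index.html\n<h1>Hi</h1>\n```")
#   [{'filename': 'index.html', 'language': 'html', 'code': '<h1>Hi</h1>'}]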