import gradio as gr
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig
import torch
import re
import json
import time
import logging
import os
import gc

# Configurações de otimização
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "2"
os.environ["MKL_NUM_THREADS"] = "2"
torch.set_num_threads(2)
torch.set_num_interop_threads(1)

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("news-filter-gradio")

device = "cpu"
torch.set_default_device(device)

# Carrega modelo e tokenizer
print("🚀 Carregando modelo e tokenizer...")
log.info("🚀 Carregando modelo e tokenizer...")

model = AutoPeftModelForCausalLM.from_pretrained(
    "habulaj/filterinstruct2",
    device_map=device,
    torch_dtype=torch.bfloat16,
    load_in_4bit=False,
    low_cpu_mem_usage=True,
    use_cache=True,
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(
    "habulaj/filterinstruct2",
    use_fast=True,
    padding_side="left"
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model.eval()
log.info("✅ Modelo carregado (eval mode).")

tokenizer.chat_template = """{% for message in messages %}
{%- if message['role'] == 'user' %}
{%- if loop.first %}
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{{ message['content'] }}<|eot_id|>
{%- else %}
<|start_header_id|>user<|end_header_id|>

{{ message['content'] }}<|eot_id|>
{%- endif %}
{%- elif message['role'] == 'assistant' %}
<|start_header_id|>assistant<|end_header_id|>

{{ message['content'] }}<|eot_id|>
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
<|start_header_id|>assistant<|end_header_id|>

{%- endif %}"""

generation_config = GenerationConfig(
    max_new_tokens=200,
    temperature=1.0,
    min_p=0.1,
    do_sample=True,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

def extract_json(text):
    match = re.search(r'\{.*\}', text, flags=re.DOTALL)
    if match:
        return match.group(0)
    return text

def analyze_news(title, content):
    try:
        log.info(f"🧠 Inferência iniciada para: {title}")
        start_time = time.time()
        
        messages = [
            {
                "role": "user",
                "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
                
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.

Title: "{title}"
Content: "{content}"
"""
            }
        ]
        
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        )
        
        with torch.no_grad(), torch.inference_mode():
            outputs = model.generate(
                input_ids=inputs,
                generation_config=generation_config,
                num_return_sequences=1,
                output_scores=False,
                return_dict_in_generate=False
            )
        
        prompt_text = tokenizer.decode(inputs[0], skip_special_tokens=False)
        decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
        generated_only = decoded_text[len(prompt_text):].strip()
        json_result = extract_json(generated_only)
        
        duration = time.time() - start_time
        log.info(f"✅ JSON extraído em {duration:.2f}s")
        
        del outputs, inputs
        gc.collect()
        
        try:
            parsed_json = json.loads(json_result)
            return json.dumps(parsed_json, indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            return json_result
            
    except Exception as e:
        log.exception("❌ Erro inesperado:")
        return f"Erro durante a análise: {str(e)}"

def warmup_model():
    log.info("🔥 Executando warmup...")
    try:
        analyze_news("Test title", "Test content")
        log.info("✅ Warmup concluído.")
    except Exception as e:
        log.warning(f"⚠️ Warmup falhou: {e}")

def create_interface():
    with gr.Blocks(title="Analisador de Notícias", theme=gr.themes.Soft()) as demo:
        
        gr.Markdown("# 📰 Analisador de Notícias")
        
        with gr.Row():
            with gr.Column(scale=1):
                title_input = gr.Textbox(
                    label="Título da Notícia",
                    placeholder="Digite o título da notícia...",
                    lines=2
                )
                
                content_input = gr.Textbox(
                    label="Conteúdo da Notícia",
                    placeholder="Digite o conteúdo da notícia...",
                    lines=6
                )
                
                analyze_btn = gr.Button("🔍 Analisar Notícia", variant="primary")
            
            with gr.Column(scale=1):
                output = gr.Textbox(
                    label="Resultado JSON",
                    lines=15,
                    max_lines=20,
                    show_copy_button=True
                )
                
                status = gr.Textbox(
                    label="Status",
                    value="Aguardando entrada...",
                    interactive=False
                )
        
        def update_status_and_analyze(title, content):
            if not title.strip() or not content.strip():
                return "❌ Preencha título e conteúdo.", "Erro: Campos obrigatórios."
            
            try:
                result = analyze_news(title, content)
                return f"✅ Análise concluída!", result
            except Exception as e:
                return f"❌ Erro: {str(e)}", f"Erro: {str(e)}"
        
        analyze_btn.click(
            fn=update_status_and_analyze,
            inputs=[title_input, content_input],
            outputs=[status, output]
        )
    
    return demo

if __name__ == "__main__":
    warmup_model()
    print("🚀 Iniciando interface Gradio...")
    demo = create_interface()
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )