import gradio as gr
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig
import torch
import re
import json
import time
import logging
import os
import gc
# Optimization settings
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "2"
os.environ["MKL_NUM_THREADS"] = "2"
torch.set_num_threads(2)
torch.set_num_interop_threads(1)
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("news-filter-gradio")
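# Run everything on the CPU and make it the default device for newly created tensors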
device = "cpu"
torch.set_default_device(device)
# Load model and tokenizer
print("🚀 Loading model and tokenizer...")
log.info("🚀 Loading model and tokenizer...")
model = AutoPeftModelForCausalLM.from_pretrained(
    "habulaj/filterinstruct2",
    device_map=device,
    torch_dtype=torch.bfloat16,
    load_in_4bit=False,
    low_cpu_mem_usage=True,
    use_cache=True,
    trust_remote_code=True
)
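# Fast tokenizer with left padding (decoder-only models are padded on the left for generation)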
tokenizer = AutoTokenizer.from_pretrained(
    "habulaj/filterinstruct2",
    use_fast=True,
    padding_side="left"
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.eval()
log.info("✅ Model loaded (eval mode).")
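# Llama 3 style chat template: user/assistant turns wrapped in <|start_header_id|>/<|eot_id|> special tokens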
tokenizer.chat_template = """{% for message in messages %}
{%- if message['role'] == 'user' %}
{%- if loop.first %}
<|begin_of_text|><|start_header_id|>user<|end_header_id|>
{{ message['content'] }}<|eot_id|>
{%- else %}
<|start_header_id|>user<|end_header_id|>
{{ message['content'] }}<|eot_id|>
{%- endif %}
{%- elif message['role'] == 'assistant' %}
<|start_header_id|>assistant<|end_header_id|>
{{ message['content'] }}<|eot_id|>
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
<|start_header_id|>assistant<|end_header_id|>
{%- endif %}"""
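# Sampling configuration: up to 200 new tokens, min_p filtering, EOS token reused as pad token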
generation_config = GenerationConfig(
    max_new_tokens=200,
    temperature=1.0,
    min_p=0.1,
    do_sample=True,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)
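# Return the first {...} span found in the model output, or the raw text if no braces are present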
def extract_json(text):
    match = re.search(r'\{.*\}', text, flags=re.DOTALL)
    if match:
        return match.group(0)
    return text
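# Run a single prompt through the model and return the extracted JSON filters as a string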
def analyze_news(title, content):
    try:
        log.info(f"🧠 Starting inference for: {title}")
        start_time = time.time()
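        # Single-turn prompt instructing the model to reply with the JSON filters only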
        messages = [
            {
                "role": "user",
                "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "{title}"
Content: "{content}"
"""
            }
        ]
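        # Render the chat template and tokenize it; returns a tensor of input ids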
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        )
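        # Generate the completion without gradient tracking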
        with torch.no_grad(), torch.inference_mode():
            outputs = model.generate(
                input_ids=inputs,
                generation_config=generation_config,
                num_return_sequences=1,
                output_scores=False,
                return_dict_in_generate=False
            )
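        # Decode the prompt and the full output, then keep only the newly generated text
        # (relies on the prompt decoding back to the exact same string)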
        prompt_text = tokenizer.decode(inputs[0], skip_special_tokens=False)
        decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
        generated_only = decoded_text[len(prompt_text):].strip()
        json_result = extract_json(generated_only)
        duration = time.time() - start_time
        log.info(f"✅ JSON extracted in {duration:.2f}s")
        del outputs, inputs
        gc.collect()
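        # Pretty-print the result if it parses as JSON; otherwise return the raw extracted text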
        try:
            parsed_json = json.loads(json_result)
            return json.dumps(parsed_json, indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            return json_result
    except Exception as e:
        log.exception("❌ Unexpected error:")
        return f"Error during analysis: {str(e)}"
def warmup_model():
    log.info("🔥 Running warmup...")
    try:
        analyze_news("Test title", "Test content")
        log.info("✅ Warmup complete.")
    except Exception as e:
        log.warning(f"⚠️ Warmup failed: {e}")
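# Build the Gradio Blocks UI: news title/content inputs on the left, JSON result and status on the right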
def create_interface():
    with gr.Blocks(title="News Analyzer", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 📰 News Analyzer")
        with gr.Row():
            with gr.Column(scale=1):
                title_input = gr.Textbox(
                    label="News Title",
                    placeholder="Enter the news title...",
                    lines=2
                )
                content_input = gr.Textbox(
                    label="News Content",
                    placeholder="Enter the news content...",
                    lines=6
                )
                analyze_btn = gr.Button("🔍 Analyze News", variant="primary")
            with gr.Column(scale=1):
                output = gr.Textbox(
                    label="JSON Result",
                    lines=15,
                    max_lines=20,
                    show_copy_button=True
                )
                status = gr.Textbox(
                    label="Status",
                    value="Waiting for input...",
                    interactive=False
                )
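        # Validate the inputs, run the analysis, and return a status message alongside the JSON result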
        def update_status_and_analyze(title, content):
            if not title.strip() or not content.strip():
                return "❌ Please fill in both title and content.", "Error: required fields missing."
            try:
                result = analyze_news(title, content)
                return "✅ Analysis complete!", result
            except Exception as e:
                return f"❌ Error: {str(e)}", f"Error: {str(e)}"
        analyze_btn.click(
            fn=update_status_and_analyze,
            inputs=[title_input, content_input],
            outputs=[status, output]
        )
    return demo
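# Warm the model up, then serve the interface on all network interfaces at port 7860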
if __name__ == "__main__":
    warmup_model()
    print("🚀 Launching Gradio interface...")
    demo = create_interface()
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )