Spaces:

habulaj
/

filtergradio

Sleeping

App Files Files Community

habulaj committed on Jul 8

Commit

7440293

verified ·

1 Parent(s): 9112884

Update app.py

Browse files

Files changed (1) hide show

app.py +191 -115

app.py CHANGED Viewed

@@ -1,128 +1,198 @@
 import gradio as gr
 from peft import AutoPeftModelForCausalLM
-from transformers import AutoTokenizer
 import torch
 import re
 import json
-# Configuração global para usar CPU
-device = "cpu"
-torch.set_default_device(device)
-# Carrega o modelo e tokenizer uma vez no início
-print("Carregando modelo e tokenizer...")
 model = AutoPeftModelForCausalLM.from_pretrained(
-    "habulaj/filterinstruct180",
-    device_map=device,
-    torch_dtype=torch.float32,  # Usa float32 para CPU
-    load_in_4bit=False,  # Desabilita quantização para CPU
 )
-tokenizer = AutoTokenizer.from_pretrained("habulaj/filterinstruct180")
-# Configura o chat template
-tokenizer.chat_template = """{% for message in messages %}
-{%- if message['role'] == 'user' %}
-{%- if loop.first %}
-<|begin_of_text|><|start_header_id|>user<|end_header_id|>
-{{ message['content'] }}<|eot_id|>
-{%- else %}
-<|start_header_id|>user<|end_header_id|>
-{{ message['content'] }}<|eot_id|>
-{%- endif %}
-{%- elif message['role'] == 'assistant' %}
-<|start_header_id|>assistant<|end_header_id|>
-{{ message['content'] }}<|eot_id|>
-{%- endif %}
-{%- endfor %}
-{%- if add_generation_prompt %}
-<|start_header_id|>assistant<|end_header_id|>
-{%- endif %}"""
 def extract_json(text):
-    """Extrai apenas o JSON da resposta"""
-    match = re.search(r'\{.*\}', text, flags=re.DOTALL)
     if match:
-        return match.group(0)
     return text
-def analyze_news(title, content):
-    """Função principal de análise de notícias"""
-    try:
-        # Prepara a mensagem
-        messages = [
-            {
-                "role": "user",
-                "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
-Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
-Title: "{title}"
-Content: "{content}"
-"""
-            }
-        ]
-        # Aplica o template e tokeniza
-        inputs = tokenizer.apply_chat_template(
-            messages,
-            tokenize=True,
-            add_generation_prompt=True,
-            return_tensors="pt",
         )
-        # Gera a resposta
-        with torch.no_grad():
-            outputs = model.generate(
-                input_ids=inputs,
-                max_new_tokens=200,
-                use_cache=True,
-                temperature=1.0,
-                min_p=0.1,
-                pad_token_id=tokenizer.eos_token_id,
-                do_sample=True,
-            )
-        # Decode input (prompt)
-        prompt_text = tokenizer.decode(inputs[0], skip_special_tokens=False)
-        # Decode output (prompt + resposta)
-        decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
-        # Geração pura (remove o prompt)
-        generated_only = decoded_text[len(prompt_text):].strip()
-        # Extrai só o JSON
-        json_result = extract_json(generated_only)
-        # Tenta validar o JSON
-        try:
-            parsed_json = json.loads(json_result)
-            return json.dumps(parsed_json, indent=2, ensure_ascii=False)
-        except json.JSONDecodeError:
-            return json_result
     except Exception as e:
-        return f"Erro durante a análise: {str(e)}"
 # Interface Gradio
 def create_interface():
     with gr.Blocks(
-        title="Analisador de Notícias",
         theme=gr.themes.Soft(),
         css="""
         .gradio-container {
             max-width: 1200px !important;
         }
         """
     ) as demo:
-        gr.Markdown("# 📰 Analisador de Notícias")
-        gr.Markdown("Insira o título e conteúdo da notícia para obter os filtros em formato JSON.")
         with gr.Row():
             with gr.Column(scale=1):
@@ -141,11 +211,12 @@ def create_interface():
                 analyze_btn = gr.Button("🔍 Analisar Notícia", variant="primary")
                 # Exemplos predefinidos
-                gr.Markdown("### Exemplos:")
-                example_btn1 = gr.Button("📻 Exemplo: Músico", size="sm")
-                example_btn2 = gr.Button("⚽ Exemplo: Esporte", size="sm")
-                example_btn3 = gr.Button("💼 Exemplo: Negócios", size="sm")
             with gr.Column(scale=1):
                 output = gr.Textbox(
@@ -155,23 +226,24 @@ def create_interface():
                     show_copy_button=True
                 )
-                gr.Markdown("### Status:")
                 status = gr.Textbox(
                     label="Status da Análise",
-                    value="Aguardando entrada...",
                     interactive=False
                 )
-        # Função para atualizar status
-        def update_status_and_analyze(title, content):
-            if not title.strip() or not content.strip():
-                return "❌ Por favor, preencha tanto o título quanto o conteúdo.", "Erro: Campos obrigatórios não preenchidos."
-            try:
-                result = analyze_news(title, content)
-                return f"✅ Análise concluída com sucesso!", result
-            except Exception as e:
-                return f"❌ Erro na análise: {str(e)}", f"Erro: {str(e)}"
         # Exemplos predefinidos
         def load_example_1():
@@ -194,7 +266,7 @@ def create_interface():
         # Event handlers
         analyze_btn.click(
-            fn=update_status_and_analyze,
             inputs=[title_input, content_input],
             outputs=[status, output]
         )
@@ -215,24 +287,28 @@ def create_interface():
         )
         # Informações adicionais
-        with gr.Accordion("ℹ️ Informações", open=False):
             gr.Markdown("""
-            **Como usar:**
-            1. Insira o título da notícia
-            2. Insira o conteúdo da notícia
-            3. Clique em "Analisar Notícia"
-            4. O resultado será exibido em formato JSON
-            **Notas:**
-            - O modelo está rodando em CPU
-            - O processamento pode levar alguns segundos
-            - Use os exemplos predefinidos para testar rapidamente
             """)
     return demo
 if __name__ == "__main__":
-    print("Iniciando interface Gradio...")
     demo = create_interface()
     demo.launch(
         share=False,

 import gradio as gr
 from peft import AutoPeftModelForCausalLM
+from transformers import AutoTokenizer, GenerationConfig
 import torch
 import re
 import json
+import time
+import logging
+import os
+import gc
+# -------- CONFIGURAÇÕES DE OTIMIZAÇÃO --------
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+os.environ["OMP_NUM_THREADS"] = "2"
+os.environ["MKL_NUM_THREADS"] = "2"
+torch.set_num_threads(2)
+torch.set_num_interop_threads(1)
+# -------- LOGGING CONFIG --------
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+log = logging.getLogger("news-filter-gradio")
+# -------- LOAD MODEL --------
+model_name = "habulaj/filterinstruct180"
+log.info("🚀 Carregando modelo e tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    use_fast=True,
+    padding_side="left"
+)
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
 model = AutoPeftModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="cpu",
+    torch_dtype=torch.bfloat16,
+    low_cpu_mem_usage=True,
+    use_cache=True,
+    trust_remote_code=True
 )
+model.eval()
+log.info("✅ Modelo carregado (eval mode).")
# Decoding settings shared by every request: greedy (no sampling, single beam),
# capped at 128 new tokens, stopping/padding on the tokenizer's EOS token.
#
# `no_repeat_ngram_size` is intentionally NOT set. The n-gram ban is applied
# to the whole sequence (prompt + generated tokens for a decoder-only model),
# and a JSON answer necessarily repeats token bigrams (quotes, colons, commas)
# and has to copy names from the article, so a bigram ban corrupts the output.
#
# NOTE(review): `repetition_penalty` also penalizes tokens that occur in the
# prompt; 1.1 is kept from the original configuration, but consider 1.0 if
# entity names copied from the article come out garbled.
generation_config = GenerationConfig(
    max_new_tokens=128,
    temperature=1.0,  # neutral value; not used while do_sample=False
    do_sample=False,
    num_beams=1,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.1,
    length_penalty=1.0,  # neutral value; only relevant to beam search
)
def build_chat_prompt(title, content):
    """Build the single-turn chat prompt sent to the model.

    The prompt is a fixed template: a user turn containing the instruction,
    the quoted title and the quoted content, followed by an opened assistant
    turn for the model to complete.

    Args:
        title: News headline, as typed by the user.
        content: News body, as typed by the user.

    Returns:
        The full prompt string, ending with the assistant header and a newline.
    """
    def _strip_control_tokens(text):
        # The template delimits turns with `<|...|>` markers. User-supplied
        # text must not be able to smuggle such markers in, otherwise it could
        # close the user turn early and forge an assistant/system turn.
        return re.sub(r"<\|[^<>|]*\|>", "", text)

    safe_title = _strip_control_tokens(title)
    safe_content = _strip_control_tokens(content)
    return f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "{safe_title}"
Content: "{safe_content}"
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
 def extract_json(text):
+    """Extrai e limpa o JSON da resposta"""
+    match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', text, re.DOTALL)
     if match:
+        json_text = match.group(0)
+        # Conversões comuns
+        json_text = re.sub(r"'", '"', json_text)
+        json_text = re.sub(r'\bTrue\b', 'true', json_text)
+        json_text = re.sub(r'\bFalse\b', 'false', json_text)
+        json_text = re.sub(r",\s*}", "}", json_text)
+        json_text = re.sub(r",\s*]", "]", json_text)
+        return json_text.strip()
     return text
def infer_filter(title, content):
    """Run the model on one news item and return the extracted JSON text.

    The prompt is limited to 512 tokens. When it would exceed that budget the
    article body is shortened and the prompt rebuilt, so the closing
    end-of-turn marker and the assistant header always survive; cutting the
    finished prompt from the right would drop them and leave the model
    continuing the user's text instead of answering.

    Args:
        title: News headline.
        content: News body.

    Returns:
        A ``(json_text, duration)`` tuple: the output of ``extract_json`` on
        the generated text, and the elapsed wall-clock time in seconds.
    """
    log.info("🧠 Inferência iniciada para: %s", title)
    start_time = time.time()
    max_prompt_tokens = 512
    chat_prompt = build_chat_prompt(title, content)
    inputs = tokenizer(
        chat_prompt,
        return_tensors="pt",
        padding=False,
        add_special_tokens=False
    )
    overflow = inputs.input_ids.shape[1] - max_prompt_tokens
    if overflow > 0:
        # Token counts are not exactly additive across a re-tokenization, so
        # trim a few extra tokens as a safety margin.
        content_ids = tokenizer(content, add_special_tokens=False).input_ids
        keep = max(len(content_ids) - overflow - 8, 0)
        trimmed_content = tokenizer.decode(content_ids[:keep], skip_special_tokens=True)
        chat_prompt = build_chat_prompt(title, trimmed_content)
        # Hard truncation stays as a last resort (e.g. an extremely long
        # title that trimming the body alone cannot compensate for).
        inputs = tokenizer(
            chat_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=max_prompt_tokens,
            padding=False,
            add_special_tokens=False
        )
    input_ids = inputs.input_ids
    attention_mask = inputs.attention_mask
    # inference_mode already disables gradient tracking; no_grad is redundant.
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            num_return_sequences=1,
            output_scores=False,
            return_dict_in_generate=False
        )
    # generate() returns prompt + continuation; keep only the continuation.
    generated_tokens = outputs[0][input_ids.shape[1]:]
    generated = tokenizer.decode(
        generated_tokens,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True
    )
    log.info("📤 Resultado gerado:")
    log.info(generated)
    json_result = extract_json(generated)
    duration = time.time() - start_time
    log.info("✅ JSON extraído em %.2fs", duration)
    # Memory cleanup between requests
    del outputs, generated_tokens, inputs
    gc.collect()
    return json_result, duration
def analyze_news(title, content):
    """Gradio handler: validate the inputs, run inference, format the result.

    Args:
        title: News headline from the UI (``None`` is treated as empty).
        content: News body from the UI (``None`` is treated as empty).

    Returns:
        A ``(status, output)`` tuple of strings. ``output`` is pretty-printed
        JSON when the model's answer parses, the raw extracted text when it
        does not, or an error message.
    """
    try:
        # `or ""` makes a missing value (None) take the same friendly
        # validation path as an empty string instead of raising on .strip().
        if not (title or "").strip() or not (content or "").strip():
            return "❌ Por favor, preencha tanto o título quanto o conteúdo.", "Erro: Campos obrigatórios não preenchidos."
        json_result, duration = infer_filter(title, content)
        if not json_result:
            return "❌ Não foi possível extrair JSON da resposta do modelo.", "Erro: Falha na extração do JSON."
        # Try to validate and pretty-print the JSON
        try:
            parsed_json = json.loads(json_result)
        except json.JSONDecodeError as e:
            log.error("❌ Erro ao parsear JSON: %s", e)
            status = f"⚠️ JSON retornado como string devido a erro de parsing ({duration:.2f}s)"
            return status, json_result
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        status = f"✅ Análise concluída em {duration:.2f}s"
        return status, formatted_json
    except Exception as e:
        # UI boundary: log the traceback and show the message to the user
        # rather than letting the handler crash.
        log.exception("❌ Erro inesperado:")
        return f"❌ Erro durante a análise: {str(e)}", f"Erro: {str(e)}"
+# -------- WARMUP --------
def warmup_model():
    """Run one throwaway inference on placeholder text before serving.

    Presumably this absorbs first-call overhead so the first real request is
    not slower than the rest. A failure is logged as a warning and never
    propagates to the caller.
    """
    log.info("🔥 Executando warmup...")
    try:
        infer_filter("Test title", "Test content")
    except Exception as exc:
        log.warning(f"⚠️ Warmup falhou: {exc}")
    else:
        log.info("✅ Warmup concluído.")
 # Interface Gradio
 def create_interface():
     with gr.Blocks(
+        title="Analisador de Notícias - Otimizado",
         theme=gr.themes.Soft(),
         css="""
         .gradio-container {
             max-width: 1200px !important;
         }
+        .performance-info {
+            background: #f0f9ff;
+            padding: 10px;
+            border-radius: 5px;
+            margin: 10px 0;
+        }
         """
     ) as demo:
+        gr.Markdown("# 📰 Analisador de Notícias - Otimizado")
+        gr.Markdown("Versão otimizada com técnicas de alto desempenho para CPU")
         with gr.Row():
             with gr.Column(scale=1):
                 analyze_btn = gr.Button("🔍 Analisar Notícia", variant="primary")
                 # Exemplos predefinidos
+                gr.Markdown("### Exemplos Rápidos:")
+                with gr.Row():
+                    example_btn1 = gr.Button("📻 Músico", size="sm")
+                    example_btn2 = gr.Button("⚽ Esporte", size="sm")
+                    example_btn3 = gr.Button("💼 Negócios", size="sm")
             with gr.Column(scale=1):
                 output = gr.Textbox(
                     show_copy_button=True
                 )
                 status = gr.Textbox(
                     label="Status da Análise",
+                    value="🟡 Aguardando entrada...",
                     interactive=False
                 )
+                # Informações de performance
+                with gr.Accordion("⚡ Otimizações Aplicadas", open=False):
+                    gr.Markdown("""
+                    **Técnicas de Otimização em CPU:**
+                    - 🧵 Threads limitadas (OMP_NUM_THREADS=2)
+                    - 🚫 Paralelismo de tokenizer desabilitado
+                    - 💾 Uso otimizado de memória (bfloat16)
+                    - 🔄 Cache de modelo ativado
+                    - 🧹 Limpeza automática de memória
+                    - 🎯 Modo de inferência otimizado
+                    - 🔥 Warmup automático do modelo
+                    """)
         # Exemplos predefinidos
         def load_example_1():
         # Event handlers
         analyze_btn.click(
+            fn=analyze_news,
             inputs=[title_input, content_input],
             outputs=[status, output]
         )
         )
         # Informações adicionais
+        with gr.Accordion("ℹ️ Informações Técnicas", open=False):
             gr.Markdown("""
+            **Configuração do Modelo:**
+            - Modelo: `habulaj/filterinstruct180`
+            - Formato: `torch.bfloat16` (otimizado para CPU)
+            - Max tokens: 128
+            - Beam search: Desabilitado (mais rápido)
+            - Cache: Ativado
+            **Performance:**
+            - Threads: 2 (OpenMP + MKL)
+            - Memória: Otimizada com limpeza automática
+            - Warmup: Executado automaticamente
             """)
     return demo
 if __name__ == "__main__":
+    # Executa warmup antes de iniciar a interface
+    warmup_model()
+    print("🚀 Iniciando interface Gradio otimizada...")
     demo = create_interface()
     demo.launch(
         share=False,