Spaces:

habulaj
/

filtergradio

Sleeping

App Files Files Community

habulaj committed on Jul 8

Commit

59f6d1a

verified ·

1 Parent(s): 7440293

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -149

app.py CHANGED Viewed

@@ -20,160 +20,159 @@ torch.set_num_interop_threads(1)
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 log = logging.getLogger("news-filter-gradio")
-# -------- LOAD MODEL --------
-model_name = "habulaj/filterinstruct180"
 log.info("🚀 Carregando modelo e tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    use_fast=True,
     padding_side="left"
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
-model = AutoPeftModelForCausalLM.from_pretrained(
-    model_name,
-    device_map="cpu",
-    torch_dtype=torch.bfloat16,
-    low_cpu_mem_usage=True,
-    use_cache=True,
-    trust_remote_code=True
-)
 model.eval()
 log.info("✅ Modelo carregado (eval mode).")
 generation_config = GenerationConfig(
-    max_new_tokens=128,
-    temperature=1.0,
-    do_sample=False,
-    num_beams=1,
     use_cache=True,
     eos_token_id=tokenizer.eos_token_id,
     pad_token_id=tokenizer.eos_token_id,
-    no_repeat_ngram_size=2,
-    repetition_penalty=1.1,
-    length_penalty=1.0
 )
-def build_chat_prompt(title, content):
-    """Constrói o prompt do chat"""
-    return f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
-Analyze the news title and content, and return the filters in JSON format with the defined fields.
 Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
 Title: "{title}"
 Content: "{content}"
-<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 """
-def extract_json(text):
-    """Extrai e limpa o JSON da resposta"""
-    match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', text, re.DOTALL)
-    if match:
-        json_text = match.group(0)
-        # Conversões comuns
-        json_text = re.sub(r"'", '"', json_text)
-        json_text = re.sub(r'\bTrue\b', 'true', json_text)
-        json_text = re.sub(r'\bFalse\b', 'false', json_text)
-        json_text = re.sub(r",\s*}", "}", json_text)
-        json_text = re.sub(r",\s*]", "]", json_text)
-        return json_text.strip()
-    return text
-def infer_filter(title, content):
-    """Função principal de inferência otimizada"""
-    log.info(f"🧠 Inferência iniciada para: {title}")
-    start_time = time.time()
-    chat_prompt = build_chat_prompt(title, content)
-    inputs = tokenizer(
-        chat_prompt,
-        return_tensors="pt",
-        truncation=True,
-        max_length=512,
-        padding=False,
-        add_special_tokens=False
-    )
-    input_ids = inputs.input_ids
-    attention_mask = inputs.attention_mask
-    with torch.no_grad(), torch.inference_mode():
-        outputs = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            generation_config=generation_config,
-            num_return_sequences=1,
-            output_scores=False,
-            return_dict_in_generate=False
         )
-    generated_tokens = outputs[0][len(input_ids[0]):]
-    generated = tokenizer.decode(
-        generated_tokens,
-        skip_special_tokens=True,
-        clean_up_tokenization_spaces=True
-    )
-    log.info("📤 Resultado gerado:")
-    log.info(generated)
-    json_result = extract_json(generated)
-    duration = time.time() - start_time
-    log.info(f"✅ JSON extraído em {duration:.2f}s")
-    # Limpeza de memória
-    del outputs, generated_tokens, inputs
-    gc.collect()
-    return json_result, duration
-def analyze_news(title, content):
-    """Função principal de análise de notícias para Gradio"""
-    try:
-        if not title.strip() or not content.strip():
-            return "❌ Por favor, preencha tanto o título quanto o conteúdo.", "Erro: Campos obrigatórios não preenchidos."
-        json_result, duration = infer_filter(title, content)
-        if json_result:
-            # Tenta validar e formatar o JSON
-            try:
-                parsed_json = json.loads(json_result)
-                formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
-                status = f"✅ Análise concluída em {duration:.2f}s"
-                return status, formatted_json
-            except json.JSONDecodeError as e:
-                log.error(f"❌ Erro ao parsear JSON: {e}")
-                status = f"⚠️ JSON retornado como string devido a erro de parsing ({duration:.2f}s)"
-                return status, json_result
-        else:
-            return "❌ Não foi possível extrair JSON da resposta do modelo.", "Erro: Falha na extração do JSON."
     except Exception as e:
         log.exception("❌ Erro inesperado:")
-        return f"❌ Erro durante a análise: {str(e)}", f"Erro: {str(e)}"
 # -------- WARMUP --------
 def warmup_model():
     """Executa warmup do modelo"""
     log.info("🔥 Executando warmup...")
     try:
-        infer_filter("Test title", "Test content")
         log.info("✅ Warmup concluído.")
     except Exception as e:
         log.warning(f"⚠️ Warmup falhou: {e}")
-# Interface Gradio
 def create_interface():
     with gr.Blocks(
         title="Analisador de Notícias - Otimizado",
@@ -191,8 +190,8 @@ def create_interface():
         """
     ) as demo:
-        gr.Markdown("# 📰 Analisador de Notícias - Otimizado")
-        gr.Markdown("Versão otimizada com técnicas de alto desempenho para CPU")
         with gr.Row():
             with gr.Column(scale=1):
@@ -211,12 +210,11 @@ def create_interface():
                 analyze_btn = gr.Button("🔍 Analisar Notícia", variant="primary")
                 # Exemplos predefinidos
-                gr.Markdown("### Exemplos Rápidos:")
-                with gr.Row():
-                    example_btn1 = gr.Button("📻 Músico", size="sm")
-                    example_btn2 = gr.Button("⚽ Esporte", size="sm")
-                    example_btn3 = gr.Button("💼 Negócios", size="sm")
             with gr.Column(scale=1):
                 output = gr.Textbox(
@@ -226,26 +224,25 @@ def create_interface():
                     show_copy_button=True
                 )
                 status = gr.Textbox(
                     label="Status da Análise",
-                    value="🟡 Aguardando entrada...",
                     interactive=False
                 )
-                # Informações de performance
-                with gr.Accordion("⚡ Otimizações Aplicadas", open=False):
-                    gr.Markdown("""
-                    **Técnicas de Otimização em CPU:**
-                    - 🧵 Threads limitadas (OMP_NUM_THREADS=2)
-                    - 🚫 Paralelismo de tokenizer desabilitado
-                    - 💾 Uso otimizado de memória (bfloat16)
-                    - 🔄 Cache de modelo ativado
-                    - 🧹 Limpeza automática de memória
-                    - 🎯 Modo de inferência otimizado
-                    - 🔥 Warmup automático do modelo
-                    """)
-        # Exemplos predefinidos
         def load_example_1():
             return (
                 "Legendary Musician Carlos Mendes Dies at 78",
@@ -264,9 +261,9 @@ def create_interface():
                 "The technology company announced significant workforce reductions citing economic uncertainty and changing market conditions. The layoffs will affect multiple departments across different regions."
             )
-        # Event handlers
         analyze_btn.click(
-            fn=analyze_news,
             inputs=[title_input, content_input],
             outputs=[status, output]
         )
@@ -287,19 +284,25 @@ def create_interface():
         )
         # Informações adicionais
-        with gr.Accordion("ℹ️ Informações Técnicas", open=False):
             gr.Markdown("""
-            **Configuração do Modelo:**
-            - Modelo: `habulaj/filterinstruct180`
-            - Formato: `torch.bfloat16` (otimizado para CPU)
-            - Max tokens: 128
-            - Beam search: Desabilitado (mais rápido)
-            - Cache: Ativado
-            **Performance:**
-            - Threads: 2 (OpenMP + MKL)
-            - Memória: Otimizada com limpeza automática
-            - Warmup: Executado automaticamente
             """)
     return demo
@@ -308,7 +311,7 @@ if __name__ == "__main__":
     # Executa warmup antes de iniciar a interface
     warmup_model()
-    print("🚀 Iniciando interface Gradio otimizada...")
     demo = create_interface()
     demo.launch(
         share=False,

 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 log = logging.getLogger("news-filter-gradio")
# Global configuration: run everything on CPU.
device = "cpu"
torch.set_default_device(device)

# One constant for the checkpoint so the model and the tokenizer are always
# loaded from the same repository.
MODEL_ID = "habulaj/filterinstruct180"

# Load the model and tokenizer once, at import time.
print("🚀 Carregando modelo e tokenizer...")
log.info("🚀 Carregando modelo e tokenizer...")
# The former `load_in_4bit=False` argument was dropped: it only restated the
# default, and the bare `load_in_4bit` kwarg is deprecated in favour of
# `quantization_config`.
model = AutoPeftModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map=device,
    torch_dtype=torch.bfloat16,  # bfloat16 weights instead of float32
    low_cpu_mem_usage=True,
    use_cache=True,
    # NOTE(review): this executes code shipped in the model repository; keep
    # it only while that repository is trusted.
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    use_fast=True,
    padding_side="left",
)

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Inference only: switch the model to evaluation mode.
model.eval()
log.info("✅ Modelo carregado (eval mode).")

# Chat template with explicit header/end-of-turn markers. Only `user` and
# `assistant` roles are rendered; any other role is silently skipped.
tokenizer.chat_template = """{% for message in messages %}
{%- if message['role'] == 'user' %}
{%- if loop.first %}
<|begin_of_text|><|start_header_id|>user<|end_header_id|>
{{ message['content'] }}<|eot_id|>
{%- else %}
<|start_header_id|>user<|end_header_id|>
{{ message['content'] }}<|eot_id|>
{%- endif %}
{%- elif message['role'] == 'assistant' %}
<|start_header_id|>assistant<|end_header_id|>
{{ message['content'] }}<|eot_id|>
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
<|start_header_id|>assistant<|end_header_id|>
{%- endif %}"""

# Generation settings built once and reused for every request.
# NOTE(review): sampling makes the output non-deterministic; for strict JSON
# extraction greedy decoding may be preferable - confirm with the model owner.
generation_config = GenerationConfig(
    max_new_tokens=200,
    temperature=1.0,
    min_p=0.1,
    do_sample=True,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)
def extract_json(text):
    """Return the JSON object embedded in *text*, or *text* if none is found.

    Model output may carry trailing tokens or a second object after the
    answer. A greedy ``{.*}`` match alone spans from the first ``{`` to the
    last ``}`` and swallows that trailing material, so the first ``{`` is
    handed to a real JSON parser first and only the text it consumed is kept.

    Args:
        text: Raw text generated by the model.

    Returns:
        The substring holding the first JSON object when it parses cleanly;
        otherwise the greedy brace-to-brace span (so callers can still show
        malformed output); otherwise ``text`` unchanged when no such span
        exists.
    """
    start = text.find("{")
    if start != -1:
        try:
            _, end = json.JSONDecoder().raw_decode(text, start)
        except json.JSONDecodeError:
            pass  # malformed JSON: fall through to the lenient regex below
        else:
            return text[start:end]

    match = re.search(r"\{.*\}", text, flags=re.DOTALL)
    return match.group(0) if match else text
def analyze_news(title, content):
    """Run the model on one news item and return its filter JSON as text.

    Args:
        title: News headline.
        content: News body text.

    Returns:
        Pretty-printed JSON when the extracted model output parses as JSON,
        the raw extracted text when it does not, or a message starting with
        ``"Erro durante a análise:"`` when an ``Exception`` is raised along
        the way (the exception is logged and not propagated).
    """
    try:
        log.info(f"🧠 Inferência iniciada para: {title}")
        start_time = time.time()

        # Single-turn conversation carrying the instruction and the news item.
        messages = [
            {
                "role": "user",
                "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "{title}"
Content: "{content}"
""",
            }
        ]

        # Render the chat template and tokenize in one step.
        # Assumes this returns a (1, seq_len) tensor of token ids, which is
        # how the result is used below - TODO confirm for the installed
        # transformers version.
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        )

        # inference_mode already disables gradient tracking, so a separate
        # no_grad context is not needed.
        with torch.inference_mode():
            outputs = model.generate(
                input_ids=inputs,
                # One unpadded prompt: every position is a real token. Passed
                # explicitly so generate() does not have to guess the mask.
                attention_mask=torch.ones_like(inputs),
                generation_config=generation_config,
                num_return_sequences=1,
                output_scores=False,
                return_dict_in_generate=False,
            )

        # Drop the prompt by token count. Slicing the decoded string by the
        # decoded prompt's length is fragile because decoding the longer
        # sequence is not guaranteed to reproduce the prompt text as an exact
        # prefix.
        prompt_length = inputs.shape[-1]
        generated_only = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        ).strip()

        json_result = extract_json(generated_only)

        duration = time.time() - start_time
        log.info(f"✅ JSON extraído em {duration:.2f}s")

        # Release the tensors before returning to keep peak memory down.
        del outputs, inputs
        gc.collect()

        # Pretty-print when the payload is valid JSON; otherwise hand back
        # the raw text so the user can still see what the model produced.
        try:
            parsed_json = json.loads(json_result)
        except json.JSONDecodeError:
            return json_result
        return json.dumps(parsed_json, indent=2, ensure_ascii=False)
    except Exception as e:
        log.exception("❌ Erro inesperado:")
        return f"Erro durante a análise: {str(e)}"
 # -------- WARMUP --------
 def warmup_model():
     """Executa warmup do modelo"""
     log.info("🔥 Executando warmup...")
     try:
+        analyze_news("Test title", "Test content")
         log.info("✅ Warmup concluído.")
     except Exception as e:
         log.warning(f"⚠️ Warmup falhou: {e}")
+# Interface Gradio (mantém a interface original)
 def create_interface():
     with gr.Blocks(
         title="Analisador de Notícias - Otimizado",
         """
     ) as demo:
+        gr.Markdown("# 📰 Analisador de Notícias")
+        gr.Markdown("Insira o título e conteúdo da notícia para obter os filtros em formato JSON.")
         with gr.Row():
             with gr.Column(scale=1):
                 analyze_btn = gr.Button("🔍 Analisar Notícia", variant="primary")
                 # Exemplos predefinidos
+                gr.Markdown("### Exemplos:")
+                example_btn1 = gr.Button("📻 Exemplo: Músico", size="sm")
+                example_btn2 = gr.Button("⚽ Exemplo: Esporte", size="sm")
+                example_btn3 = gr.Button("💼 Exemplo: Negócios", size="sm")
             with gr.Column(scale=1):
                 output = gr.Textbox(
                     show_copy_button=True
                 )
+                gr.Markdown("### Status:")
                 status = gr.Textbox(
                     label="Status da Análise",
+                    value="Aguardando entrada...",
                     interactive=False
                 )
+        # Função para atualizar status (mantém original)
+        def update_status_and_analyze(title, content):
+            if not title.strip() or not content.strip():
+                return "❌ Por favor, preencha tanto o título quanto o conteúdo.", "Erro: Campos obrigatórios não preenchidos."
+            try:
+                result = analyze_news(title, content)
+                return f"✅ Análise concluída com sucesso!", result
+            except Exception as e:
+                return f"❌ Erro na análise: {str(e)}", f"Erro: {str(e)}"
+        # Exemplos predefinidos (mantém originais)
         def load_example_1():
             return (
                 "Legendary Musician Carlos Mendes Dies at 78",
                 "The technology company announced significant workforce reductions citing economic uncertainty and changing market conditions. The layoffs will affect multiple departments across different regions."
             )
+        # Event handlers (mantém originais)
         analyze_btn.click(
+            fn=update_status_and_analyze,
             inputs=[title_input, content_input],
             outputs=[status, output]
         )
         )
         # Informações adicionais
+        with gr.Accordion("ℹ️ Informações", open=False):
             gr.Markdown("""
+            **Como usar:**
+            1. Insira o título da notícia
+            2. Insira o conteúdo da notícia
+            3. Clique em "Analisar Notícia"
+            4. O resultado será exibido em formato JSON
+            **Otimizações aplicadas:**
+            - Threads otimizadas para CPU
+            - Modo de inferência acelerado
+            - Limpeza automática de memória
+            - Cache de modelo ativado
+            - Warmup automático
+            **Notas:**
+            - O modelo está rodando em CPU
+            - O processamento pode levar alguns segundos
+            - Use os exemplos predefinidos para testar rapidamente
             """)
     return demo
     # Executa warmup antes de iniciar a interface
     warmup_model()
+    print("🚀 Iniciando interface Gradio...")
     demo = create_interface()
     demo.launch(
         share=False,