Spaces:

habulaj
/

filter

Sleeping

App Files Files Community

habulaj committed on Jul 8

Commit

a15c41a

verified ·

1 Parent(s): 2560f7d

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -30

app.py CHANGED Viewed

@@ -1,39 +1,48 @@
 from fastapi import FastAPI, Query, HTTPException
 import torch
 import re
 from transformers import AutoTokenizer
 from peft import AutoPeftModelForCausalLM
-# Carrega modelo e tokenizer da Hugging Face - LoRA fine-tuned
 model_name = "habulaj/filter"
-print("Carregando tokenizer e modelo (CPU)...")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Otimizações de performance
 model = AutoPeftModelForCausalLM.from_pretrained(
     model_name,
     device_map="cpu",
-    torch_dtype=torch.float32,  # float32 é mais rápido em CPU
-    low_cpu_mem_usage=True,     # Reduz uso de memória
 )
 model.eval()
-# Compilação do modelo para otimizar (PyTorch 2.0+)
 try:
     model = torch.compile(model, mode="reduce-overhead")
-    print("✅ Modelo compilado com torch.compile")
 except Exception as e:
-    print(f"⚠️ torch.compile não disponível: {e}")
 # -------- FASTAPI --------
 app = FastAPI(title="News Filter JSON API")
-# -------- ROOT ENDPOINT --------
 @app.get("/")
 def read_root():
     """Handle GET / by returning a static status message and the docs path."""
     return {"message": "News Filter JSON API is running!", "docs": "/docs"}
-# Função para inferência otimizada
 def infer_filter(title, content):
     prompt = f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
@@ -43,44 +52,48 @@ Title: "{title}"
 Content: "{content}"
 """
-    # Otimizações de tokenização
     inputs = tokenizer(
-        prompt,
         return_tensors="pt",
         truncation=True,
-        max_length=512,  # Limita tamanho do input
-        padding=False    # Não faz padding desnecessário
     )
     input_ids = inputs.input_ids.to("cpu")
     with torch.no_grad():
-        # Configurações otimizadas para velocidade
         outputs = model.generate(
             input_ids=input_ids,
-            max_new_tokens=100,      # Reduzido de 128 para 100
-            temperature=1.0,         # Reduzido para ser mais determinístico
             do_sample=True,
             top_p=0.9,
-            num_beams=1,            # Beam search = 1 (greedy) é mais rápido
-            early_stopping=True,    # Para quando encontrar EOS
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id,
         )
     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Remove prompt do output
     generated = decoded[len(prompt):].strip()
-    # Extrai JSON
     match = re.search(r"\{.*\}", generated, re.DOTALL)
     if match:
-        result = match.group(0)
-        return result
     else:
         return "⚠️ Failed to extract JSON. Output:\n" + generated
-# -------- API ROUTE --------
 @app.get("/filter")
 def get_filter(
     title: str = Query(..., description="Title of the news"),
@@ -89,10 +102,10 @@ def get_filter(
     try:
         json_output = infer_filter(title, content)
         import json
-        # Retorna como dados brutos (parse do JSON)
         return json.loads(json_output)
     except json.JSONDecodeError:
-        # Se não conseguir fazer parse, retorna como string
         return {"raw_output": json_output}
     except Exception as e:
         raise HTTPException(status_code=422, detail=str(e))

 from fastapi import FastAPI, Query, HTTPException
 import torch
 import re
+import time
+import logging
 from transformers import AutoTokenizer
 from peft import AutoPeftModelForCausalLM
+# -------- LOGGING CONFIG --------
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+log = logging.getLogger("news-filter")
+# -------- MODEL LOADING --------
 model_name = "habulaj/filter"
+log.info("🚀 Iniciando carregamento do modelo e tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+log.info("✅ Tokenizer carregado.")
 model = AutoPeftModelForCausalLM.from_pretrained(
     model_name,
     device_map="cpu",
+    torch_dtype=torch.float32,
+    low_cpu_mem_usage=True,
 )
 model.eval()
+log.info("✅ Modelo carregado e em modo eval.")
 try:
     model = torch.compile(model, mode="reduce-overhead")
+    log.info("✅ Modelo compilado com torch.compile.")
 except Exception as e:
+    log.warning(f"⚠️ torch.compile indisponível: {e}")
 # -------- FASTAPI --------
 app = FastAPI(title="News Filter JSON API")
 @app.get("/")
 def read_root():
     """Handle GET / by returning a static status message and the docs path."""
     return {"message": "News Filter JSON API is running!", "docs": "/docs"}
+# -------- INFERENCE --------
 def infer_filter(title, content):
     prompt = f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
 Content: "{content}"
 """
+    log.info(f"🧠 Iniciando inferência para notícia:\n📰 Title: {title}\n📝 Content: {content[:100]}...")
+    start_time = time.time()
     inputs = tokenizer(
+        prompt,
         return_tensors="pt",
         truncation=True,
+        max_length=512,
+        padding=False,
     )
     input_ids = inputs.input_ids.to("cpu")
     with torch.no_grad():
         outputs = model.generate(
             input_ids=input_ids,
+            max_new_tokens=100,
+            temperature=1.0,
             do_sample=True,
             top_p=0.9,
+            num_beams=1,
+            early_stopping=True,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id,
         )
     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
     generated = decoded[len(prompt):].strip()
+    log.info("📤 Resposta bruta decodificada:")
+    log.info(generated)
     match = re.search(r"\{.*\}", generated, re.DOTALL)
     if match:
+        json_result = match.group(0)
+        duration = time.time() - start_time
+        log.info(f"✅ JSON extraído com sucesso em {duration:.2f}s")
+        return json_result
     else:
+        log.warning("⚠️ Não foi possível extrair JSON.")
         return "⚠️ Failed to extract JSON. Output:\n" + generated
+# -------- ENDPOINT --------
 @app.get("/filter")
 def get_filter(
     title: str = Query(..., description="Title of the news"),
     try:
         json_output = infer_filter(title, content)
         import json
         return json.loads(json_output)
     except json.JSONDecodeError:
+        log.error("❌ Erro ao fazer parse do JSON retornado.")
         return {"raw_output": json_output}
     except Exception as e:
+        log.exception("❌ Erro inesperado durante a inferência:")
         raise HTTPException(status_code=422, detail=str(e))