habulaj committed
Commit 3988dcb · verified · 1 Parent(s): 480aadb

Update app.py

Files changed (1)
  1. app.py +25 -25
app.py CHANGED
@@ -7,18 +7,14 @@ from transformers import AutoTokenizer
 from peft import AutoPeftModelForCausalLM
 
 # -------- LOGGING CONFIG --------
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s",
-)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 log = logging.getLogger("news-filter")
 
-# -------- CARREGAMENTO DE MODELO --------
+# -------- LOAD MODEL --------
 model_name = "habulaj/filter"
-log.info("🚀 Iniciando carregamento do modelo e tokenizer...")
+log.info("🚀 Carregando modelo e tokenizer...")
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-log.info("✅ Tokenizer carregado.")
 
 model = AutoPeftModelForCausalLM.from_pretrained(
     model_name,
@@ -27,21 +23,22 @@ model = AutoPeftModelForCausalLM.from_pretrained(
     low_cpu_mem_usage=True,
 )
 model.eval()
-log.info("✅ Modelo carregado e em modo eval.")
+log.info("✅ Modelo carregado (eval mode).")
 
 try:
+    model = torch.compile(model, mode="reduce-overhead")
     log.info("✅ Modelo compilado com torch.compile.")
 except Exception as e:
-    log.warning(f"⚠️ torch.compile indisponível: {e}")
+    log.warning(f"⚠️ torch.compile não disponível: {e}")
 
-# -------- FASTAPI --------
+# -------- FASTAPI INIT --------
 app = FastAPI(title="News Filter JSON API")
 
 @app.get("/")
 def read_root():
     return {"message": "News Filter JSON API is running!", "docs": "/docs"}
 
-# -------- INFERÊNCIA --------
+# -------- INFERENCE --------
 def infer_filter(title, content):
     prompt = f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
 
@@ -51,27 +48,30 @@ Title: "{title}"
 Content: "{content}"
 """
 
-    log.info(f"🧠 Iniciando inferência para notícia:\n📰 Title: {title}\n📝 Content: {content[:100]}...")
+    log.info(f"🧠 Inferência iniciada para: {title}")
     start_time = time.time()
 
+    # Tokenização + attention mask
     inputs = tokenizer(
         prompt,
         return_tensors="pt",
         truncation=True,
         max_length=512,
-        padding=False,
+        padding=True,
     )
     input_ids = inputs.input_ids.to("cpu")
+    attention_mask = inputs.attention_mask.to("cpu")
 
     with torch.no_grad():
         outputs = model.generate(
             input_ids=input_ids,
+            attention_mask=attention_mask,
             max_new_tokens=100,
             temperature=1.0,
-            do_sample=True,
-            top_p=0.9,
+            do_sample=False,  # Greedy decoding
+            top_k=50,  # Razoável para limitar
+            no_repeat_ngram_size=2,
             num_beams=1,
-            early_stopping=True,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id,
         )
@@ -79,32 +79,32 @@ Content: "{content}"
     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
     generated = decoded[len(prompt):].strip()
 
-    log.info("📤 Resposta bruta decodificada:")
+    log.info("📤 Resultado gerado:")
     log.info(generated)
 
     match = re.search(r"\{.*\}", generated, re.DOTALL)
     if match:
-        json_result = match.group(0)
         duration = time.time() - start_time
-        log.info(f"✅ JSON extraído com sucesso em {duration:.2f}s")
+        json_result = match.group(0)
+        log.info(f"✅ JSON extraído em {duration:.2f}s")
         return json_result
     else:
-        log.warning("⚠️ Não foi possível extrair JSON.")
+        log.warning("⚠️ Falha ao extrair JSON.")
        return "⚠️ Failed to extract JSON. Output:\n" + generated
 
-# -------- ENDPOINT --------
+# -------- API --------
 @app.get("/filter")
 def get_filter(
-    title: str = Query(..., description="Title of the news"),
-    content: str = Query(..., description="Content of the news")
+    title: str = Query(..., description="News title"),
+    content: str = Query(..., description="News content")
 ):
     try:
         json_output = infer_filter(title, content)
         import json
         return json.loads(json_output)
     except json.JSONDecodeError:
-        log.error("❌ Erro ao fazer parse do JSON retornado.")
+        log.error("❌ Erro ao fazer parse do JSON.")
         return {"raw_output": json_output}
     except Exception as e:
-        log.exception("❌ Erro inesperado durante a inferência:")
+        log.exception("❌ Erro inesperado:")
         raise HTTPException(status_code=422, detail=str(e))
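A note on the new generation settings: with do_sample=False, generate() decodes greedily, so sampling-only parameters such as temperature and top_k have no effect on the output (num_beams=1 keeps it single-path), while no_repeat_ngram_size=2 still applies. A minimal standalone sketch of the same decoding setup, using gpt2 as a stand-in for the habulaj/filter adapter:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")  # stand-in model, not the app's PEFT adapter
lm = AutoModelForCausalLM.from_pretrained("gpt2").eval()

enc = tok("Analyze the news title and content:", return_tensors="pt")
with torch.no_grad():
    out = lm.generate(
        input_ids=enc.input_ids,
        attention_mask=enc.attention_mask,  # explicit mask, as the commit now passes
        max_new_tokens=50,
        do_sample=False,           # greedy decoding
        no_repeat_ngram_size=2,
        num_beams=1,
        eos_token_id=tok.eos_token_id,
        pad_token_id=tok.eos_token_id,  # gpt2 has no pad token; reuse EOS
    )
print(tok.decode(out[0], skip_special_tokens=True))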
 
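For quick manual testing, a hypothetical client call against the /filter endpoint this commit touches; the base URL is a placeholder (Hugging Face Spaces typically serve on port 7860):

import requests

resp = requests.get(
    "http://localhost:7860/filter",  # placeholder base URL
    params={
        "title": "Example headline",
        "content": "Example article body...",
    },
    timeout=120,  # CPU-only generation can take a while
)
print(resp.json())  # parsed filter JSON, or {"raw_output": ...} on parse failure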