Spaces:

habulaj
/

filter

Sleeping

App Files Files Community

habulaj committed on Jul 8

Commit

3e43d33

verified ·

1 Parent(s): 07f0d54

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -12

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import torch
 import re
 from transformers import AutoTokenizer
 from peft import AutoPeftModelForCausalLM
-import json
 # Load the model and tokenizer from Hugging Face (LoRA fine-tuned)
 model_name = "habulaj/filter"
@@ -26,15 +25,24 @@ try:
 except Exception as e:
     print(f"⚠️ torch.compile não disponível: {e}")
 # -------- FASTAPI --------
 app = FastAPI(title="News Filter JSON API")
 @app.get("/")
 def read_root():
     """Return a status message and the path of the API docs for GET "/"."""
     return {"message": "News Filter JSON API is running!", "docs": "/docs"}
 # Optimized inference function
 def infer_filter(title, content):
     prompt = f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
 Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
@@ -43,37 +51,47 @@ Title: "{title}"
 Content: "{content}"
 """
     inputs = tokenizer(
-        prompt,
         return_tensors="pt",
         truncation=True,
-        max_length=512,
-        padding=False
     )
     input_ids = inputs.input_ids.to("cpu")
     with torch.no_grad():
         outputs = model.generate(
             input_ids=input_ids,
-            max_new_tokens=100,
-            temperature=1.0,
             do_sample=True,
             top_p=0.9,
-            num_beams=1,
-            early_stopping=True,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id,
         )
     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
     generated = decoded[len(prompt):].strip()
     match = re.search(r"\{.*\}", generated, re.DOTALL)
     if match:
-        return match.group(0)
     else:
         return "⚠️ Failed to extract JSON. Output:\n" + generated
 @app.get("/filter")
 def get_filter(
     title: str = Query(..., description="Title of the news"),
@@ -81,8 +99,11 @@ def get_filter(
 ):
     try:
         json_output = infer_filter(title, content)
         return json.loads(json_output)
     except json.JSONDecodeError:
         return {"raw_output": json_output}
     except Exception as e:
         raise HTTPException(status_code=422, detail=str(e))

 import re
 from transformers import AutoTokenizer
 from peft import AutoPeftModelForCausalLM
 # Load the model and tokenizer from Hugging Face (LoRA fine-tuned)
 model_name = "habulaj/filter"
 except Exception as e:
     print(f"⚠️ torch.compile não disponível: {e}")
+# Cache for similar prompts
+prompt_cache = {}
 # -------- FASTAPI --------
 app = FastAPI(title="News Filter JSON API")
+# -------- ROOT ENDPOINT --------
 @app.get("/")
 def read_root():
     """Return a status message and the path of the API docs for GET "/"."""
     return {"message": "News Filter JSON API is running!", "docs": "/docs"}
 # Optimized inference function
 def infer_filter(title, content):
+    # Cache key simples
+    cache_key = hash((title[:50], content[:100]))
+    if cache_key in prompt_cache:
+        return prompt_cache[cache_key]
     prompt = f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
 Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
 Content: "{content}"
 """
+    # Otimizações de tokenização
     inputs = tokenizer(
+        prompt,
         return_tensors="pt",
         truncation=True,
+        max_length=512,  # Limita tamanho do input
+        padding=False    # Não faz padding desnecessário
     )
     input_ids = inputs.input_ids.to("cpu")
     with torch.no_grad():
+        # Configurações otimizadas para velocidade
         outputs = model.generate(
             input_ids=input_ids,
+            max_new_tokens=100,      # Reduzido de 128 para 100
+            temperature=1.0,         # Reduzido para ser mais determinístico
             do_sample=True,
             top_p=0.9,
+            num_beams=1,            # Beam search = 1 (greedy) é mais rápido
+            early_stopping=True,    # Para quando encontrar EOS
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id,
         )
     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Remove prompt do output
     generated = decoded[len(prompt):].strip()
+    # Extrai JSON
     match = re.search(r"\{.*\}", generated, re.DOTALL)
     if match:
+        result = match.group(0)
+        # Cache o resultado (limitado a 100 entradas)
+        if len(prompt_cache) < 100:
+            prompt_cache[cache_key] = result
+        return result
     else:
         return "⚠️ Failed to extract JSON. Output:\n" + generated
+# -------- API ROUTE --------
 @app.get("/filter")
 def get_filter(
     title: str = Query(..., description="Title of the news"),
 ):
     try:
         json_output = infer_filter(title, content)
+        import json
+        # Retorna como dados brutos (parse do JSON)
         return json.loads(json_output)
     except json.JSONDecodeError:
+        # Se não conseguir fazer parse, retorna como string
         return {"raw_output": json_output}
     except Exception as e:
         raise HTTPException(status_code=422, detail=str(e))