habulaj committed on
Commit
9537362
·
verified ·
1 Parent(s): 3e43d33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -34
app.py CHANGED
@@ -3,9 +3,10 @@ import torch
3
  import re
4
  from transformers import AutoTokenizer
5
  from peft import AutoPeftModelForCausalLM
 
6
 
7
  # Carrega modelo e tokenizer da Hugging Face - LoRA fine-tuned
8
- model_name = "habulaj/filter"
9
  print("Carregando tokenizer e modelo (CPU)...")
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
 
@@ -25,24 +26,15 @@ try:
25
  except Exception as e:
26
  print(f"⚠️ torch.compile não disponível: {e}")
27
 
28
- # Cache para prompts similares
29
- prompt_cache = {}
30
-
31
  # -------- FASTAPI --------
32
  app = FastAPI(title="News Filter JSON API")
33
 
34
- # -------- ROOT ENDPOINT --------
35
  @app.get("/")
36
  def read_root():
37
  return {"message": "News Filter JSON API is running!", "docs": "/docs"}
38
 
39
  # Função para inferência otimizada
40
  def infer_filter(title, content):
41
- # Cache key simples
42
- cache_key = hash((title[:50], content[:100]))
43
- if cache_key in prompt_cache:
44
- return prompt_cache[cache_key]
45
-
46
  prompt = f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
47
 
48
  Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
@@ -51,47 +43,37 @@ Title: "{title}"
51
  Content: "{content}"
52
  """
53
 
54
- # Otimizações de tokenização
55
  inputs = tokenizer(
56
- prompt,
57
  return_tensors="pt",
58
  truncation=True,
59
- max_length=512, # Limita tamanho do input
60
- padding=False # Não faz padding desnecessário
61
  )
62
  input_ids = inputs.input_ids.to("cpu")
63
-
64
  with torch.no_grad():
65
- # Configurações otimizadas para velocidade
66
  outputs = model.generate(
67
  input_ids=input_ids,
68
- max_new_tokens=100, # Reduzido de 128 para 100
69
- temperature=1.0, # Reduzido para ser mais determinístico
70
  do_sample=True,
71
  top_p=0.9,
72
- num_beams=1, # Beam search = 1 (greedy) é mais rápido
73
- early_stopping=True, # Para quando encontrar EOS
74
  eos_token_id=tokenizer.eos_token_id,
75
  pad_token_id=tokenizer.eos_token_id,
76
  )
77
-
78
  decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
79
-
80
- # Remove prompt do output
81
  generated = decoded[len(prompt):].strip()
82
-
83
- # Extrai JSON
84
  match = re.search(r"\{.*\}", generated, re.DOTALL)
85
  if match:
86
- result = match.group(0)
87
- # Cache o resultado (limitado a 100 entradas)
88
- if len(prompt_cache) < 100:
89
- prompt_cache[cache_key] = result
90
- return result
91
  else:
92
  return "⚠️ Failed to extract JSON. Output:\n" + generated
93
 
94
- # -------- API ROUTE --------
95
  @app.get("/filter")
96
  def get_filter(
97
  title: str = Query(..., description="Title of the news"),
@@ -99,11 +81,8 @@ def get_filter(
99
  ):
100
  try:
101
  json_output = infer_filter(title, content)
102
- import json
103
- # Retorna como dados brutos (parse do JSON)
104
  return json.loads(json_output)
105
  except json.JSONDecodeError:
106
- # Se não conseguir fazer parse, retorna como string
107
  return {"raw_output": json_output}
108
  except Exception as e:
109
  raise HTTPException(status_code=422, detail=str(e))
 
3
  import re
4
  from transformers import AutoTokenizer
5
  from peft import AutoPeftModelForCausalLM
6
+ import json
7
 
8
  # Carrega modelo e tokenizer da Hugging Face - LoRA fine-tuned
9
+ model_name = "habulaj/filterinstruct"
10
  print("Carregando tokenizer e modelo (CPU)...")
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
 
 
26
  except Exception as e:
27
  print(f"⚠️ torch.compile não disponível: {e}")
28
 
 
 
 
29
  # -------- FASTAPI --------
30
  app = FastAPI(title="News Filter JSON API")
31
 
 
32
@app.get("/")
def read_root():
    """Health-check root endpoint; points clients at the interactive docs."""
    return {"message": "News Filter JSON API is running!", "docs": "/docs"}
35
 
36
  # Função para inferência otimizada
37
  def infer_filter(title, content):
 
 
 
 
 
38
  prompt = f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
39
 
40
  Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
 
43
  Content: "{content}"
44
  """
45
 
 
46
  inputs = tokenizer(
47
+ prompt,
48
  return_tensors="pt",
49
  truncation=True,
50
+ max_length=512,
51
+ padding=False
52
  )
53
  input_ids = inputs.input_ids.to("cpu")
54
+
55
  with torch.no_grad():
 
56
  outputs = model.generate(
57
  input_ids=input_ids,
58
+ max_new_tokens=100,
59
+ temperature=1.0,
60
  do_sample=True,
61
  top_p=0.9,
62
+ num_beams=1,
63
+ early_stopping=True,
64
  eos_token_id=tokenizer.eos_token_id,
65
  pad_token_id=tokenizer.eos_token_id,
66
  )
67
+
68
  decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
69
  generated = decoded[len(prompt):].strip()
70
+
 
71
  match = re.search(r"\{.*\}", generated, re.DOTALL)
72
  if match:
73
+ return match.group(0)
 
 
 
 
74
  else:
75
  return "⚠️ Failed to extract JSON. Output:\n" + generated
76
 
 
77
@app.get("/filter")
def get_filter(
    title: str = Query(..., description="Title of the news"),
    content: str = Query(..., description="Content of the news"),
):
    """API route: run model inference and return the extracted filter as JSON."""
    try:
        json_output = infer_filter(title, content)
        return json.loads(json_output)
    except json.JSONDecodeError:
        # Model emitted non-JSON text; hand it back raw so the caller can debug.
        return {"raw_output": json_output}
    except Exception as exc:
        raise HTTPException(status_code=422, detail=str(exc))