habulaj committed on
Commit
88103b2
·
verified ·
1 Parent(s): cda1138

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -7
app.py CHANGED
@@ -82,11 +82,12 @@ def read_root():
82
 
83
  # -------- INFERENCE OTIMIZADA --------
84
  def infer_filter(title, content):
85
- # Prompt mais conciso para reduzir tokens
86
- prompt = f"""Analyze and return JSON filters:
87
  Title: "{title}"
88
  Content: "{content}"
89
- """
 
90
 
91
  log.info(f"🧠 Inferência iniciada para: {title}")
92
  start_time = time.time()
@@ -112,7 +113,6 @@ Content: "{content}"
112
  attention_mask=attention_mask,
113
  generation_config=generation_config,
114
  # Parâmetros adicionais de otimização
115
- early_stopping=True,
116
  num_return_sequences=1,
117
  output_scores=False,
118
  return_dict_in_generate=False,
@@ -129,11 +129,15 @@ Content: "{content}"
129
  log.info("📤 Resultado gerado:")
130
  log.info(generated)
131
 
132
- # Regex otimizada
133
  match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', generated, re.DOTALL)
134
  if match:
135
  duration = time.time() - start_time
136
  json_result = match.group(0)
 
 
 
 
137
  log.info(f"✅ JSON extraído em {duration:.2f}s")
138
 
139
  # Limpeza de memória
@@ -148,6 +152,28 @@ Content: "{content}"
148
  gc.collect()
149
  raise HTTPException(status_code=404, detail="Unable to extract JSON from model output.")
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  # -------- API --------
152
  @app.get("/filter")
153
  def get_filter(
@@ -157,12 +183,23 @@ def get_filter(
157
  try:
158
  json_output = infer_filter(title, content)
159
  import json
160
- return json.loads(json_output)
 
 
 
 
 
 
 
 
 
 
 
161
  except HTTPException as he:
162
  raise he
163
  except Exception as e:
164
  log.exception("❌ Erro inesperado:")
165
- raise HTTPException(status_code=404, detail="Invalid or malformed JSON output from model.")
166
 
167
  # -------- WARMUP (OPCIONAL) --------
168
  @app.on_event("startup")
 
82
 
83
  # -------- INFERENCE OTIMIZADA --------
84
  def infer_filter(title, content):
85
+ # Prompt mais específico para JSON válido
86
+ prompt = f"""Analyze the news and return a valid JSON object with double quotes for all keys and string values.
87
  Title: "{title}"
88
  Content: "{content}"
89
+
90
+ Return only valid JSON:"""
91
 
92
  log.info(f"🧠 Inferência iniciada para: {title}")
93
  start_time = time.time()
 
113
  attention_mask=attention_mask,
114
  generation_config=generation_config,
115
  # Parâmetros adicionais de otimização
 
116
  num_return_sequences=1,
117
  output_scores=False,
118
  return_dict_in_generate=False,
 
129
  log.info("📤 Resultado gerado:")
130
  log.info(generated)
131
 
132
+ # Extração e limpeza de JSON
133
  match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', generated, re.DOTALL)
134
  if match:
135
  duration = time.time() - start_time
136
  json_result = match.group(0)
137
+
138
+ # Limpeza do JSON para corrigir formatação
139
+ json_result = fix_json_format(json_result)
140
+
141
  log.info(f"✅ JSON extraído em {duration:.2f}s")
142
 
143
  # Limpeza de memória
 
152
  gc.collect()
153
  raise HTTPException(status_code=404, detail="Unable to extract JSON from model output.")
154
 
155
def fix_json_format(json_str):
    """Repair common formatting mistakes in JSON text produced by an LLM.

    The repair is attempted in three stages, from safest to most heuristic:

    1. If the text already parses as JSON it is returned unchanged (apart
       from surrounding whitespace), so valid string values are never
       rewritten by the heuristics below.
    2. If the text is a Python literal (single quotes, ``True``/``False``/
       ``None``, trailing commas) it is parsed with ``ast.literal_eval`` and
       re-serialised as JSON.
    3. Otherwise a series of regex substitutions is applied. These are not
       string-aware and may touch text inside string values, which is why
       they are only used as a last resort.

    Args:
        json_str: Candidate JSON text.

    Returns:
        The repaired text, stripped of surrounding whitespace. The result is
        NOT guaranteed to be valid JSON; callers must still handle a parse
        failure.
    """
    # Local imports keep this function self-contained; only ``re`` is relied
    # on from module scope.
    import ast
    import json

    text = json_str.strip()

    # Stage 1: leave already-valid JSON alone.
    try:
        json.loads(text)
    except (ValueError, RecursionError):
        pass
    else:
        return text

    # Stage 2: Python-literal syntax. literal_eval only evaluates literals,
    # so it is safe to run on model output.
    try:
        literal = ast.literal_eval(text)
        if isinstance(literal, (dict, list)):
            # allow_nan=False makes non-finite floats raise ValueError so we
            # never emit non-standard tokens such as ``Infinity``.
            return json.dumps(literal, ensure_ascii=False, allow_nan=False)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass

    # Stage 3: best-effort regex heuristics.
    # Replace line breaks (and the indentation following them) with a space.
    text = re.sub(r'\n\s*', ' ', text)

    # Convert single-quoted keys and string values to double quotes.
    text = re.sub(r"'([^']*)':", r'"\1":', text)
    text = re.sub(r":\s*'([^']*)'", r': "\1"', text)

    # Map Python constants to their JSON spellings.
    text = re.sub(r':\s*True\b', ': true', text)
    text = re.sub(r':\s*False\b', ': false', text)
    text = re.sub(r':\s*None\b', ': null', text)

    # Drop trailing commas before a closing brace or bracket.
    text = re.sub(r',\s*}', '}', text)
    text = re.sub(r',\s*]', ']', text)

    # Collapse runs of whitespace.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()
176
+
177
  # -------- API --------
178
  @app.get("/filter")
179
  def get_filter(
 
183
  try:
184
  json_output = infer_filter(title, content)
185
  import json
186
+
187
+ # Tenta fazer parse do JSON
188
+ try:
189
+ parsed_result = json.loads(json_output)
190
+ return {"result": parsed_result}
191
+ except json.JSONDecodeError as je:
192
+ log.error(f"❌ Erro ao parsear JSON: {je}")
193
+ log.error(f"JSON problemático: {json_output}")
194
+
195
+ # Fallback: retorna JSON como string se não conseguir parsear
196
+ return {"result": json_output, "warning": "JSON returned as string due to parsing error"}
197
+
198
  except HTTPException as he:
199
  raise he
200
  except Exception as e:
201
  log.exception("❌ Erro inesperado:")
202
+ raise HTTPException(status_code=500, detail="Internal server error during inference.")
203
 
204
  # -------- WARMUP (OPCIONAL) --------
205
  @app.on_event("startup")