habulaj committed
Commit 59d7833 · verified · 1 Parent(s): 9650ab7

Update app.py

Files changed (1):
  1. app.py +100 -94

app.py CHANGED
@@ -1,17 +1,12 @@
  from fastapi import FastAPI, Query, HTTPException
- import os
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
  import torch
  import re
  import time
  import logging
  import os
- import gc
- import json
  from transformers import AutoTokenizer, GenerationConfig
  from peft import AutoPeftModelForCausalLM
- from unsloth.chat_templates import get_chat_template
- from unsloth import FastLanguageModel

  # -------- OPTIMIZATION SETTINGS --------
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -20,16 +15,19 @@ os.environ["MKL_NUM_THREADS"] = "2"
  torch.set_num_threads(2)
  torch.set_num_interop_threads(1)

- # -------- LOGGING --------
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
  log = logging.getLogger("news-filter")

- # -------- MODEL --------
  model_name = "habulaj/filterinstruct180"
  log.info("🚀 Loading model and tokenizer...")

- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
- tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

  if tokenizer.pad_token is None:
      tokenizer.pad_token = tokenizer.eos_token
@@ -39,11 +37,12 @@ model = AutoPeftModelForCausalLM.from_pretrained(
      device_map="cpu",
      torch_dtype=torch.bfloat16,
      low_cpu_mem_usage=True,
      trust_remote_code=True
  )
- FastLanguageModel.for_inference(model, cpu=True)
  model.eval()
- log.info("✅ Model loaded (eval mode).")

  generation_config = GenerationConfig(
      max_new_tokens=128,
@@ -53,113 +52,120 @@ generation_config = GenerationConfig(
      use_cache=True,
      eos_token_id=tokenizer.eos_token_id,
      pad_token_id=tokenizer.eos_token_id,
      repetition_penalty=1.1,
      length_penalty=1.0
  )

- # -------- FASTAPI --------
  app = FastAPI(title="News Filter JSON API")

  @app.get("/")
  def read_root():
      return {"message": "News Filter JSON API is running!", "docs": "/docs"}

- @app.get("/filter")
- def get_filter(
-     title: str = Query(..., description="News title"),
-     content: str = Query(..., description="News content")
- ):
-     try:
-         result = infer_filter(title, content)
-         try:
-             return {"result": json.loads(result)}
-         except json.JSONDecodeError:
-             return {"result": result, "warning": "Returned as string due to JSON parsing error"}
-     except HTTPException as he:
-         raise he
-     except Exception as e:
-         log.exception("❌ Unexpected error:")
-         raise HTTPException(status_code=500, detail="Internal server error during inference.")
-
- @app.on_event("startup")
- async def warmup():
-     log.info("🔥 Running warmup...")
-     try:
-         infer_filter("Test title", "Test content")
-         log.info("✅ Warmup complete.")
-     except Exception as e:
-         log.warning(f"⚠️ Warmup failed: {e}")
-
  # -------- INFERENCE --------
  def infer_filter(title, content):
-     messages = [
-         {
-             "role": "user",
-             "content": """Analyze the news title and content, and return the filters in JSON format with the defined fields.
-
- Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
-
- Title: "New 'Star Wars' Movie Announced"
- Content: "Lucasfilm confirmed a new Star Wars movie set to release in 2026, directed by a rising filmmaker."
- """
-         },
-         {
-             "role": "assistant",
-             "content": '{ "death_related": false, "relevance": "high", "global_interest": true, "entity_type": "movie", "entity_name": "Star Wars", "breaking_news": true, "has_video_content": false }'
-         },
-         {
-             "role": "user",
-             "content": """Analyze the news title and content, and return the filters in JSON format with the defined fields.
-
- Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
-
- Title: "Legendary Musician Carlos Mendes Dies at 78"
- Content: "Carlos Mendes, the internationally acclaimed Brazilian guitarist and composer known for blending traditional bossa nova with modern jazz, has died at the age of 78."
- """
-         },
-         {
-             "role": "assistant",
-             "content": '{ "death_related": true, "relevance": "high", "global_interest": true, "entity_type": "person", "entity_name": "Carlos Mendes", "breaking_news": true, "has_video_content": false }'
-         },
-         {
-             "role": "user",
-             "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
-
- Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
-
- Title: "{title}"
- Content: "{content}"
- """
-         }
-     ]
-
      log.info(f"🧠 Inference started for: {title}")
      start_time = time.time()

-     inputs = tokenizer.apply_chat_template(
-         messages,
-         tokenize=True,
-         add_generation_prompt=True,
          return_tensors="pt",
-     ).to("cpu")

      with torch.no_grad(), torch.inference_mode():
          outputs = model.generate(
-             input_ids=inputs,
              generation_config=generation_config,
          )

-     prompt_text = tokenizer.decode(inputs[0], skip_special_tokens=True)
-     full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     generated = full_output[len(prompt_text):].strip()

-     json_str = extract_json(generated)
      duration = time.time() - start_time
      log.info(f"✅ JSON extracted in {duration:.2f}s")
-     return json_str

  def extract_json(text):
-     match = re.search(r'\{.*?\}', text, flags=re.DOTALL)
      if match:
-         return match.group(0)
-     return text

  from fastapi import FastAPI, Query, HTTPException
  import torch
  import re
  import time
  import logging
  import os
  from transformers import AutoTokenizer, GenerationConfig
  from peft import AutoPeftModelForCausalLM
+ import gc

  # -------- OPTIMIZATION SETTINGS --------
  os.environ["TOKENIZERS_PARALLELISM"] = "false"

  torch.set_num_threads(2)
  torch.set_num_interop_threads(1)

+ # -------- LOGGING CONFIG --------
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
  log = logging.getLogger("news-filter")

+ # -------- LOAD MODEL --------
  model_name = "habulaj/filterinstruct180"
  log.info("🚀 Loading model and tokenizer...")

+ tokenizer = AutoTokenizer.from_pretrained(
+     model_name,
+     use_fast=True,
+     padding_side="left"
+ )

  if tokenizer.pad_token is None:
      tokenizer.pad_token = tokenizer.eos_token

  model = AutoPeftModelForCausalLM.from_pretrained(
      device_map="cpu",
      torch_dtype=torch.bfloat16,
      low_cpu_mem_usage=True,
+     use_cache=True,
      trust_remote_code=True
  )
+
  model.eval()
+ log.info("✅ Model loaded (eval mode).")

  generation_config = GenerationConfig(
      max_new_tokens=128,

      use_cache=True,
      eos_token_id=tokenizer.eos_token_id,
      pad_token_id=tokenizer.eos_token_id,
+     no_repeat_ngram_size=2,
      repetition_penalty=1.1,
      length_penalty=1.0
  )

+ # -------- FASTAPI INIT --------
  app = FastAPI(title="News Filter JSON API")

  @app.get("/")
  def read_root():
      return {"message": "News Filter JSON API is running!", "docs": "/docs"}

  # -------- INFERENCE --------
  def infer_filter(title, content):
      log.info(f"🧠 Inference started for: {title}")
      start_time = time.time()

+     chat_prompt = build_chat_prompt(title, content)
+
+     inputs = tokenizer(
+         chat_prompt,
          return_tensors="pt",
+         truncation=True,
+         max_length=512,
+         padding=False,
+         add_special_tokens=False
+     )
+
+     input_ids = inputs.input_ids
+     attention_mask = inputs.attention_mask

      with torch.no_grad(), torch.inference_mode():
          outputs = model.generate(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
              generation_config=generation_config,
+             num_return_sequences=1,
+             output_scores=False,
+             return_dict_in_generate=False
          )

+     generated_tokens = outputs[0][len(input_ids[0]):]
+     generated = tokenizer.decode(
+         generated_tokens,
+         skip_special_tokens=True,
+         clean_up_tokenization_spaces=True
+     )
+
+     log.info("📤 Generated result:")
+     log.info(generated)
+
+     json_result = extract_json(generated)

      duration = time.time() - start_time
      log.info(f"✅ JSON extracted in {duration:.2f}s")
+
+     # Memory cleanup
+     del outputs, generated_tokens, inputs
+     gc.collect()
+
+     if json_result:
+         return json_result
+     else:
+         raise HTTPException(status_code=404, detail="Unable to extract JSON from model output.")
+
+ def build_chat_prompt(title: str, content: str) -> str:
+     return f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
+ Analyze the news title and content, and return the filters in JSON format with the defined fields.
+
+ Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
+
+ Title: "{title}"
+ Content: "{content}"<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

  def extract_json(text):
+     match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', text, re.DOTALL)
      if match:
+         json_text = match.group(0)
+
+         # Common conversions
+         json_text = re.sub(r"'", '"', json_text)
+         json_text = re.sub(r'\bTrue\b', 'true', json_text)
+         json_text = re.sub(r'\bFalse\b', 'false', json_text)
+         json_text = re.sub(r",\s*}", "}", json_text)
+         json_text = re.sub(r",\s*]", "]", json_text)
+         return json_text.strip()
+     return text
+
+ # -------- API ROUTE --------
+ @app.get("/filter")
+ def get_filter(
+     title: str = Query(..., description="News title"),
+     content: str = Query(..., description="News content")
+ ):
+     try:
+         json_output = infer_filter(title, content)
+         import json
+         try:
+             parsed = json.loads(json_output)
+             return {"result": parsed}
+         except json.JSONDecodeError as e:
+             log.error(f"❌ Error parsing JSON: {e}")
+             return {"result": json_output, "warning": "JSON returned as string due to parsing error"}
+     except HTTPException as e:
+         raise e
+     except Exception as e:
+         log.exception("❌ Unexpected error:")
+         raise HTTPException(status_code=500, detail="Internal server error during inference.")
+
+ @app.on_event("startup")
+ async def warmup():
+     log.info("🔥 Running warmup...")
+     try:
+         infer_filter("Test title", "Test content")
+         log.info("✅ Warmup complete.")
+     except Exception as e:
+         log.warning(f"⚠️ Warmup failed: {e}")
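
For reference, a minimal client sketch for the updated /filter route. The uvicorn command and base URL are assumptions for local testing, not part of the commit:

# Assumes the app has been started locally first, e.g.: uvicorn app:app --port 8000
import requests

resp = requests.get(
    "http://localhost:8000/filter",
    params={
        "title": "New 'Star Wars' Movie Announced",
        "content": "Lucasfilm confirmed a new Star Wars movie set to release in 2026.",
    },
    timeout=120,  # CPU-only generation is slow; allow a generous timeout
)
resp.raise_for_status()
print(resp.json())
# Success shape: {"result": {"death_related": false, ...}}
# If JSON parsing fails, the route returns the raw string plus a "warning" field.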
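The rewritten extract_json grabs the first brace group (one level of nesting) and then normalizes common model quirks: single quotes, Python booleans, and trailing commas. A quick self-contained check of that behavior, copying the committed regex and substitutions:

import json
import re

def extract_json(text):
    # Copy of the committed helper, reproduced here only for the demo
    match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', text, re.DOTALL)
    if match:
        json_text = match.group(0)
        json_text = re.sub(r"'", '"', json_text)             # single -> double quotes
        json_text = re.sub(r'\bTrue\b', 'true', json_text)   # Python -> JSON booleans
        json_text = re.sub(r'\bFalse\b', 'false', json_text)
        json_text = re.sub(r",\s*}", "}", json_text)         # drop trailing commas
        json_text = re.sub(r",\s*]", "]", json_text)
        return json_text.strip()
    return text

raw = "Sure! {'death_related': False, 'relevance': 'high',}"
print(json.loads(extract_json(raw)))
# -> {'death_related': False, 'relevance': 'high'}

Note that the blanket quote substitution corrupts any value that itself contains an apostrophe (e.g. an entity_name like "Star Wars' Sequel"), which is plausible in this news domain.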