Spaces:

VietCat
/

ViT5BaseNode

Sleeping

App Files Files Community

VietCat committed on Jun 11

Commit

fd6737e

1 Parent(s): 831df6f

add time log and reduce processing time

Browse files

Files changed (1) hide show

app.py +31 -37

app.py CHANGED Viewed

@@ -1,65 +1,59 @@
 import time
 import logging
-import torch
 from fastapi import FastAPI, Request
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-from concurrent.futures import ThreadPoolExecutor
-import asyncio
-# Khởi tạo app
-app = FastAPI()
-# Logging
 logging.basicConfig(level=logging.INFO)
-# Load model và tokenizer
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-tokenizer = AutoTokenizer.from_pretrained("VietAI/vit5-base")
-model = AutoModelForSeq2SeqLM.from_pretrained("VietAI/vit5-base").to(device)
-# Thread executor để xử lý blocking
-executor = ThreadPoolExecutor(max_workers=2)
-# Kiểu dữ liệu đầu vào
-class TextIn(BaseModel):
     text: str
-# -------------------------------
-# GET: kiểm tra API sẵn sàng
 @app.get("/")
-def read_root():
-    return {"message": "API is ready."}
-# -------------------------------
-# Hàm tóm tắt (blocking)
-def summarize_text(text: str) -> str:
-    prompt = "vietnews: " + text.strip() + " </s>"
-    encoding = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
     input_ids = encoding["input_ids"].to(device)
     attention_mask = encoding["attention_mask"].to(device)
     outputs = model.generate(
         input_ids=input_ids,
         attention_mask=attention_mask,
         max_length=128,
         num_beams=2,
-        early_stopping=True
     )
-    return tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
-# -------------------------------
-# POST: async API tóm tắt
-@app.post("/summarize")
-async def summarize(request: Request, payload: TextIn):
-    start_time = time.time()
-    client_ip = request.client.host
-    logging.info(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] 🔵 Received request from {client_ip}")
-    summary = await asyncio.get_event_loop().run_in_executor(executor, summarize_text, payload.text)
     end_time = time.time()
-    duration = end_time - start_time
-    logging.info(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] ✅ Response sent — total time: {duration:.2f}s")
     return {"summary": summary}

 import time
 import logging
 from fastapi import FastAPI, Request
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
 logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+app = FastAPI()
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("VietAI/vit5-base")
+model = AutoModelForSeq2SeqLM.from_pretrained("VietAI/vit5-base")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
# Request payload accepted by the POST /summarize endpoint.
class SummarizeRequest(BaseModel):
    # Raw text to be summarized.
    text: str
 @app.get("/")
+async def root():
+    return {"message": "Model is ready."}
def _generate_summary(text: str) -> str:
    """Tokenize *text*, run beam-search generation and decode the result.

    This is the blocking, compute-heavy part of a request. It uses the
    module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: Pre-processed input text, without the trailing ``</s>`` marker.

    Returns:
        The decoded first generated sequence with special tokens removed.
    """
    # Cap the encoder input at 512 tokens so an arbitrarily long request body
    # cannot blow up generation time or memory.
    encoding = tokenizer(
        text + " </s>",
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    # Generate the summary with a stable configuration.
    outputs = model.generate(
        input_ids=encoding["input_ids"].to(device),
        attention_mask=encoding["attention_mask"].to(device),
        max_length=128,
        num_beams=2,
        early_stopping=True,
        no_repeat_ngram_size=2,
        num_return_sequences=1,
    )
    return tokenizer.decode(
        outputs[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )


@app.post("/summarize")
async def summarize(req: Request, body: SummarizeRequest):
    """Summarize the text in the request body.

    Logs the caller's address and the total handling time. Whitespace-only
    input yields an empty summary without invoking the model.

    Args:
        req: Incoming request; only used to read the client address.
        body: Payload carrying the text to summarize.

    Returns:
        A dict of the form ``{"summary": <str>}``.
    """
    # Local import so this handler brings its own dependency into scope.
    import asyncio

    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the wall clock is adjusted mid-request.
    start_time = time.perf_counter()
    # ``req.client`` may be None (e.g. some proxies / test clients).
    client_ip = req.client.host if req.client else "unknown"
    logger.info(
        "[%s] 🔵 Received request from %s",
        time.strftime("%Y-%m-%d %H:%M:%S"),
        client_ip,
    )

    text = body.text.strip()
    if text:
        # Pre-processing: if the text does not look like a news item,
        # prepend "Tin nhanh:".
        if not text.lower().startswith(
            ("theo", "trong khi", "bộ", "ngày", "việt nam", "công an")
        ):
            text = "Tin nhanh: " + text
        # Run the blocking model call in a worker thread so the event loop
        # stays free to serve other requests (such as the "/" probe).
        loop = asyncio.get_running_loop()
        summary = await loop.run_in_executor(None, _generate_summary, text)
    else:
        # Nothing to summarize: skip the model entirely.
        summary = ""

    logger.info(
        "[%s] ✅ Response sent — total time: %.2fs",
        time.strftime("%Y-%m-%d %H:%M:%S"),
        time.perf_counter() - start_time,
    )
    return {"summary": summary}