ViT5BaseNode / app.py
import time
import logging
from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("VietAI/vit5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("VietAI/vit5-base")

# Use the GPU when available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference only: disable dropout

class SummarizeRequest(BaseModel):
    text: str

@app.get("/")
async def root():
    return {"message": "Model is ready."}

@app.post("/summarize")
async def summarize(req: Request, body: SummarizeRequest):
    start_time = time.time()
    client_ip = req.client.host
    logger.info(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] 🔵 Received request from {client_ip}")

    text = body.text.strip()
    # Preprocessing: if the text does not look like a news item, prepend "Tin nhanh:" ("news brief:")
    if not text.lower().startswith(("theo", "trong khi", "bộ", "ngày", "việt nam", "công an")):
        text = "Tin nhanh: " + text

    # ViT5 examples append an explicit end-of-sequence marker to the input
    input_text = text + " </s>"
    # Truncate overly long inputs so encoding and generation time stay bounded
    encoding = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)
    # Generate the summary with a stable (deterministic beam-search) configuration
    with torch.no_grad():  # no gradients needed at inference time
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=128,
            num_beams=2,
            early_stopping=True,
            no_repeat_ngram_size=2,  # avoid repeating any 2-gram in the summary
            num_return_sequences=1,
        )
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

    end_time = time.time()
    logger.info(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] ✅ Response sent — total time: {end_time - start_time:.2f}s")
    return {"summary": summary}