ViT5BaseNode / app.py
VietCat's picture
fix duplicate issue
29e22ca
raw
history blame
1.45 kB
import os

# ⚙️ Work around the cache directory not being writable when deployed on HFS.
# These variables must be assigned BEFORE `transformers` is imported: the
# Hugging Face libraries resolve their cache locations when they are first
# imported, so setting the variables afterwards does not redirect the cache.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache/transformers"

from flask import Flask, request, jsonify  # noqa: E402
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM  # noqa: E402
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
# 🚀 Load the model
# The tokenizer and the seq2seq model are created once, at module import time,
# and are then used as module-level globals by the request handlers.
model_name = "VietAI/vit5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
@app.route("/summarize", methods=["POST"])
def summarize():
    """Summarize the text supplied in the JSON body of a POST request.

    The request body must be a JSON object with a string ``"text"`` field.

    Returns:
        A JSON response ``{"summary": <str>}`` on success, or a
        ``({"error": <str>}, 400)`` pair when the body is not a JSON object,
        ``"text"`` is not a string, or ``"text"`` is missing/blank.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so every bad request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Covers None, lists, strings, numbers: anything without `.get`.
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text = data.get("text", "")
    if not isinstance(text, str):
        # e.g. {"text": null} or {"text": 5} would otherwise crash on .strip().
        return jsonify({"error": "'text' must be a string"}), 400

    text = text.strip()
    if not text:
        return jsonify({"error": "Missing 'text' field"}), 400

    # ⚠️ Limit the input (ViT5-base handles at most 512 tokens)
    inputs = tokenizer.encode(
        text, return_tensors="pt", max_length=512, truncation=True
    )

    # ✅ Generation parameters: anti-repetition + high quality
    summary_ids = model.generate(
        inputs,
        max_length=100,
        min_length=10,
        num_beams=4,
        no_repeat_ngram_size=3,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
    )

    # Decode the first returned sequence, dropping special tokens.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return jsonify({"summary": summary})
@app.route("/", methods=["GET"])
def index():
    """Return a fixed plain-text message indicating the service is up."""
    status_message = "✅ ViT5 summarization API is running."
    return status_message
if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly,
    # listening on all network interfaces on port 7860.
    app.run(port=7860, host="0.0.0.0")