# ViT5BaseNode / app.py
# VietCat's picture
# fix broken encoding text issue
# 4d593bf
# raw
# history blame
# 1.43 kB
import os
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Point the Hugging Face libraries at a valid cache directory.
# NOTE(review): these variables are assigned after `transformers` has already
# been imported at the top of the file; the library resolves its cache
# location at import time, so on their own they may not take effect. They are
# kept for anything that reads them later, and the cache directory is also
# passed explicitly to `from_pretrained` below so the intended location is
# actually used.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache/transformers"

app = Flask(__name__)

# Load the model and tokenizer once, at import time, so requests reuse them.
model_name = "VietAI/vit5-base"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir=os.environ["TRANSFORMERS_CACHE"],
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    cache_dir=os.environ["TRANSFORMERS_CACHE"],
)
@app.route("/summarize", methods=["POST"])
def summarize():
    """Summarize the text supplied in a JSON POST body.

    The request body is expected to be a JSON object with a string ``text``
    field.

    Returns:
        A JSON response ``{"summary": <str>}`` on success, or
        ``{"error": "Missing 'text' field"}`` with HTTP 400 when the body is
        not a JSON object or ``text`` is absent, not a string, or blank.
    """
    # silent=True makes get_json() return None instead of raising when the
    # body is missing or is not valid JSON, so malformed requests fall
    # through to the 400 response below rather than an unhandled error.
    data = request.get_json(silent=True)

    # Guard against non-object payloads (e.g. a JSON list) and non-string
    # "text" values, both of which would otherwise raise AttributeError.
    raw_text = data.get("text") if isinstance(data, dict) else None
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return jsonify({"error": "Missing 'text' field"}), 400

    # Prepend the task prefix (per the original author's note, this is meant
    # to match the format used during training).
    prompt = f"summarize: {text}"
    inputs = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Generate with beam search plus repetition controls.
    summary_ids = model.generate(
        inputs,
        max_length=100,
        min_length=10,
        num_beams=4,
        no_repeat_ngram_size=3,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
    )

    # Only the first returned sequence is decoded.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return jsonify({"summary": summary})
@app.route("/", methods=["GET"])
def index():
    """Return a fixed status string for GET requests to the root path."""
    status_message = "✅ ViT5 summarization API is running."
    return status_message
if __name__ == "__main__":
    # Start Flask's built-in server on all interfaces, port 7860, when this
    # file is executed directly.
    app.run(port=7860, host="0.0.0.0")