# Spaces: Sleeping (page status text captured with the file; not part of the program)
import os

# Set a valid cache directory for Hugging Face.
# These assignments must run BEFORE `transformers` is imported: the Hugging
# Face libraries resolve their cache paths from the environment when they are
# first imported, so setting the variables afterwards has no effect.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache/transformers"

from flask import Flask, request, jsonify  # noqa: E402
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM  # noqa: E402

app = Flask(__name__)

# Load the model and tokenizer once at import time so request handlers can
# reuse the same module-level objects.
model_name = "VietAI/vit5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
def summarize():
    """Summarize the ``text`` field of the JSON request body.

    Reads the current Flask request and expects a JSON object containing a
    non-empty string under the key ``"text"``.

    Returns:
        A JSON response ``{"summary": <str>}`` on success, or a
        ``(JSON response, 400)`` pair with an ``"error"`` message when the
        body is not a JSON object or ``text`` is missing, empty, or not a
        string.
    """
    # NOTE(review): no @app.route decorator is visible on this handler in the
    # reviewed source -- confirm it is registered with the Flask app.

    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request gets the same JSON error shape.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    raw_text = payload.get("text", "")
    # Guard the type before calling .strip(): a number, list, or null here
    # would otherwise raise and surface as a 500.
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return jsonify({"error": "Missing 'text' field"}), 400

    # Task prefix prepended to the input. The original author notes that it
    # matches the training format -- NOTE(review): confirm for this checkpoint.
    prompt = f"summarize: {text}"
    # Inputs longer than 512 tokens are truncated rather than rejected.
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Decoding settings: 4-beam search, output bounded to 10-100 tokens, with
    # n-gram blocking and a repetition penalty to discourage repeated phrases.
    summary_ids = model.generate(
        input_ids,
        max_length=100,
        min_length=10,
        num_beams=4,
        no_repeat_ngram_size=3,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
    )

    # Only the first returned sequence is decoded and sent back.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return jsonify({"summary": summary})
def index():
    """Return the plain-text status message reporting that the API is up."""
    # NOTE(review): no @app.route decorator is visible on this handler in the
    # reviewed source -- confirm it is registered with the Flask app.
    status_message = "✅ ViT5 summarization API is running."
    return status_message
if __name__ == "__main__":
    # Start the Flask server only when this file is executed directly;
    # it listens on all interfaces (0.0.0.0) at port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    app.run(**server_options)