Spaces:
Sleeping
Sleeping
File size: 1,362 Bytes
29e22ca 20688a8 a9b7eee 29e22ca 20688a8 29e22ca 20688a8 a9b7eee 4d593bf 29e22ca 20688a8 29e22ca 20688a8 29e22ca 20688a8 a9b7eee 20688a8 29e22ca 20688a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import os
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Declare a safe (writable) cache directory for Hugging Face downloads.
# NOTE: these variables are assigned after `transformers` has already been
# imported above, and the library resolves its cache paths at import time, so
# on their own they do not redirect downloads made by this process. They are
# kept for any child process that inherits the environment; the cache
# directory is additionally passed explicitly to `from_pretrained` below.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache/transformers"
_TRANSFORMERS_CACHE_DIR = os.environ["TRANSFORMERS_CACHE"]

app = Flask(__name__)

# Checkpoint identifier handed to `from_pretrained` for both tokenizer and model.
model_name = "VietAI/vit5-base"

# Loaded once at import time so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(
    model_name, cache_dir=_TRANSFORMERS_CACHE_DIR
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name, cache_dir=_TRANSFORMERS_CACHE_DIR
)
@app.route("/summarize", methods=["POST"])
def summarize():
    """Summarize the text supplied in a JSON POST body.

    Expects a JSON object with a string field ``"text"``.

    Returns:
        ``{"summary": <str>}`` on success, or ``({"error": <str>}, 400)`` when
        the body is not a JSON object, ``"text"`` is not a string, or
        ``"text"`` is missing/blank.
    """
    # silent=True yields None for a missing, non-JSON or malformed body instead
    # of raising, so every bad request gets the same JSON error shape.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    raw_text = payload.get("text", "")
    if not isinstance(raw_text, str):
        # Guard against numbers/lists/null, which have no .strip().
        return jsonify({"error": "'text' must be a string"}), 400

    text = raw_text.strip()
    if not text:
        return jsonify({"error": "Missing 'text' field"}), 400

    # Very important: add the 'summarize:' prefix to the input.
    prompt = f"summarize: {text}"

    # Inputs longer than 512 tokens are truncated rather than rejected.
    inputs = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Beam search with repetition controls; output limited to 10-100 tokens.
    summary_ids = model.generate(
        inputs,
        max_length=100,
        min_length=10,
        num_beams=4,
        no_repeat_ngram_size=3,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
    )

    # Only the first returned sequence is decoded and sent back.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return jsonify({"summary": summary})
@app.route("/", methods=["GET"])
def index():
    """Return a fixed plain-text status line for GET requests to the root path."""
    status_message = "✅ ViT5 summarization API is running."
    return status_message
# Start Flask's built-in server only when this file is executed directly;
# it binds to all interfaces on port 7860.
if __name__ == "__main__":
    listen_options = {"host": "0.0.0.0", "port": 7860}
    app.run(**listen_options)
|