File size: 1,362 Bytes
29e22ca
20688a8
 
 
a9b7eee
29e22ca
 
 
20688a8
 
 
 
 
 
 
 
29e22ca
 
 
 
 
20688a8
a9b7eee
4d593bf
 
29e22ca
20688a8
 
 
29e22ca
20688a8
29e22ca
 
 
20688a8
 
a9b7eee
20688a8
 
 
 
29e22ca
 
20688a8
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os

# Declare a safe cache directory for Hugging Face.
# These variables must be set BEFORE `transformers` is imported: the Hugging
# Face libraries read them while being imported, so assigning them afterwards
# leaves the default cache location in effect.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache/transformers"

from flask import Flask, request, jsonify  # noqa: E402
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM  # noqa: E402

app = Flask(__name__)

# Load the tokenizer and model once at import time so that every request
# handled by this process reuses the same instances.
model_name = "VietAI/vit5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

@app.route("/summarize", methods=["POST"])
def summarize():
    """Summarize the text supplied in a JSON POST body.

    The request body is expected to be a JSON object with a string ``"text"``
    field.

    Returns:
        A JSON response ``{"summary": <str>}`` on success, or
        ``{"error": "Missing 'text' field"}`` with HTTP status 400 when the
        body is not a JSON object, or when ``"text"`` is absent, not a string,
        or blank after stripping whitespace.
    """
    # silent=True makes get_json return None instead of raising when the body
    # is missing or is not valid JSON, so every malformed request gets the
    # same JSON 400 response below.
    data = request.get_json(silent=True)

    # The payload may legally be any JSON value (null, list, number, ...);
    # only a dict can carry a "text" field.
    raw_text = data.get("text") if isinstance(data, dict) else None
    text = raw_text.strip() if isinstance(raw_text, str) else ""

    if not text:
        return jsonify({"error": "Missing 'text' field"}), 400

    # Very important: add the 'summarize:' prefix.
    prompt = f"summarize: {text}"
    # Inputs longer than 512 tokens are truncated rather than rejected.
    inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)

    summary_ids = model.generate(
        inputs,
        max_length=100,
        min_length=10,
        num_beams=4,
        no_repeat_ngram_size=3,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
    )

    # Only the first returned sequence is decoded and sent back.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return jsonify({"summary": summary})

@app.route("/", methods=["GET"])
def index():
    """Return a plain-text message confirming that the API is running."""
    status_message = "✅ ViT5 summarization API is running."
    return status_message

# Start Flask's built-in server only when this file is executed as a script;
# importing the module does not start a server.
if __name__ == "__main__":
    app.run(
        port=7860,
        host="0.0.0.0",
    )