Spaces:
Sleeping
Sleeping
File size: 1,459 Bytes
d1f836f e20523b d1f836f c969a0f 1f33001 d1f836f cde183a f76838b cb73ab4 f76838b 1f33001 d1f836f 1f33001 7e7fb74 d1f836f 1f33001 d1f836f 1f33001 d1f836f 1f33001 d1f836f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# app.py – CPU-only summariser for Hugging Face Spaces (free tier)
from optimum.onnxruntime import ORTModelForSeq2SeqLM
import textwrap, gradio as gr
from transformers import AutoTokenizer, pipeline
# 1️⃣ Small, open summarisation model: no Hugging Face access token required.
MODEL_ID = "Xenova/distilbart-cnn-6-6"
# Load via ONNX Runtime (optimum) rather than plain PyTorch — faster inference
# on the CPU-only free tier. Both objects are downloaded/cached at import time.
tok = AutoTokenizer.from_pretrained(MODEL_ID)
model = ORTModelForSeq2SeqLM.from_pretrained(MODEL_ID)
# device=-1 pins the transformers pipeline to CPU (no GPU on the free tier).
summariser = pipeline("summarization", model=model, tokenizer=tok, device=-1)
# 2️⃣ Rough character budget per chunk that stays inside the model's
#    1,024-token context window (≈3.5 chars per token for English text —
#    NOTE(review): heuristic, not enforced by the tokenizer; confirm margin).
MAX_CHUNK = 3_500
def summarize(txt: str) -> str:
    """Summarise a transcript of arbitrary length.

    The input is wrapped into ~MAX_CHUNK-character pieces on word
    boundaries, each piece is summarised independently, and — only when
    more than one piece was needed — the joined partial summaries are
    summarised a second time so the final result reads coherently.

    Args:
        txt: Raw transcript text; may be empty (returns "").

    Returns:
        A single summary string.
    """
    pieces = textwrap.wrap(txt, MAX_CHUNK, break_long_words=False)

    partial_summaries = []
    for piece in pieces:
        result = summariser(piece, max_length=160, min_length=30, do_sample=False)
        partial_summaries.append(result[0]["summary_text"])

    combined = " ".join(partial_summaries)

    # Single chunk: the first pass already is the final summary.
    if len(pieces) <= 1:
        return combined

    # Second pass condenses the stitched partials into one coherent summary.
    final = summariser(combined, max_length=180, min_length=40, do_sample=False)
    return final[0]["summary_text"]
# 3️⃣ Minimal Gradio UI: one multi-line textbox in, plain text summary out.
demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(lines=20, label="Transcript"),
    outputs="text",
    title="Free Transcript Summariser – DistilBART-CNN",
)

# Launch the local server only when run as a script (Spaces runs app.py
# directly, so this is the entry point there too).
if __name__ == "__main__":
    demo.launch()
|