# app.py – CPU-only summariser for Hugging Face Spaces (free tier)
#
# Loads a small, token-free DistilBART ONNX checkpoint and serves a
# Gradio textbox UI that summarises (possibly long) transcripts by
# chunking, summarising each chunk, then summarising the summaries.

import textwrap

import gradio as gr
from optimum.onnxruntime import ORTModelForSeq2SeqLM
from transformers import AutoTokenizer, pipeline

# 1️⃣ small, open model that needs no access-token
MODEL_ID = "Xenova/distilbart-cnn-6-6"

tok = AutoTokenizer.from_pretrained(MODEL_ID)
model = ORTModelForSeq2SeqLM.from_pretrained(MODEL_ID)
# device=-1 forces CPU execution (free-tier Spaces have no GPU)
summariser = pipeline("summarization", model=model, tokenizer=tok, device=-1)

# 2️⃣ rough char limit that maps to the model’s 1 024-token window
MAX_CHUNK = 3_500


def summarize(txt: str) -> str:
    """Chunk long transcripts, summarise each, then summarise the summaries.

    Args:
        txt: Raw transcript text of any length (may contain newlines).

    Returns:
        A single summary string. Empty / whitespace-only input yields "".
    """
    # Guard: the pipeline raises on empty input, and textwrap.wrap("")
    # returns [] — make the degenerate case explicit instead.
    if not txt or not txt.strip():
        return ""

    # NOTE: textwrap.wrap collapses runs of whitespace (incl. newlines)
    # while splitting — acceptable here since we only need token-budget
    # chunks, not preserved formatting.
    chunks = textwrap.wrap(txt, MAX_CHUNK, break_long_words=False)

    partials = [
        summariser(c, max_length=160, min_length=30, do_sample=False)[0]["summary_text"]
        for c in chunks
    ]
    first_pass = " ".join(partials)

    # If we had to chunk, do a second pass to get a coherent overall summary.
    if len(chunks) > 1:
        first_pass = summariser(
            first_pass, max_length=180, min_length=40, do_sample=False
        )[0]["summary_text"]
    return first_pass


demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(lines=20, label="Transcript"),
    outputs="text",
    title="Free Transcript Summariser – DistilBART-CNN",
)

if __name__ == "__main__":
    demo.launch()