File size: 1,208 Bytes
d1f836f
1f33001
d1f836f
 
1f33001
d1f836f
cde183a
d1f836f
1f33001
d1f836f
 
1f33001
7e7fb74
d1f836f
 
1f33001
d1f836f
 
1f33001
d1f836f
 
 
 
 
 
 
 
 
 
 
 
1f33001
 
d1f836f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# app.py  – CPU-only summariser for Hugging Face Spaces (free tier)

import textwrap, gradio as gr
from transformers import pipeline

# 1️⃣  small, open model that needs no access-token
# NOTE(review): Xenova/* repositories are typically ONNX exports aimed at
# transformers.js — confirm this repo ships weights the Python `transformers`
# pipeline can load (the upstream checkpoint is sshleifer/distilbart-cnn-6-6).
MODEL_ID = "Xenova/distilbart-cnn-6-6"
# Built once at import time; summarize() reuses this single pipeline object
# for every call instead of reloading the model.
summariser = pipeline("summarization", model=MODEL_ID, device=-1)  # -1 = CPU

# 2️⃣  Rough per-chunk character budget meant to approximate the model's
# 1,024-token input window. It is a character count, not a token count, so it
# is only a heuristic — dense text may still tokenize to more than the window.
MAX_CHUNK = 3_500

# Upper bound on re-chunk/re-summarise rounds, so the reduce loop always
# terminates even if the model's summaries fail to shrink the text.
_MAX_REDUCE_ROUNDS = 5


def _summarise_piece(piece: str, max_length: int, min_length: int) -> str:
    """Run one deterministic summarisation call and return the summary text.

    ``truncation=True`` makes the tokenizer clip input that exceeds the
    model's window instead of letting the model fail on an over-long
    sequence; anything past the window is dropped from that call.
    """
    result = summariser(
        piece,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]


def summarize(txt: str) -> str:
    """Summarise a transcript of arbitrary length.

    The text is split into chunks of at most ``MAX_CHUNK`` characters (words
    are never split, so a single over-long word may exceed the budget), each
    chunk is summarised, and the partial summaries are then condensed into one
    overall summary. If the joined partial summaries are themselves longer
    than ``MAX_CHUNK``, they are re-chunked and summarised again until they
    fit (bounded by ``_MAX_REDUCE_ROUNDS``).

    Args:
        txt: Raw transcript text. ``None``, empty, or whitespace-only input
            is accepted.

    Returns:
        The summary text, or ``""`` when there is nothing to summarise.
    """
    # Guard: the UI can hand over an empty box; avoid calling the model at all.
    if not txt or not txt.strip():
        return ""

    chunks = textwrap.wrap(txt, MAX_CHUNK, break_long_words=False)
    if not chunks:
        return ""

    partials = [_summarise_piece(chunk, 160, 30) for chunk in chunks]
    # A single chunk needs no second pass; its summary is the answer.
    if len(partials) == 1:
        return partials[0]

    combined = " ".join(partials)
    # For very long transcripts the joined partials can still overflow the
    # model window, so keep reducing them chunk-by-chunk until they fit.
    for _ in range(_MAX_REDUCE_ROUNDS):
        if len(combined) <= MAX_CHUNK:
            break
        pieces = textwrap.wrap(combined, MAX_CHUNK, break_long_words=False)
        combined = " ".join(_summarise_piece(piece, 160, 30) for piece in pieces)

    # Final pass turns the stitched partial summaries into one coherent text.
    return _summarise_piece(combined, 180, 40)

# Gradio UI: a 20-line "Transcript" textbox as input, plain text as output,
# with summarize() as the handler that maps one to the other.
demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(lines=20, label="Transcript"),
    outputs="text",
    title="Free Transcript Summariser – DistilBART-CNN",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()