File size: 1,094 Bytes
6648389
bbb8ec0
6648389
bbb8ec0
 
 
 
6648389
 
bbb8ec0
 
 
 
 
 
 
 
 
 
 
 
6648389
 
 
bbb8ec0
6648389
 
 
 
bbb8ec0
 
6648389
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
from transformers import LEDTokenizer, LEDForConditionalGeneration

# Checkpoint shared by the tokenizer and the Longformer Encoder-Decoder (LED)
# model; both are loaded once at import time and reused for every request.
model_name = "allenai/led-large-16384"
model = LEDForConditionalGeneration.from_pretrained(model_name)
tokenizer = LEDTokenizer.from_pretrained(model_name)

def summarize_text(text):
    """Summarize ``text`` with the module-level LED tokenizer and model.

    Args:
        text: Raw document text. The tokenizer truncates anything beyond
            16,384 tokens.

    Returns:
        The decoded summary with special tokens stripped, or an empty
        string when ``text`` is ``None``, empty, or whitespace-only.
    """
    # Nothing to summarize: skip the expensive beam search, which would
    # otherwise be asked (via min_length) to produce text from no content.
    if not text or not text.strip():
        return ""

    # Tokenize input with truncation to fit within 16,384 tokens.
    inputs = tokenizer([text], max_length=16384, return_tensors="pt", truncation=True)
    input_ids = inputs["input_ids"]

    # LED attends locally by default; per the LED documentation, summarization
    # should put global attention on the first token only.
    global_attention_mask = input_ids.new_zeros(input_ids.shape)
    global_attention_mask[:, 0] = 1

    summary_ids = model.generate(
        input_ids,
        # Pass the tokenizer's mask explicitly instead of letting generate()
        # infer it from the input ids.
        attention_mask=inputs["attention_mask"],
        global_attention_mask=global_attention_mask,
        num_beams=4,
        max_length=512,  # Can be adjusted based on summary size needs
        min_length=100,
        early_stopping=True,
    )

    # A single document was submitted, so only the first sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Web UI: a single text input wired to summarize_text, with a text output.
iface = gr.Interface(
    title="Longformer Summarizer",
    description="Enter text to get a summary using the Longformer Encoder-Decoder.",
    fn=summarize_text,
    inputs="text",
    outputs="text",
)

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()