File size: 4,083 Bytes
d61cd07
e673826
 
bc1a0a8
 
8df474c
d2894f7
e673826
 
8df474c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc1a0a8
8df474c
 
d2894f7
e673826
 
 
9b95b6c
e673826
 
9b95b6c
5d174b9
 
bc1a0a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8df474c
bc1a0a8
 
 
 
 
8df474c
9b95b6c
8df474c
bc1a0a8
8df474c
9b95b6c
8df474c
 
 
 
 
 
 
 
 
 
 
bc1a0a8
 
 
8df474c
d2894f7
9b95b6c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import gradio as gr
import random
from datasets import load_dataset
import requests
from bs4 import BeautifulSoup
import os

# Load the CNN/DailyMail corpus (configuration "3.0.0") once, at import time.
# get_random_article() below reads its "validation" split.
dataset = load_dataset("cnn_dailymail", "3.0.0")

# Credentials come from the environment. Indexing os.environ directly means a
# missing variable raises KeyError as soon as this module is loaded.
NEWS_API_KEY = os.environ['NEWS_API_KEY']  # passed as 'apiKey' to newsapi.org in get_news_article()
HF_TOKEN = os.environ['HF_TOKEN']  # meant for the Authorization header built in summarize()

def summarize(model_name, article):
    """Summarize *article* with a model served by the Hugging Face Inference API.

    Args:
        model_name: Hub repository id of the model to query; it is appended
            to the Inference API base URL.
        article: Text to summarize, sent as the request's ``inputs`` field.

    Returns:
        The text extracted from the API response by ``format`` on success,
        otherwise a human-readable error message. Errors are returned as
        strings rather than raised because the result is displayed directly
        in the "Summary" text box.
    """
    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
    # The f-prefix matters: without it the literal text "{HF_TOKEN}" is sent
    # instead of the real token and the request is not authenticated.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {"inputs": article}

    try:
        # A timeout keeps a stalled connection from blocking the UI forever.
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        return f"Error: Request failed ({e}). Please try again."

    if response.status_code == 200:
        return format(response.json())
    if response.status_code == 401:
        return "Error: Unauthorized. Check your API token."
    if response.status_code == 503:
        return "Error: Service unavailable or model is currently loading."
    return f"{response} - Error: Encountered an issue (status code: {response.status_code}). Please try again."

def format(response):
    """Extract the generated text from a decoded Inference API response.

    NOTE: this name shadows the builtin ``format`` inside this module; it is
    kept because ``summarize`` calls it by this name.

    Args:
        response: Decoded JSON body, expected to be a non-empty list whose
            first element is a dict.

    Returns:
        The value under ``'generated_text'`` (text-generation style output)
        or, if that key is absent, under ``'summary_text'`` (the key the
        summarization task uses).

    Raises:
        KeyError: If the first element has neither key.
        IndexError: If *response* is an empty list.
    """
    first = response[0]
    try:
        return first['generated_text']
    except KeyError:
        # Summarization pipelines label their output differently.
        if 'summary_text' in first:
            return first['summary_text']
        raise

def get_random_article():
    """Return the first 1024 characters of a random validation-set article.

    A single random row index is drawn instead of shuffling the whole split:
    the result is still a uniformly random article, but no full permutation
    of the split is built on every call. The previous ``random.seed()`` call
    was dropped because ``Dataset.shuffle`` does not use Python's ``random``
    module, so reseeding it had no effect on the selection.

    Returns:
        The article text truncated to at most 1024 characters.
    """
    validation = dataset["validation"]
    index = random.randrange(len(validation))
    return validation[index]['article'][:1024]

def load_article():
    """Button callback: return a random dataset article for the input box."""
    article = get_random_article()
    return article

def get_news_article():
    """Fetch a random US top headline from NewsAPI and scrape its text.

    Returns:
        A ``(text, title)`` tuple in every case, because the result feeds two
        UI components (the input text box and the title label). On failure
        ``text`` carries a human-readable message and ``title`` is ``""``.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': NEWS_API_KEY,
        'country': 'us',  # You can change this as needed
        'pageSize': 100
    }
    try:
        # A timeout keeps a stalled connection from blocking the UI forever.
        response = requests.get(url, params=params, timeout=10)
        # "or []" also covers an explicit null "articles" field.
        articles = response.json().get('articles') or []
    except (requests.RequestException, ValueError) as e:
        return "Error fetching news: " + str(e), ""

    # Only entries that actually carry a URL can be scraped.
    candidate_urls = [item.get('url') for item in articles if item.get('url')]
    if not candidate_urls:
        return "No news article found.", ""

    full_article, title = scrape_article(random.choice(candidate_urls))
    return full_article, title

def scrape_article(url, max_words=512):
    """Download a web page and return its paragraph text and title.

    This is a best-effort scraper: it joins the text of every ``<p>`` element
    on the page, which is a simplification and may include non-article text.

    Args:
        url: Address of the page to fetch.
        max_words: Maximum number of whitespace-separated words to keep
            (defaults to 512, the limit used previously).

    Returns:
        A ``(text, title)`` tuple. If anything goes wrong (network error,
        HTTP error status, parsing failure) ``text`` is an error message
        starting with ``"Error scraping article: "`` and ``title`` is ``""``;
        this function does not raise.
    """
    try:
        # A timeout keeps a stalled connection from blocking the UI forever.
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx as failures rather than scraping the error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extracting the title - this is a general approach. ``.string`` is
        # None when <title> is empty or has nested markup, so fall back then.
        if soup.title and soup.title.string:
            title = soup.title.string
        else:
            title = "No Title Available"

        paragraphs = soup.find_all('p')  # This is a simplification
        text = ' '.join(p.get_text() for p in paragraphs)

        # Truncate to the first ``max_words`` words.
        truncated_text = ' '.join(text.split()[:max_words])

        return truncated_text, title
    except Exception as e:
        # Deliberately broad: callers rely on always getting a tuple back.
        return "Error scraping article: " + str(e), ""

# Build the UI with Gradio Blocks: a two-column layout with the article
# input on the left and the model selection / summary output on the right.
with gr.Blocks() as demo:
    gr.Markdown("# News Summary App", elem_id="header")
    gr.Markdown("Enter a news text and get its summary, or load a random article.")
    
    with gr.Row():
        # Left column: article sources, article title and editable input text.
        with gr.Column():
            with gr.Row():
                load_dataset_article_button = gr.Button("Load Random Article from Val Dataset")
                load_news_article_button = gr.Button("Pull Random News Article from NewsAPI")
            article_title = gr.Label()  # Component to display the article title
            input_text = gr.Textbox(lines=10, label="Input Text")
        # Right column: summarize action, model choice and resulting summary.
        with gr.Column():
            with gr.Row():
                summarize_button = gr.Button("Summarize")
            # The selected value is passed to summarize() as its model_name argument.
            model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
            output_text = gr.Textbox(label="Summary")

    # Event wiring.
    # The dataset button only writes input_text; article_title is not an
    # output here, so this handler does not update the title label.
    load_dataset_article_button.click(fn=load_article, inputs=[], outputs=input_text)
    # get_news_article feeds two components: (article text, article title).
    load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
    # summarize receives (model_name, input_text) in that order.
    summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)

# Start the app; this runs whenever the module is executed (no __main__ guard).
demo.launch()