Spaces:
Sleeping
Sleeping
File size: 4,083 Bytes
d61cd07 e673826 bc1a0a8 8df474c d2894f7 e673826 8df474c bc1a0a8 8df474c d2894f7 e673826 9b95b6c e673826 9b95b6c 5d174b9 bc1a0a8 8df474c bc1a0a8 8df474c 9b95b6c 8df474c bc1a0a8 8df474c 9b95b6c 8df474c bc1a0a8 8df474c d2894f7 9b95b6c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import gradio as gr
import random
from datasets import load_dataset
import requests
from bs4 import BeautifulSoup
import os
# Load the CNN/DailyMail dataset (config "3.0.0") once at import time; its
# "validation" split is what get_random_article() samples from.
dataset = load_dataset("cnn_dailymail", "3.0.0")
# Credentials come from the environment. Indexing os.environ directly means a
# missing variable raises KeyError as soon as the module is imported.
# NEWS_API_KEY is sent as the `apiKey` query parameter by get_news_article().
NEWS_API_KEY = os.environ['NEWS_API_KEY']
HF_TOKEN = os.environ['HF_TOKEN']
def summarize(model_name, article):
    """Summarize ``article`` with a model hosted on the Hugging Face Inference API.

    Args:
        model_name: Hub repository id of the model to query, for example
            ``"liamvbetts/bart-news-summary-v1"``.
        article: The text to summarize.

    Returns:
        The text extracted from the API response on HTTP 200, otherwise a
        human-readable error message describing the failure.
    """
    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
    # Must be an f-string: a plain string would send the literal text
    # "{HF_TOKEN}" as the bearer token and the API would answer 401.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {"inputs": article}
    response = requests.post(api_url, headers=headers, json=payload)

    if response.status_code == 200:
        # `format` is this module's response helper, not the builtin.
        return format(response.json())

    # Map the known failure codes to friendlier messages.
    if response.status_code == 401:
        return "Error: Unauthorized. Check your API token."
    if response.status_code == 503:
        return "Error: Service unavailable or model is currently loading."
    return f"{response} - Error: Encountered an issue (status code: {response.status_code}). Please try again."
def format(response):
    """Extract the generated text from a decoded Inference API response.

    The response is expected to be a non-empty sequence whose first element
    is a mapping. ``generated_text`` is preferred; ``summary_text`` (the key
    used by summarization pipelines) is accepted as a fallback.

    Note: this name shadows the builtin ``format`` inside this module; it is
    kept because ``summarize`` calls it by this name.

    Args:
        response: Decoded JSON body, e.g. ``[{"generated_text": "..."}]``.

    Returns:
        The generated/summary text of the first result.

    Raises:
        IndexError: If ``response`` is empty.
        KeyError: If the first result carries neither supported key.
    """
    first = response[0]
    if "generated_text" not in first and "summary_text" in first:
        return first["summary_text"]
    # Falls through to the original lookup so an unexpected payload still
    # raises KeyError('generated_text') exactly as before.
    return first["generated_text"]
def get_random_article():
    """Return the first 1024 characters of a random validation-split article.

    A single random row index is drawn instead of shuffling the whole split
    just to keep one row. The ``random`` module is already seeded from OS
    entropy at import, so no explicit reseeding is required.

    Returns:
        The ``article`` text of the chosen row, truncated to 1024 characters.
    """
    validation = dataset["validation"]
    index = random.randrange(len(validation))
    return validation[index]["article"][:1024]
def load_article():
    """Callback for the "load random article" button.

    Returns:
        Whatever ``get_random_article()`` returns (the truncated article text).
    """
    article = get_random_article()
    return article
def get_news_article():
    """Pick a random US top headline from NewsAPI and scrape its text.

    Returns:
        A ``(article_text, title)`` tuple as produced by ``scrape_article``.
        When NewsAPI returns no articles, or the chosen article has no URL,
        the tuple ``("No news article found.", "")`` is returned instead, so
        callers always receive exactly two values.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': NEWS_API_KEY,
        'country': 'us',  # You can change this as needed
        'pageSize': 100,
    }
    response = requests.get(url, params=params)
    articles = response.json().get('articles', [])

    # Always return a 2-tuple: this function feeds two UI outputs, so a bare
    # None here would break the callback.
    if not articles:
        return "No news article found.", ""

    news_url = random.choice(articles).get('url')
    if not news_url:
        return "No news article found.", ""

    full_article, title = scrape_article(news_url)
    return full_article, title
def scrape_article(url, max_words=512):
    """Download a web page and extract its title and paragraph text.

    Args:
        url: Address of the page to fetch.
        max_words: Maximum number of whitespace-separated words of body text
            to keep. Defaults to 512.

    Returns:
        A ``(text, title)`` tuple. ``text`` is the concatenated text of all
        ``<p>`` elements truncated to ``max_words`` words; ``title`` is the
        page title or ``"No Title Available"``. On any failure the tuple is
        ``("Error scraping article: <reason>", "")``.
    """
    try:
        # A timeout keeps an unresponsive site from blocking the UI forever;
        # the resulting exception is reported through the handler below.
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')
        # Extracting the title - this is a general approach.
        # `.string` is None when <title> is empty or contains nested tags, so
        # fall back to the placeholder in that case as well.
        title = (soup.title.string if soup.title else None) or "No Title Available"
        paragraphs = soup.find_all('p')  # This is a simplification
        text = ' '.join(p.get_text() for p in paragraphs)
        # Truncate to the first `max_words` words.
        truncated_text = ' '.join(text.split()[:max_words])
        return truncated_text, title
    except Exception as e:
        # Deliberate best-effort: surface the failure as text in the UI
        # rather than raising out of the callback.
        return "Error scraping article: " + str(e), ""
# Using Gradio Blocks with improved layout and styling.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been lost; confirm the row/column layout against the
# deployed app.
with gr.Blocks() as demo:
    gr.Markdown("# News Summary App", elem_id="header")
    gr.Markdown("Enter a news text and get its summary, or load a random article.")
    with gr.Row():
        # Left column: article loading controls and the editable input text.
        with gr.Column():
            with gr.Row():
                load_dataset_article_button = gr.Button("Load Random Article from Val Dataset")
                load_news_article_button = gr.Button("Pull Random News Article from NewsAPI")
            article_title = gr.Label()  # Component to display the article title
            input_text = gr.Textbox(lines=10, label="Input Text")
        # Right column: model selection, summarize trigger and the result.
        with gr.Column():
            with gr.Row():
                summarize_button = gr.Button("Summarize")
                model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
            output_text = gr.Textbox(label="Summary")
    # Event wiring is done while the Blocks context is still open.
    load_dataset_article_button.click(fn=load_article, inputs=[], outputs=input_text)
    # get_news_article returns (text, title), matching the output order here.
    load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
    summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)

demo.launch()