Spaces:

liamvbetts
/

bart-news-summary-v1

Sleeping

App Files Files Community

bart-news-summary-v1 / app.py

liamvbetts

new changes

8df474c over 1 year ago

raw

history blame

4.08 kB

	import gradio as gr
	import random
	from datasets import load_dataset
	import requests
	from bs4 import BeautifulSoup
	import os

	# Load the CNN/DailyMail dataset (config "3.0.0") once, at import time.
	# get_random_article() samples from its "validation" split.
	dataset = load_dataset("cnn_dailymail", "3.0.0")

	# API credentials come from the environment. Indexing os.environ raises
	# KeyError at startup if either variable is unset.
	NEWS_API_KEY = os.environ['NEWS_API_KEY']
	HF_TOKEN = os.environ['HF_TOKEN']

	def summarize(model_name, article):
	    """Summarize ``article`` with a model served by the HF Inference API.

	    Args:
	        model_name: Hub repository id of the model, for example
	            ``"liamvbetts/bart-news-summary-v1"``.
	        article: Text to summarize.

	    Returns:
	        The generated summary on success. On any failure, a
	        human-readable ``"Error: ..."`` string, so the UI callback never
	        raises.
	    """
	    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
	    # The f-prefix matters: without it the literal text "{HF_TOKEN}" is
	    # sent instead of the token and the request cannot authenticate.
	    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
	    payload = {"inputs": article}

	    try:
	        # A timeout keeps a stalled connection from hanging the UI forever.
	        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
	    except requests.RequestException as exc:
	        return f"Error: Could not reach the inference API ({exc})."

	    # Handle different types of errors
	    if response.status_code == 401:
	        return "Error: Unauthorized. Check your API token."
	    if response.status_code == 503:
	        return "Error: Service unavailable or model is currently loading."
	    if response.status_code != 200:
	        return f"{response} - Error: Encountered an issue (status code: {response.status_code}). Please try again."

	    try:
	        # `format` is this module's response parser, not the builtin.
	        return format(response.json())
	    except (ValueError, KeyError, IndexError, TypeError):
	        # Invalid JSON, or a body that does not have the expected shape.
	        return "Error: Unexpected response format from the inference API."

	def format(response):
	    """Extract the generated text from a decoded Inference API response.

	    NOTE: this name shadows the builtin ``format``; it is kept because
	    ``summarize`` calls it by this name.

	    Args:
	        response: Decoded JSON body, expected to be a list whose first
	            element is a dict holding the text under ``'generated_text'``
	            or, for summarization-tagged models, ``'summary_text'``.

	    Returns:
	        The text stored under ``'generated_text'`` if present, otherwise
	        the text stored under ``'summary_text'``.

	    Raises:
	        KeyError: If the first element has neither key.
	    """
	    first = response[0]
	    try:
	        return first["generated_text"]
	    except KeyError:
	        if "summary_text" in first:
	            return first["summary_text"]
	        # Re-raise the original error so callers see the same exception.
	        raise

	def get_random_article():
	    """Return a random article from the dataset's validation split.

	    Returns:
	        The ``'article'`` text of one randomly chosen validation row,
	        truncated to its first 1024 characters.
	    """
	    validation = dataset["validation"]
	    # Pick a single row by index. Shuffling the whole split just to take
	    # its first row does far more work for the same result.
	    index = random.randrange(len(validation))
	    return validation[index]["article"][:1024]

	def load_article():
	    """Click handler: return a random dataset article for the input box."""
	    article = get_random_article()
	    return article

	def get_news_article():
	    """Fetch a random US top headline from NewsAPI and scrape its text.

	    Returns:
	        A ``(text, title)`` tuple. When no article can be fetched, the
	        first element is an explanatory message and the title is ``""``.
	        A 2-tuple is returned on every path because the click handler
	        maps the result onto two output components.
	    """
	    url = 'https://newsapi.org/v2/top-headlines'
	    params = {
	        'apiKey': NEWS_API_KEY,
	        'country': 'us',  # You can change this as needed
	        'pageSize': 100,
	    }

	    try:
	        response = requests.get(url, params=params, timeout=30)
	        articles = response.json().get('articles', [])
	    except (requests.RequestException, ValueError) as exc:
	        # Network failure, or a body that is not valid JSON.
	        return f"Error fetching news: {exc}", ""

	    if not articles:
	        # Previously a bare None, which cannot fill two outputs.
	        return "No news article found.", ""

	    news_url = random.choice(articles).get('url')
	    if not news_url:
	        return "No news article found.", ""

	    full_article, title = scrape_article(news_url)
	    return full_article, title

	def scrape_article(url, max_words=512):
	    """Download a web page and return its paragraph text and title.

	    Args:
	        url: Address of the page to scrape.
	        max_words: Maximum number of whitespace-separated words to keep
	            from the page text. Defaults to 512.

	    Returns:
	        A ``(text, title)`` tuple. If anything goes wrong, the tuple is
	        ``("Error scraping article: <reason>", "")`` and nothing is raised.
	    """
	    try:
	        response = requests.get(url, timeout=30)
	        # Treat HTTP error pages as failures instead of scraping them.
	        response.raise_for_status()
	        soup = BeautifulSoup(response.content, 'html.parser')

	        # Extracting the title - this is a general approach. `.string` is
	        # None for an empty <title> or one containing nested markup.
	        raw_title = soup.title.string if soup.title else None
	        title = raw_title.strip() if raw_title else ""
	        if not title:
	            title = "No Title Available"

	        # This is a simplification: every <p> element counts as article text.
	        paragraphs = soup.find_all('p')
	        text = ' '.join(p.get_text() for p in paragraphs)

	        # Truncate to the first `max_words` words.
	        truncated_text = ' '.join(text.split()[:max_words])

	        return truncated_text, title
	    except Exception as e:
	        # Deliberate best-effort boundary: any failure becomes a message
	        # the UI can display.
	        return "Error scraping article: " + str(e), ""

	# Using Gradio Blocks with improved layout and styling
	# NOTE(review): indentation was lost in this copy of the file, so the
	# nesting of the `with` blocks below cannot be read from it. Confirm
	# the Row/Column layout against the original app.py.
	with gr.Blocks() as demo:
	# Page heading and a short usage hint.
	gr.Markdown("# News Summary App", elem_id="header")
	gr.Markdown("Enter a news text and get its summary, or load a random article.")

	with gr.Row():
	with gr.Column():
	with gr.Row():
	# Two ways to fill the input box: a dataset sample or a live news article.
	load_dataset_article_button = gr.Button("Load Random Article from Val Dataset")
	load_news_article_button = gr.Button("Pull Random News Article from NewsAPI")
	article_title = gr.Label() # Component to display the article title
	input_text = gr.Textbox(lines=10, label="Input Text")
	with gr.Column():
	with gr.Row():
	summarize_button = gr.Button("Summarize")
	# Model choices passed to summarize() as `model_name`.
	model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
	output_text = gr.Textbox(label="Summary")

	# Event wiring:
	# - load_article() returns one value, shown in input_text.
	# - get_news_article() feeds two outputs: input_text and article_title.
	# - summarize() receives (model_name, input_text) and fills output_text.
	load_dataset_article_button.click(fn=load_article, inputs=[], outputs=input_text)
	load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
	summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)

	# Start the app.
	demo.launch()