# Hugging Face file-viewer header (page residue, not part of the program):
# liamvbetts's picture
# news api integration
# bc1a0a8
# raw
# history blame
# 3.06 kB
import os
import random

import gradio as gr
import requests
from bs4 import BeautifulSoup
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Seq2seq summarization checkpoint and its matching tokenizer, loaded once at
# import time.
tokenizer = AutoTokenizer.from_pretrained("liamvbetts/bart-large-cnn-v4")
model = AutoModelForSeq2SeqLM.from_pretrained("liamvbetts/bart-large-cnn-v4")

# "cnn_dailymail" dataset, configuration "3.0.0", loaded once at import time.
dataset = load_dataset("cnn_dailymail", "3.0.0")

# NewsAPI credential. The NEWS_API_KEY environment variable takes precedence
# so the secret can be supplied (and rotated) without editing the source.
# SECURITY(review): the literal fallback is a key committed to the repository;
# treat it as leaked, rotate it, and drop the fallback once the deployment
# provides the environment variable.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "da2cc601304341e7a39cb5604d0b076b")
def summarize(article):
    """Generate a summary of ``article`` with the module-level model.

    Args:
        article: Plain text to summarize.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when ``article`` is empty, ``None`` or whitespace-only.
    """
    # Nothing to summarize: skip the model call instead of decoding whatever
    # the model would generate from an input holding only special tokens.
    if not article or not article.strip():
        return ""

    # Cap the encoded input at the model's position-embedding size (falling
    # back to 1024 if the config does not expose it). Without truncation, a
    # long article yields more tokens than the encoder can index.
    max_input_tokens = getattr(model.config, "max_position_embeddings", 1024)
    input_ids = tokenizer(
        article,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    ).input_ids

    # Greedy decoding (no sampling), at most 128 newly generated tokens.
    outputs = model.generate(input_ids, max_new_tokens=128, do_sample=False)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def get_random_article():
    """Return a randomly chosen validation article, capped at 1024 characters.

    Returns:
        The first 1024 characters of the ``article`` field of one random row
        from the ``validation`` split of the module-level ``dataset``.
    """
    validation = dataset["validation"]
    # Draw a single index instead of shuffling the entire split just to take
    # its first row. The ``random`` module seeds itself on import, so an
    # explicit ``random.seed()`` call is unnecessary.
    row = validation[random.randrange(len(validation))]
    return row["article"][:1024]
def load_article():
    """Return the text of one random article via ``get_random_article``."""
    article = get_random_article()
    return article
def get_news_article():
    """Fetch a random top US headline from NewsAPI and scrape its text.

    Returns:
        A ``(text, title)`` tuple on every path. On success it is whatever
        ``scrape_article`` returns for the chosen article URL. If the NewsAPI
        request fails, ``text`` is an error message and ``title`` is ``""``;
        if no article with a URL is available, ``text`` is
        ``"No news article found."`` and ``title`` is ``""``.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': NEWS_API_KEY,
        'country': 'us',  # You can change this as needed
        'pageSize': 100,
    }
    try:
        # Bound the wait so an unresponsive API cannot hang the caller.
        response = requests.get(url, params=params, timeout=10)
        articles = response.json().get('articles') or []
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return "Error fetching news: " + str(e), ""

    # Only entries that carry a link can be scraped; filtering first avoids
    # reporting "not found" when a usable article was available.
    candidates = [item for item in articles if item.get('url')]
    if not candidates:
        # Same two-value shape as the success path, so a caller expecting
        # (text, title) never receives a bare None.
        return "No news article found.", ""

    news_url = random.choice(candidates)['url']
    full_article, title = scrape_article(news_url)
    return full_article, title
def scrape_article(url):
    """Download ``url`` and extract its paragraph text and page title.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(text, title)`` tuple. ``text`` is the text of every ``<p>``
        element joined by spaces and cut to the first 1024
        whitespace-separated words; ``title`` is the ``<title>`` text, or
        ``"No Title Available"`` when the page has none. On any failure the
        result is ``("Error scraping article: <reason>", "")``.
    """
    try:
        # Bound the wait so a slow site cannot hang the caller, and treat
        # HTTP error statuses as failures instead of scraping an error page.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extracting the title - this is a general approach.
        # get_text() is used rather than .string because .string is None for
        # an empty <title> or one containing nested markup.
        title = soup.title.get_text(strip=True) if soup.title else ""
        if not title:
            title = "No Title Available"

        # This is a simplification: every <p> on the page is taken as
        # article body text.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        words = text.split()
        truncated_text = ' '.join(words[:1024])  # Truncate to first 1024 words
        return truncated_text, title
    except Exception as e:
        # Deliberately broad: network and parsing failures are both reported
        # to the caller as text rather than raised.
        return "Error scraping article: " + str(e), ""
# Using Gradio Blocks
# Builds the UI: a title label, an input box, a summary box, two "load" buttons
# and a "Summarize" button, then starts the app.
# NOTE(review): the nesting below (which components sit inside the Row) is an
# assumption; confirm it against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## News Summary App")
    gr.Markdown("Enter a news text and get its summary, or load a random article.")
    with gr.Row():
        article_title = gr.Label() # Component to display the article title
        input_text = gr.Textbox(lines=10, label="Input Text")
        output_text = gr.Textbox(label="Summary")
    load_dataset_article_button = gr.Button("Load Random Article from Dataset")
    load_news_article_button = gr.Button("Load News Article")
    # load_article takes no inputs; its return value goes to the input box only.
    load_dataset_article_button.click(fn=load_article, inputs=[], outputs=input_text)
    # Two outputs are wired here, so get_news_article must return two values:
    # the first goes to the input box, the second to the title label.
    load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
    summarize_button = gr.Button("Summarize")
    # Summarize the current contents of the input box into the summary box.
    summarize_button.click(fn=summarize, inputs=input_text, outputs=output_text)
# Called at module top level (no __main__ guard), so it runs on import as well.
demo.launch()