Spaces:

tracinginsights
/

QuotesBot

Runtime error

QuotesBot / app.py

Create app.py

eb67193 over 2 years ago

2 kB

	import pandas as pd
	import requests
	import isort
	import black
	import flair
	import time
	from bs4 import BeautifulSoup



	# Address of the formula1.com "latest/all.xml" document. Presumably the RSS
	# feed that get_xml() is meant to read by default — TODO confirm.
	URL = "https://www.formula1.com/content/fom-website/en/latest/all.xml"

	def get_xml(url=None):
	    """Read an RSS-style XML document and return its items as a DataFrame.

	    Args:
	        url: Anything ``pandas.read_xml`` accepts (URL, path or file-like
	            object). When omitted, the module-level ``URL`` is used.

	    Returns:
	        A DataFrame with one row per ``channel/item`` element.
	    """
	    if url is None:
	        url = URL
	    # xpath is only for formula1
	    # use urllib.parse to check for formula1.com website or other news
	    return pd.read_xml(url, xpath="channel/item")


	def _extract_article_text(link):
	    """Download one article page and return its paragraph text, or None.

	    Returns a single-column (``"text"``) DataFrame holding every ``<p>`` of
	    the article container whose text is not also the text of a ``<strong>``
	    element. Returns ``None`` when the page has no such container.
	    """
	    # A timeout keeps one stalled connection from blocking the poller forever.
	    response = requests.get(link, timeout=30)
	    soup = BeautifulSoup(response.content, "html.parser")
	    # class_ below will be different for different websites
	    container = soup.find(
	        "div", class_="col-lg-8 col-xl-7 offset-xl-1 f1-article--content"
	    )
	    if container is None:
	        return None

	    paragraphs = [tag.text for tag in container.find_all("p")]
	    # The first <strong> is intentionally not excluded (kept from the
	    # original logic; presumably it is the article's lead-in — TODO confirm).
	    emphasised = {tag.text for tag in container.find_all("strong")[1:]}
	    kept = [text for text in paragraphs if text not in emphasised]
	    return pd.DataFrame(data={"text": pd.Series(kept, dtype="object")})


	def check_updates(every=60):
	    """Poll the feed until a new article shows up and return its text.

	    A baseline snapshot of the feed is taken immediately. The feed is then
	    re-read every ``every`` seconds and compared with the previous snapshot
	    by ``guid``; each new ``guid`` is fetched as the article link.

	    Args:
	        every: Seconds to sleep between two polls.

	    Returns:
	        The ``"text"`` DataFrame of the first new article whose content
	        container could be located. The call blocks until that happens.

	    Raises:
	        requests.RequestException: If downloading an article fails or
	            times out.
	    """
	    previous_xml = get_xml(URL)
	    while True:
	        time.sleep(every)
	        latest_xml = get_xml(URL)
	        # Compare by guid rather than whole-frame equality: only unseen
	        # guids are new articles, and `~` on a Python bool is always truthy.
	        unseen = ~latest_xml["guid"].isin(previous_xml["guid"])
	        new_articles_df = latest_xml[unseen]
	        # Move the baseline forward so an article is reported only once.
	        previous_xml = latest_xml

	        if new_articles_df.empty:
	            print('No New article is found')
	            continue

	        print('New articles found')
	        for link in new_articles_df["guid"]:
	            df = _extract_article_text(link)
	            if df is not None:
	                return df