# Spaces: Runtime error
import requests | |
from bs4 import BeautifulSoup | |
import streamlit as st | |
# Pages that main() passes to scrape_wikipedia(), in display order.
urls = [
    'https://en.wikipedia.org/wiki/Health_care',
    'https://en.wikipedia.org/wiki/Health_information_on_the_Internet',
    'https://www.who.int/health-topics/coronavirus#tab=tab_1',
]
def scrape_wikipedia(url, timeout=10):
    """Download a page and return the list items of its column-list ``<div>``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The ``<li>`` tags (a BeautifulSoup result set) found inside the first
        ``<div>`` matched by the class query
        ``'div-col columns column-width'``. An empty list is returned when
        the request fails, the server answers with an HTTP error status, or
        no such ``<div>`` is present.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures rather than parsing an error page.
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort scraping: report the failure in the UI and carry on.
        st.write(f"Error scraping {url}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    div_element = soup.find('div', {'class': 'div-col columns column-width'})
    if div_element is None:
        return []
    return div_element.find_all('li')
def main():
    """Render the Streamlit page listing the articles scraped from ``urls``.

    Each scraped list item becomes a record with two keys: ``'url'`` holds
    the position of the source page in ``urls`` and ``'article'`` holds the
    item's text. The records are shown once as a dataframe and once as
    line-by-line output.
    """
    st.title("List of Articles on Health Care")

    data = []
    # enumerate() yields each page's position directly; looking it up with
    # urls.index() would rescan the list for every article and would report
    # the first occurrence's index if a URL were listed twice.
    for url_index, url in enumerate(urls):
        for article in scrape_wikipedia(url):
            data.append({'url': url_index, 'article': article.text})

    st.write('## Dataset')
    st.dataframe(data)

    st.write('## Grid')
    st.write('url', 'article')
    for record in data:
        st.write(record['url'], record['article'])
# Run the app only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()