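# Spaces: Runtime error
# (Status banner captured along with the source, presumably from the hosting
# page; it is not part of the program.)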
import asyncio
import concurrent.futures

import requests
import streamlit as st
from bs4 import BeautifulSoup
# Pages whose article lists are scraped; main() passes this list to
# scrape_urls().
urls = [
    'https://en.wikipedia.org/wiki/Health_care',
    'https://en.wikipedia.org/wiki/Health_information_on_the_Internet',
    'https://www.who.int/health-topics/coronavirus#tab=tab_1',
]
async def scrape_wikipedia(url):
    """Download *url* and return the ``<li>`` tags of its column list.

    The synchronous ``requests.get`` call is executed in the event loop's
    default thread-pool executor, so scheduling this coroutine for several
    URLs lets the downloads overlap instead of running one after another.

    Args:
        url: Address of the page to download.

    Returns:
        The ``<li>`` elements found inside the first ``<div>`` whose class
        attribute is ``div-col columns column-width``, or an empty list when
        the page contains no such ``<div>``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    loop = asyncio.get_running_loop()
    # requests is blocking: awaiting it through the executor keeps the event
    # loop free, and the timeout stops one dead host from hanging the app.
    response = await loop.run_in_executor(
        None, lambda: requests.get(url, timeout=30)
    )
    soup = BeautifulSoup(response.content, 'html.parser')
    # NOTE(review): a multi-word class string is matched against the exact
    # attribute value, so pages that only use class="div-col" yield nothing;
    # confirm this selector against the current page markup.
    div_element = soup.find('div', {'class': 'div-col columns column-width'})
    if div_element is None:
        return []
    return div_element.find_all('li')
async def scrape_urls(urls):
    """Scrape every address in *urls* and wait for all of them to finish.

    Args:
        urls: Iterable of page addresses; each one is passed to
            ``scrape_wikipedia``.

    Returns:
        A list of completed ``asyncio`` tasks, one per URL and in the same
        order as *urls*. ``task.result()`` gives that URL's scrape result.
        An empty *urls* yields an empty list.

    Raises:
        Exception: The first exception raised by any scrape is propagated
            by ``asyncio.gather``.
    """
    # Tasks are scheduled straight onto the running loop. A
    # ThreadPoolExecutor cannot be entered with ``async with`` and rejects
    # max_workers=0, so no executor is created here.
    tasks = [asyncio.ensure_future(scrape_wikipedia(url)) for url in urls]
    if tasks:
        await asyncio.gather(*tasks)
    return tasks
def main():
    """Render the Streamlit page listing the articles scraped from ``urls``.

    Scrapes every configured URL, then shows the collected rows twice: once
    as a dataframe and once as a plain line-by-line listing. Each row maps
    ``'url'`` to the page the item came from and ``'article'`` to the item's
    text.
    """
    st.title("List of Articles on Health Care")

    # asyncio.run() creates and closes its own event loop. get_event_loop()
    # raises RuntimeError in a thread that has no loop set, which happens
    # whenever this function is not executed on the main thread.
    tasks = asyncio.run(scrape_urls(urls))

    data = []
    # scrape_urls() returns tasks in the same order as `urls`, so zip pairs
    # every result with the page it was scraped from.
    for url, task in zip(urls, tasks):
        articles = task.result()
        for article in articles:
            data.append({'url': url, 'article': article.text})

    st.write('## Dataset')
    st.dataframe(data)

    st.write('## Grid')
    st.write('url', 'article')
    for d in data:
        st.write(d['url'], d['article'])
# Build the page only when this file is executed as the entry script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()