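"""Scrape article lists from a handful of health-related pages and display
them in a Streamlit app, along with per-request response times."""
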
import time

import requests
import streamlit as st
from bs4 import BeautifulSoup

urls = ['https://en.wikipedia.org/wiki/Health_care',
        'https://en.wikipedia.org/wiki/Health_information_on_the_Internet',
        'https://www.who.int/health-topics/coronavirus#tab=tab_1']
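
# Note: the WHO page is not a Wikipedia article, so the Wikipedia-specific
# selector in scrape_wikipedia() finds no list items there and it
# contributes no rows to the dataset.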

def scrape_wikipedia(url):
    """Fetch a page, time the request, and extract the <li> items from a
    Wikipedia-style column list. Returns empty results on request errors."""
    try:
        start_time = time.time()
        response = requests.get(url, timeout=10)
        end_time = time.time()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Wikipedia marks its multi-column article lists with this div class.
        div_element = soup.find('div', {'class': 'div-col columns column-width'})
        articles_list = div_element.find_all('li') if div_element is not None else []
        return {'url': url, 'response_time': end_time - start_time,
                'response': response, 'articles': articles_list}
    except requests.RequestException:
        # Covers timeouts, connection failures, and malformed URLs.
        return {'url': url, 'response_time': None, 'response': None, 'articles': []}

def main():
    st.title("List of Articles on Health Care")

    data = []
    for url in urls:
        st.write(f"Scraping {url}...")
        scraped_data = scrape_wikipedia(url)
        st.write(f"Response time: {scraped_data['response_time']}")
        st.write(scraped_data['response'])  # shows the raw Response object, e.g. <Response [200]>
        # Build one row per extracted <li>: the source URL plus the item's text.
        for article in scraped_data['articles']:
            data.append({'url': scraped_data['url'], 'article': article.text})

    st.write('## Dataset')
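    # st.dataframe accepts a list of dicts and renders it as an interactive
    # table (Streamlit converts it to a DataFrame internally).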
    st.dataframe(data)

    st.write('## Grid')
    # Manual two-column listing: a header line, then one line per record.
    st.write('url', 'article')
    for d in data:
        st.write(d['url'], d['article'])

if __name__ == '__main__':
    main()
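
# To run this app (assuming Streamlit is installed):
#   streamlit run <this_file>.py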