awacke1 committed
Commit f025397 · 1 Parent(s): c36fabe

Update app.py

Files changed (1)
  1. app.py +17 -27
app.py CHANGED
@@ -1,43 +1,33 @@
  import requests
  from bs4 import BeautifulSoup
  import streamlit as st
- import asyncio
- import concurrent.futures

  urls = ['https://en.wikipedia.org/wiki/Health_care',
          'https://en.wikipedia.org/wiki/Health_information_on_the_Internet',
          'https://www.who.int/health-topics/coronavirus#tab=tab_1']

- async def scrape_wikipedia(url):
-     response = requests.get(url)
-     soup = BeautifulSoup(response.content, 'html.parser')
-     div_element = soup.find('div', {'class': 'div-col columns column-width'})
-     if div_element is not None:
-         articles_list = div_element.find_all('li')
-     else:
-         articles_list = []
-     return articles_list
-
- async def scrape_urls(urls):
-     tasks = []
-     loop = asyncio.new_event_loop()
-     asyncio.set_event_loop(loop)
-     async with concurrent.futures.ThreadPoolExecutor(max_workers=len(urls)) as executor:
-         for url in urls:
-             tasks.append(loop.run_in_executor(executor, scrape_wikipedia, url))
-         await asyncio.gather(*tasks)
-     return tasks
+ def scrape_wikipedia(url):
+     try:
+         response = requests.get(url)
+         soup = BeautifulSoup(response.content, 'html.parser')
+         div_element = soup.find('div', {'class': 'div-col columns column-width'})
+         if div_element is not None:
+             articles_list = div_element.find_all('li')
+         else:
+             articles_list = []
+         return articles_list
+     except:
+         st.write(f"Error scraping {url}")
+         return []

  def main():
      st.title("List of Articles on Health Care")

-     loop = asyncio.get_event_loop()
-     tasks = loop.run_until_complete(scrape_urls(urls))
-
      data = []
-     for task in tasks:
-         for article in task.result():
-             data.append({'url': task.result().index(article), 'article': article.text})
+     for url in urls:
+         articles_list = scrape_wikipedia(url)
+         for article in articles_list:
+             data.append({'url': urls.index(url), 'article': article.text})

      st.write('## Dataset')
      st.dataframe(data)
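
Note that on both sides of the diff the file defines main() but never calls it in the lines shown, so the Streamlit page renders nothing as committed. A minimal sketch of the entry point one would typically append when running the app with `streamlit run app.py`; the guard below is an assumption, not part of commit f025397:

# Hypothetical addition, not in commit f025397: invoke main() when the
# script is executed (e.g. via `streamlit run app.py`, where __name__ is "__main__").
if __name__ == "__main__":
    main()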