awacke1 committed
Commit bee76fe · 1 Parent(s): 59ddf59

Update app.py

Files changed (1):
  1. app.py +32 -6
app.py CHANGED
@@ -1,8 +1,14 @@
 import requests
 from bs4 import BeautifulSoup
 import streamlit as st
+import asyncio
+import concurrent.futures
 
-def scrape_wikipedia(url):
+urls = ['https://en.wikipedia.org/wiki/Health_care',
+        'https://en.wikipedia.org/wiki/Health_information_on_the_Internet',
+        'https://www.who.int/health-topics/coronavirus#tab=tab_1']
+
+async def scrape_wikipedia(url):
     response = requests.get(url)
     soup = BeautifulSoup(response.content, 'html.parser')
     div_element = soup.find('div', {'class': 'div-col columns column-width'})
@@ -12,12 +18,32 @@ def scrape_wikipedia(url):
     articles_list = []
     return articles_list
 
+async def scrape_urls(urls):
+    tasks = []
+    async with concurrent.futures.ThreadPoolExecutor(max_workers=len(urls)) as executor:
+        for url in urls:
+            tasks.append(asyncio.ensure_future(scrape_wikipedia(url)))
+        await asyncio.gather(*tasks)
+    return tasks
+
 def main():
-    url = 'https://en.wikipedia.org/wiki/Health_care'
-    articles_list = scrape_wikipedia(url)
-    st.write("List of articles on health care:")
-    for article in articles_list:
-        st.write(article.text)
+    st.title("List of Articles on Health Care")
+
+    loop = asyncio.get_event_loop()
+    tasks = loop.run_until_complete(scrape_urls(urls))
+
+    data = []
+    for task in tasks:
+        for article in task.result():
+            data.append({'url': task.result().index(article), 'article': article.text})
+
+    st.write('## Dataset')
+    st.dataframe(data)
+
+    st.write('## Grid')
+    st.write('url', 'article')
+    for d in data:
+        st.write(d['url'], d['article'])
 
 if __name__ == '__main__':
     main()
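
As committed, this version will fail at runtime: `concurrent.futures.ThreadPoolExecutor` is not an async context manager, so the `async with` in `scrape_urls` raises a `TypeError`; the `executor` it names is never used; and the blocking `requests.get` inside `async def scrape_wikipedia` means the three pages are still fetched one at a time. The `'url'` key in `data` also stores `task.result().index(article)`, the article's position in its list, rather than the source URL. Below is a minimal sketch of one way to get real fan-out, assuming Python 3.9+ for `asyncio.to_thread`; `scrape_one` and `scrape_all` are illustrative names, not part of this commit, and the `find_all('li')` step is a guess at the part of `scrape_wikipedia` the diff hides.

import asyncio
import requests
from bs4 import BeautifulSoup

urls = ['https://en.wikipedia.org/wiki/Health_care',
        'https://en.wikipedia.org/wiki/Health_information_on_the_Internet',
        'https://www.who.int/health-topics/coronavirus#tab=tab_1']

def scrape_one(url):
    # Synchronous stand-in for the committed scrape_wikipedia; the li
    # extraction is an assumption, since the diff hides that part of the body.
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    div_element = soup.find('div', {'class': 'div-col columns column-width'})
    return div_element.find_all('li') if div_element else []

async def scrape_all(urls):
    # asyncio.to_thread (Python 3.9+) runs each blocking requests.get call in a
    # worker thread; gather awaits them all and keeps results in input order.
    results = await asyncio.gather(*(asyncio.to_thread(scrape_one, u) for u in urls))
    # Pair each article with the URL it came from, not its list index.
    return [{'url': u, 'article': a.get_text()}
            for u, articles in zip(urls, results) for a in articles]

data = asyncio.run(scrape_all(urls))  # asyncio.run manages the event loop itself

With results shaped this way, `st.dataframe(data)` in `main()` would show the actual source URL in the `url` column rather than an index.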