awacke1 committed on
Commit
cf20733
·
1 Parent(s): a55c8f5

Update backup-app.py

Browse files
Files changed (1) hide show
  1. backup-app.py +11 -7
backup-app.py CHANGED
@@ -2,7 +2,8 @@ import requests
2
  from bs4 import BeautifulSoup
3
  import streamlit as st
4
  import time
5
- import matplotlib.pyplot as plt
 
6
  from sklearn.feature_extraction.text import CountVectorizer
7
 
8
  urls = ['https://en.wikipedia.org/wiki/Health_care',
@@ -26,10 +27,12 @@ def plot_word_frequencies(content):
26
  for word in words:
27
  word_freq[word] = word_freq.get(word, 0) + 1
28
  sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
29
- top_words = [word for word, freq in sorted_word_freq[:10]]
30
- plt.bar(top_words, [word_freq[word] for word in top_words])
31
- plt.xticks(rotation=45)
32
- st.pyplot()
 
 
33
 
34
  def display_top_words(content):
35
  soup = BeautifulSoup(content, 'html.parser')
@@ -42,6 +45,7 @@ def display_top_words(content):
42
  st.write(f"Top words: {', '.join(top_words)}")
43
 
44
  def main():
 
45
  st.title("List of Articles on Health Care")
46
 
47
  for url in urls:
@@ -50,10 +54,10 @@ def main():
50
  st.write(f"Response time: {scraped_data['response_time']}")
51
  content = scraped_data['content']
52
  st.write(f"Content: ")
53
- st.write(content.decode(), unsafe_allow_html=True)
54
 
55
  plot_word_frequencies(content)
56
  display_top_words(content)
57
 
58
  if __name__ == '__main__':
59
- main()
 
2
  from bs4 import BeautifulSoup
3
  import streamlit as st
4
  import time
5
+ import plotly.express as px
6
+ import pandas as pd
7
  from sklearn.feature_extraction.text import CountVectorizer
8
 
9
  urls = ['https://en.wikipedia.org/wiki/Health_care',
 
27
  for word in words:
28
  word_freq[word] = word_freq.get(word, 0) + 1
29
  sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
30
+ df = pd.DataFrame({'word': [word for word, freq in sorted_word_freq],
31
+ 'freq': [freq for word, freq in sorted_word_freq],
32
+ 'len': [len(word) for word, freq in sorted_word_freq]})
33
+ fig = px.treemap(df, path=['len', 'word'], values='freq', color='len')
34
+ fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
35
+ st.plotly_chart(fig)
36
 
37
  def display_top_words(content):
38
  soup = BeautifulSoup(content, 'html.parser')
 
45
  st.write(f"Top words: {', '.join(top_words)}")
46
 
47
  def main():
48
+ st.set_page_config(layout='wide')
49
  st.title("List of Articles on Health Care")
50
 
51
  for url in urls:
 
54
  st.write(f"Response time: {scraped_data['response_time']}")
55
  content = scraped_data['content']
56
  st.write(f"Content: ")
57
+ st.markdown(f"```{content.decode()}```")
58
 
59
  plot_word_frequencies(content)
60
  display_top_words(content)
61
 
62
  if __name__ == '__main__':
63
+ main()