Spaces:

awacke1
/

VizLib-BeautifulSoup

Runtime error

awacke1 committed on Feb 21, 2023

Commit

42b3f33

1 Parent(s): fae8400

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,7 +2,8 @@ import requests
 from bs4 import BeautifulSoup
 import streamlit as st
 import time
-import matplotlib.pyplot as plt
 from sklearn.feature_extraction.text import CountVectorizer
 urls = ['https://en.wikipedia.org/wiki/Health_care',
@@ -26,10 +27,12 @@ def plot_word_frequencies(content):
     for word in words:
         word_freq[word] = word_freq.get(word, 0) + 1
     sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
-    top_words = [word for word, freq in sorted_word_freq[:10]]
-    plt.bar(top_words, [word_freq[word] for word in top_words])
-    plt.xticks(rotation=45)
-    st.pyplot()
 def display_top_words(content):
     soup = BeautifulSoup(content, 'html.parser')
@@ -42,6 +45,7 @@ def display_top_words(content):
     st.write(f"Top words: {', '.join(top_words)}")
 def main():
     st.title("List of Articles on Health Care")
     for url in urls:
@@ -50,7 +54,7 @@ def main():
         st.write(f"Response time: {scraped_data['response_time']}")
         content = scraped_data['content']
         st.write(f"Content: ")
-        st.write(content.decode(), unsafe_allow_html=True)
         plot_word_frequencies(content)
         display_top_words(content)

 from bs4 import BeautifulSoup
 import streamlit as st
 import time
+import plotly.express as px
+import pandas as pd
 from sklearn.feature_extraction.text import CountVectorizer
 urls = ['https://en.wikipedia.org/wiki/Health_care',
     for word in words:
         word_freq[word] = word_freq.get(word, 0) + 1
     sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
+    df = pd.DataFrame({'word': [word for word, freq in sorted_word_freq],
+                       'freq': [freq for word, freq in sorted_word_freq],
+                       'len': [len(word) for word, freq in sorted_word_freq]})
+    fig = px.treemap(df, path=['len', 'word'], values='freq', color='len')
+    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
+    st.plotly_chart(fig)
 def display_top_words(content):
     soup = BeautifulSoup(content, 'html.parser')
     st.write(f"Top words: {', '.join(top_words)}")
 def main():
+    st.set_page_config(layout='wide')
     st.title("List of Articles on Health Care")
     for url in urls:
         st.write(f"Response time: {scraped_data['response_time']}")
         content = scraped_data['content']
         st.write(f"Content: ")
+        st.markdown(f"```{content.decode()}```")
         plot_word_frequencies(content)
         display_top_words(content)