Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,8 @@ import requests
|
|
2 |
from bs4 import BeautifulSoup
|
3 |
import streamlit as st
|
4 |
import time
|
5 |
-
import
|
|
|
6 |
from sklearn.feature_extraction.text import CountVectorizer
|
7 |
|
8 |
urls = ['https://en.wikipedia.org/wiki/Health_care',
|
@@ -26,10 +27,12 @@ def plot_word_frequencies(content):
|
|
26 |
for word in words:
|
27 |
word_freq[word] = word_freq.get(word, 0) + 1
|
28 |
sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
33 |
|
34 |
def display_top_words(content):
|
35 |
soup = BeautifulSoup(content, 'html.parser')
|
@@ -42,6 +45,7 @@ def display_top_words(content):
|
|
42 |
st.write(f"Top words: {', '.join(top_words)}")
|
43 |
|
44 |
def main():
|
|
|
45 |
st.title("List of Articles on Health Care")
|
46 |
|
47 |
for url in urls:
|
@@ -50,7 +54,7 @@ def main():
|
|
50 |
st.write(f"Response time: {scraped_data['response_time']}")
|
51 |
content = scraped_data['content']
|
52 |
st.write(f"Content: ")
|
53 |
-
st.
|
54 |
|
55 |
plot_word_frequencies(content)
|
56 |
display_top_words(content)
|
|
|
2 |
from bs4 import BeautifulSoup
|
3 |
import streamlit as st
|
4 |
import time
|
5 |
+
import plotly.express as px
|
6 |
+
import pandas as pd
|
7 |
from sklearn.feature_extraction.text import CountVectorizer
|
8 |
|
9 |
urls = ['https://en.wikipedia.org/wiki/Health_care',
|
|
|
27 |
for word in words:
|
28 |
word_freq[word] = word_freq.get(word, 0) + 1
|
29 |
sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
|
30 |
+
df = pd.DataFrame({'word': [word for word, freq in sorted_word_freq],
|
31 |
+
'freq': [freq for word, freq in sorted_word_freq],
|
32 |
+
'len': [len(word) for word, freq in sorted_word_freq]})
|
33 |
+
fig = px.treemap(df, path=['len', 'word'], values='freq', color='len')
|
34 |
+
fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
|
35 |
+
st.plotly_chart(fig)
|
36 |
|
37 |
def display_top_words(content):
|
38 |
soup = BeautifulSoup(content, 'html.parser')
|
|
|
45 |
st.write(f"Top words: {', '.join(top_words)}")
|
46 |
|
47 |
def main():
|
48 |
+
st.set_page_config(layout='wide')
|
49 |
st.title("List of Articles on Health Care")
|
50 |
|
51 |
for url in urls:
|
|
|
54 |
st.write(f"Response time: {scraped_data['response_time']}")
|
55 |
content = scraped_data['content']
|
56 |
st.write(f"Content: ")
|
57 |
+
st.markdown(f"```{content.decode()}```")
|
58 |
|
59 |
plot_word_frequencies(content)
|
60 |
display_top_words(content)
|