awacke1 committed on
Commit
58e20ab
·
1 Parent(s): cfaabfd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from transformers import pipeline
4
+ import plotly.express as px
5
+ import pandas as pd
6
+ from collections import Counter
7
+ import re
8
+
9
def _to_raw_github_url(url):
    """Map a github.com ``/blob/`` page URL to its raw-content URL; pass any other URL through."""
    prefix = 'https://github.com/'
    if url.startswith(prefix) and '/blob/' in url:
        owner_repo, _, ref_and_path = url[len(prefix):].partition('/blob/')
        # Only rewrite the well-formed "<owner>/<repo>/blob/<ref>/<path>" shape.
        if owner_repo.count('/') == 1 and ref_and_path:
            return 'https://raw.githubusercontent.com/' + owner_repo + '/' + ref_and_path
    return url


def get_markdown_from_github(url, timeout=10):
    """Download a Markdown document and return its text.

    Args:
        url: Location of the document. A ``https://github.com/<owner>/<repo>/blob/...``
            URL is rewritten to the matching ``raw.githubusercontent.com`` URL,
            because the ``blob`` page is the HTML viewer rather than the file
            itself. Any other URL is requested unchanged.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the app forever.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection problems or timeouts.
    """
    response = requests.get(_to_raw_github_url(url), timeout=timeout)
    # Fail loudly instead of analysing the text of an error page.
    response.raise_for_status()
    return response.text
13
+
14
def preprocess_text(text):
    """Normalise ``text`` for word counting.

    The text is lower-cased and every run of characters that is not an ASCII
    letter or digit is collapsed into a single space. Leading and trailing
    runs therefore become a single space as well; nothing is trimmed.
    """
    return re.sub('[^A-Za-z0-9]+', ' ', text.lower())
18
+
19
def get_most_frequent_words(text, n):
    """Return the ``n`` most frequent whitespace-separated words in ``text``.

    The result is a list of ``(word, count)`` tuples ordered from most to
    least frequent, as produced by ``Counter.most_common``.
    """
    return Counter(text.split()).most_common(n)
24
+
25
def get_sentences_with_common_words(text, common_words):
    """Return the sentences of ``text`` that contain any of ``common_words``.

    Args:
        text: Text to scan. Sentences are delimited by ``.``, ``?`` or ``!``.
        common_words: Iterable of words to look for.

    Returns:
        A list of matching sentences, stripped of surrounding whitespace, in
        their original order. Each sentence appears at most once per
        occurrence in ``text`` regardless of how many words it matches.

    A word matches only as a whole alphanumeric token, so ``"a"`` does not
    match ``"cat"``. Matching is case-sensitive.
    """
    wanted = set(common_words)
    if not wanted:
        return []

    selected_sentences = []
    for sentence in re.split('[.?!]', text):
        # Tokenise with the same alphanumeric rule preprocess_text uses for words.
        tokens = set(re.findall('[A-Za-z0-9]+', sentence))
        if not wanted.isdisjoint(tokens):
            selected_sentences.append(sentence.strip())
    return selected_sentences
34
+
35
def render_heatmap(words, sentences):
    """Render the word-frequency chart followed by the matching sentences.

    ``words`` is loaded into a DataFrame with ``word`` and ``frequency``
    columns and drawn as a Plotly treemap sized and coloured by frequency.
    Each entry of ``sentences`` is then written as its own ``- `` prefixed
    line under a heading.
    """
    frequency_table = pd.DataFrame(words, columns=['word', 'frequency'])
    treemap = px.treemap(
        frequency_table,
        path=['word'],
        values='frequency',
        color='frequency',
        hover_data=['frequency'],
        color_continuous_scale='reds',
    )
    st.plotly_chart(treemap, use_container_width=True)

    st.write('Sentences containing the most common words:')
    for bullet in ('- ' + sentence for sentence in sentences):
        st.write(bullet)
42
+
43
def main():
    """Build the Streamlit page: fetch a README, count words, and chart them.

    The README is downloaded, normalised for word counting, and the top-N
    words (N chosen with a sidebar slider, 1-20, default 10) are rendered
    together with the sentences that contain them.
    """
    st.title('Markdown Analyzer')

    # Get markdown from GitHub. Use the raw-content URL: the github.com
    # "blob" URL serves the HTML viewer page, not the Markdown file.
    markdown_url = 'https://raw.githubusercontent.com/AaronCWacker/Yggdrasil/main/README.md'
    markdown = get_markdown_from_github(markdown_url)

    # Preprocess text
    text = preprocess_text(markdown)

    # Get most frequent words
    n_most_frequent_words = st.sidebar.slider('Number of most frequent words to display', 1, 20, 10)
    most_frequent_words = get_most_frequent_words(text, n_most_frequent_words)

    # Get sentences containing common words. Split the lower-cased original
    # markdown rather than the preprocessed text: preprocessing strips the
    # '.', '?' and '!' that sentence splitting relies on, while the common
    # words are lower-case and so need lower-cased text to match.
    common_words = [word for word, _ in most_frequent_words]
    sentences = get_sentences_with_common_words(markdown.lower(), common_words)

    # Render heatmap
    render_heatmap(most_frequent_words, sentences)
63
+
64
# Launch the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()