Spaces:

awacke1
/

Torch-Git-Markdown-NLP

Runtime error

App Files Files Community

awacke1 committed on Mar 11, 2023

Commit

58e20ab

1 Parent(s): cfaabfd

Create app.py

Browse files

Files changed (1) hide show

app.py +65 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import streamlit as st
+import requests
+from transformers import pipeline
+import plotly.express as px
+import pandas as pd
+from collections import Counter
+import re
def _to_raw_github_url(url):
    """Map a github.com ``/blob/`` page URL to its raw-content URL.

    URLs that do not match the ``github.com/<owner>/<repo>/blob/...`` shape
    are returned unchanged.
    """
    match = re.match(r'https?://github\.com/([^/]+)/([^/]+)/blob/(.+)', url)
    if match is None:
        return url
    owner, repo, ref_and_path = match.groups()
    return f'https://raw.githubusercontent.com/{owner}/{repo}/{ref_and_path}'


def get_markdown_from_github(url, timeout=10):
    """Download a Markdown file from GitHub and return its text.

    Args:
        url: Address of the file. A ``github.com/.../blob/...`` page URL is
            rewritten to the matching ``raw.githubusercontent.com`` URL,
            because the blob page is an HTML document rather than the
            Markdown source.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(_to_raw_github_url(url), timeout=timeout)
    # Fail loudly instead of analysing an error page as if it were Markdown.
    response.raise_for_status()
    return response.text
def preprocess_text(text):
    """Lowercase *text* and replace each run of non-alphanumerics with a space.

    Letters and digits of any script are kept, so accented or non-Latin
    words stay intact instead of being cut apart. Underscores count as
    separators. The result is not stripped: leading or trailing separators
    become a single leading or trailing space.

    Args:
        text: The string to normalise.

    Returns:
        The lowercased string with separator runs collapsed to one space.
    """
    # ``\W`` is every non-word character; adding ``_`` makes underscores
    # separators too, since ``\w`` would otherwise keep them.
    return re.sub(r'[\W_]+', ' ', text.lower())
def get_most_frequent_words(text, n):
    """Return the *n* most frequent whitespace-separated tokens of *text*.

    Args:
        text: String to tokenise with ``str.split()``.
        n: How many entries to return.

    Returns:
        A list of ``(word, count)`` tuples as produced by
        ``Counter.most_common(n)``.
    """
    return Counter(text.split()).most_common(n)
def get_sentences_with_common_words(text, common_words):
    """Return the sentences of *text* that contain any of *common_words*.

    Sentences are the pieces obtained by splitting *text* on ``.``, ``?``
    and ``!``. A word counts only when it appears as a whole word, so
    ``"cat"`` does not select a sentence merely because it contains
    ``"concatenate"``. Matching is case-sensitive.

    Args:
        text: The text to split into sentences.
        common_words: Iterable of words to look for. Empty strings are
            ignored.

    Returns:
        The matching sentences, stripped of surrounding whitespace, in
        their original order. Each sentence appears at most once per
        occurrence in *text*. Returns an empty list when there are no
        words to look for.
    """
    words = [word for word in common_words if word]
    if not words:
        # An empty alternation would match everywhere; nothing to search for.
        return []

    alternatives = '|'.join(re.escape(word) for word in words)
    # The lookarounds require that no letter or digit touches the match on
    # either side. ``[^\W_]`` means "letter or digit", so an underscore
    # still counts as a boundary.
    whole_word = re.compile(r'(?<![^\W_])(?:' + alternatives + r')(?![^\W_])')

    return [
        sentence.strip()
        for sentence in re.split(r'[.?!]', text)
        if whole_word.search(sentence)
    ]
def render_heatmap(words, sentences):
    """Draw the word frequencies as a treemap and list the sentences below it.

    Args:
        words: Rows for a two-column table; the first column is loaded as
            ``word`` and the second as ``frequency``.
        sentences: Strings written one per line as ``- <sentence>`` under
            the chart.
    """
    frequency_table = pd.DataFrame(words, columns=['word', 'frequency'])
    treemap = px.treemap(
        frequency_table,
        path=['word'],
        values='frequency',
        color='frequency',
        hover_data=['frequency'],
        color_continuous_scale='reds',
    )
    st.plotly_chart(treemap, use_container_width=True)

    st.write('Sentences containing the most common words:')
    for line in sentences:
        st.write('- ' + line)
def main():
    """Build the Streamlit page: fetch a README, rank its words, show results.

    Steps: download the Markdown, normalise it, count the most frequent
    words (how many is chosen with a sidebar slider from 1 to 20, default
    10), collect the sentences that mention those words, and render both.
    """
    st.title('Markdown Analyzer')

    # Raw-content URL: the github.com/.../blob/... address serves an HTML
    # page, not the Markdown source.
    markdown_url = 'https://raw.githubusercontent.com/AaronCWacker/Yggdrasil/main/README.md'
    markdown = get_markdown_from_github(markdown_url)

    # Normalised text is used only for word counting.
    text = preprocess_text(markdown)

    n_most_frequent_words = st.sidebar.slider('Number of most frequent words to display', 1, 20, 10)
    most_frequent_words = get_most_frequent_words(text, n_most_frequent_words)
    common_words = [word for word, _ in most_frequent_words]

    # Sentences must come from text that still has its punctuation:
    # preprocess_text removes '.', '?' and '!', so the normalised text can
    # never be split and would come back as one giant "sentence".
    # Lowercasing keeps it comparable with the lowercased common words.
    sentences = get_sentences_with_common_words(markdown.lower(), common_words)

    render_heatmap(most_frequent_words, sentences)
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()