Spaces:

awacke1
/

Torch-Git-Markdown-NLP

Runtime error

App Files Files Community

Torch-Git-Markdown-NLP / version1-app.py

awacke1

Create version1-app.py

5cfa291 over 2 years ago

raw

history blame

2.09 kB

	import streamlit as st
	import requests
	from transformers import pipeline
	import plotly.express as px
	import pandas as pd
	from collections import Counter
	import re

	def get_markdown_from_github(url):
	    """Download a Markdown document and return its text.

	    GitHub "blob" page URLs of the form
	    ``https://github.com/<owner>/<repo>/blob/<ref>/<path>`` serve an HTML
	    page rather than the file itself, so they are rewritten to the matching
	    ``raw.githubusercontent.com`` URL before the request is made. Any other
	    URL is fetched unchanged.

	    Args:
	        url: Address of the Markdown file.

	    Returns:
	        The response body decoded as text.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	        requests.RequestException: On connection problems or a timeout.
	    """
	    raw_url = re.sub(
	        r'^https?://github\.com/([^/]+)/([^/]+)/blob/(.+)$',
	        r'https://raw.githubusercontent.com/\1/\2/\3',
	        url,
	    )
	    # Bound the wait so an unresponsive host cannot hang the app forever.
	    response = requests.get(raw_url, timeout=10)
	    # Fail loudly instead of analysing an error page as if it were Markdown.
	    response.raise_for_status()
	    return response.text

	def preprocess_text(text):
	    """Normalise text for word counting.

	    The input is lower-cased, then every run of characters other than ASCII
	    letters and digits is replaced by a single space.
	    """
	    non_alphanumeric = re.compile('[^A-Za-z0-9]+')
	    return non_alphanumeric.sub(' ', text.lower())

	def get_most_frequent_words(text, n):
	    """Return the ``n`` most frequent whitespace-separated words of ``text``.

	    The result is the list of ``(word, count)`` tuples produced by
	    ``Counter.most_common(n)``.
	    """
	    return Counter(text.split()).most_common(n)

	def get_sentences_with_common_words(text, common_words):
	    """Return the sentences of ``text`` that contain any of ``common_words``.

	    ``text`` is split on ``.``, ``?`` and ``!``. A sentence is selected when
	    at least one entry of ``common_words`` occurs in it as a whole word,
	    compared case-insensitively, so ``"the"`` does not select a sentence
	    merely because it contains ``"other"``.

	    Args:
	        text: The text to split into sentences.
	        common_words: Iterable of words to look for; empty strings are ignored.

	    Returns:
	        The matching sentences, whitespace-stripped, in document order. Each
	        sentence appears at most once per occurrence in ``text``.
	    """
	    escaped = [re.escape(word) for word in common_words if word]
	    if not escaped:
	        return []
	    # Look-arounds enforce whole-word matches; a plain substring test would
	    # let short words such as "a" match almost every sentence.
	    whole_word = re.compile(
	        r'(?<!\w)(?:' + '|'.join(escaped) + r')(?!\w)',
	        re.IGNORECASE,
	    )
	    return [
	        sentence.strip()
	        for sentence in re.split(r'[.?!]', text)
	        if whole_word.search(sentence)
	    ]

	def render_heatmap(words, sentences):
	    """Draw the word-frequency chart and list the selected sentences.

	    Despite the name, the chart is a Plotly treemap in which each word's
	    tile is sized and coloured by its frequency.

	    Args:
	        words: ``(word, frequency)`` pairs used to build the chart.
	        sentences: Sentence strings written below the chart, one per line.
	    """
	    frequency_table = pd.DataFrame(words, columns=['word', 'frequency'])
	    treemap_options = {
	        'path': ['word'],
	        'values': 'frequency',
	        'color': 'frequency',
	        'hover_data': ['frequency'],
	        'color_continuous_scale': 'reds',
	    }
	    chart = px.treemap(frequency_table, **treemap_options)
	    st.plotly_chart(chart, use_container_width=True)
	    st.write('Sentences containing the most common words:')
	    for line in sentences:
	        st.write('- ' + line)

	def main():
	    """Run the Streamlit app: fetch a README, count its words, show the results."""
	    st.title('Markdown Analyzer')
	    # Get markdown from GitHub. The raw endpoint is used because the
	    # github.com "blob" page returns HTML, not the Markdown source.
	    markdown_url = 'https://raw.githubusercontent.com/AaronCWacker/Yggdrasil/main/README.md'
	    try:
	        markdown = get_markdown_from_github(markdown_url)
	    except requests.RequestException as exc:
	        # Show a readable message in the page instead of a stack trace.
	        st.error(f'Could not download {markdown_url}: {exc}')
	        return
	    # Preprocess text
	    text = preprocess_text(markdown)
	    # Get most frequent words
	    n_most_frequent_words = st.sidebar.slider('Number of most frequent words to display', 1, 20, 10)
	    most_frequent_words = get_most_frequent_words(text, n_most_frequent_words)
	    # Get sentences containing common words. They are split from the
	    # lower-cased *unprocessed* Markdown: preprocess_text() removes '.', '?'
	    # and '!', so splitting its output would always yield one "sentence"
	    # holding the whole document.
	    common_words = [word for word, _ in most_frequent_words]
	    sentences = get_sentences_with_common_words(markdown.lower(), common_words)
	    # Render heatmap
	    render_heatmap(most_frequent_words, sentences)

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()