awacke1 committed on
Commit
1c12389
Β·
1 Parent(s): 2bc1c14

Update backupapp.py

Browse files
Files changed (1) hide show
  1. backupapp.py +5 -31
backupapp.py CHANGED
@@ -1,89 +1,71 @@
1
-
2
  import streamlit as st
3
  import re
4
  import json
 
5
  import nltk
6
  from nltk.corpus import stopwords
7
  from nltk import FreqDist
8
- from graphviz import Digraph
9
- from collections import Counter
10
-
11
  nltk.download('punkt')
12
  nltk.download('stopwords')
13
 
 
 
 
14
  def remove_timestamps(text):
15
  return re.sub(r'\d{1,2}:\d{2}\n', '', text)
16
 
17
  def process_text(text):
18
  lines = text.split("\n")
19
  processed_lines = []
20
-
21
  for line in lines:
22
  if line:
23
  processed_lines.append(line)
24
-
25
  outline = ""
26
  for i, line in enumerate(processed_lines):
27
  if i % 2 == 0:
28
  outline += f"**{line}**\n"
29
  else:
30
  outline += f"- {line} πŸ˜„\n"
31
-
32
  return outline
33
 
34
  def create_jsonl_list(text):
35
  lines = text.split("\n")
36
  jsonl_list = []
37
-
38
  for line in lines:
39
  if line:
40
  jsonl_list.append({"text": line})
41
-
42
  return jsonl_list
43
 
44
  def unit_test(input_text):
45
  st.write("Test Text without Timestamps:")
46
  test_text_without_timestamps = remove_timestamps(input_text)
47
  st.write(test_text_without_timestamps)
48
-
49
  st.write("Test JSONL List:")
50
  test_jsonl_list = create_jsonl_list(test_text_without_timestamps)
51
  st.write(test_jsonl_list)
52
 
53
-
54
-
55
  def extract_high_information_words(text, top_n=10):
56
  words = nltk.word_tokenize(text)
57
  words = [word.lower() for word in words if word.isalpha()]
58
-
59
  stop_words = set(stopwords.words('english'))
60
  filtered_words = [word for word in words if word not in stop_words]
61
-
62
  freq_dist = FreqDist(filtered_words)
63
  high_information_words = [word for word, _ in freq_dist.most_common(top_n)]
64
-
65
  return high_information_words
66
 
67
-
68
  def create_relationship_graph(words):
69
  graph = Digraph()
70
-
71
  for index, word in enumerate(words):
72
  graph.node(str(index), word)
73
 
74
  if index > 0:
75
  graph.edge(str(index - 1), str(index), label=str(index))
76
-
77
  return graph
78
 
79
-
80
  def display_relationship_graph(words):
81
  graph = create_relationship_graph(words)
82
  st.graphviz_chart(graph)
83
 
84
-
85
-
86
-
87
  text_input = st.text_area("Enter text:", value="", height=300)
88
  text_without_timestamps = remove_timestamps(text_input)
89
 
@@ -186,20 +168,12 @@ that's it that's literally it this is the core idea now it turns out it's not
186
  difficult to formalize mathematically but this is really what's going on if in a neural network
187
 
188
  '''
189
-
190
  unit_test(unit_test_text_3)
191
 
192
-
193
-
194
-
195
-
196
  # Adding new functionality to the existing code
197
  text_without_timestamps = remove_timestamps(unit_test_text_2)
198
  top_words = extract_high_information_words(text_without_timestamps, 10)
199
  st.markdown("**Top 10 High Information Words:**")
200
  st.write(top_words)
201
-
202
  st.markdown("**Relationship Graph:**")
203
- display_relationship_graph(top_words)
204
-
205
-
 
 
1
  import streamlit as st
2
  import re
3
  import json
4
+
5
  import nltk
6
  from nltk.corpus import stopwords
7
  from nltk import FreqDist
 
 
 
8
  nltk.download('punkt')
9
  nltk.download('stopwords')
10
 
11
+ from graphviz import Digraph
12
+ from collections import Counter
13
+
14
  def remove_timestamps(text):
15
  return re.sub(r'\d{1,2}:\d{2}\n', '', text)
16
 
17
  def process_text(text):
18
  lines = text.split("\n")
19
  processed_lines = []
 
20
  for line in lines:
21
  if line:
22
  processed_lines.append(line)
 
23
  outline = ""
24
  for i, line in enumerate(processed_lines):
25
  if i % 2 == 0:
26
  outline += f"**{line}**\n"
27
  else:
28
  outline += f"- {line} πŸ˜„\n"
 
29
  return outline
30
 
31
  def create_jsonl_list(text):
32
  lines = text.split("\n")
33
  jsonl_list = []
 
34
  for line in lines:
35
  if line:
36
  jsonl_list.append({"text": line})
 
37
  return jsonl_list
38
 
39
  def unit_test(input_text):
40
  st.write("Test Text without Timestamps:")
41
  test_text_without_timestamps = remove_timestamps(input_text)
42
  st.write(test_text_without_timestamps)
 
43
  st.write("Test JSONL List:")
44
  test_jsonl_list = create_jsonl_list(test_text_without_timestamps)
45
  st.write(test_jsonl_list)
46
 
 
 
47
  def extract_high_information_words(text, top_n=10):
48
  words = nltk.word_tokenize(text)
49
  words = [word.lower() for word in words if word.isalpha()]
 
50
  stop_words = set(stopwords.words('english'))
51
  filtered_words = [word for word in words if word not in stop_words]
 
52
  freq_dist = FreqDist(filtered_words)
53
  high_information_words = [word for word, _ in freq_dist.most_common(top_n)]
 
54
  return high_information_words
55
 
 
56
  def create_relationship_graph(words):
57
  graph = Digraph()
 
58
  for index, word in enumerate(words):
59
  graph.node(str(index), word)
60
 
61
  if index > 0:
62
  graph.edge(str(index - 1), str(index), label=str(index))
 
63
  return graph
64
 
 
65
  def display_relationship_graph(words):
66
  graph = create_relationship_graph(words)
67
  st.graphviz_chart(graph)
68
 
 
 
 
69
  text_input = st.text_area("Enter text:", value="", height=300)
70
  text_without_timestamps = remove_timestamps(text_input)
71
 
 
168
  difficult to formalize mathematically but this is really what's going on if in a neural network
169
 
170
  '''
 
171
  unit_test(unit_test_text_3)
172
 
 
 
 
 
173
  # Adding new functionality to the existing code
174
  text_without_timestamps = remove_timestamps(unit_test_text_2)
175
  top_words = extract_high_information_words(text_without_timestamps, 10)
176
  st.markdown("**Top 10 High Information Words:**")
177
  st.write(top_words)
 
178
  st.markdown("**Relationship Graph:**")
179
+ display_relationship_graph(top_words)