awacke1 committed on
Commit
2bc1c14
·
1 Parent(s): f39e046

Update backupapp.py

Browse files
Files changed (1) hide show
  1. backupapp.py +78 -1
backupapp.py CHANGED
@@ -1,6 +1,15 @@
 
1
  import streamlit as st
2
  import re
3
  import json
 
 
 
 
 
 
 
 
4
 
5
  def remove_timestamps(text):
6
  return re.sub(r'\d{1,2}:\d{2}\n', '', text)
@@ -41,6 +50,40 @@ def unit_test(input_text):
41
  test_jsonl_list = create_jsonl_list(test_text_without_timestamps)
42
  st.write(test_jsonl_list)
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  text_input = st.text_area("Enter text:", value="", height=300)
45
  text_without_timestamps = remove_timestamps(text_input)
46
 
@@ -125,4 +168,38 @@ it be parametrized with a neural net and you apply learning algorithm so I want
125
  learning works this is model free reinforcement learning the reinforcement learning has actually been used in practice everywhere but it's
126
  '''
127
 
128
- unit_test(unit_test_text_2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
  import streamlit as st
3
  import re
4
  import json
5
+ import nltk
6
+ from nltk.corpus import stopwords
7
+ from nltk import FreqDist
8
+ from graphviz import Digraph
9
+ from collections import Counter
10
+
11
+ nltk.download('punkt')
12
+ nltk.download('stopwords')
13
 
14
  def remove_timestamps(text):
15
  return re.sub(r'\d{1,2}:\d{2}\n', '', text)
 
50
  test_jsonl_list = create_jsonl_list(test_text_without_timestamps)
51
  st.write(test_jsonl_list)
52
 
53
+
54
+
55
def extract_high_information_words(text, top_n=10):
    """Return the most frequent non-stop-words found in *text*.

    The text is tokenized with NLTK, tokens that are not purely alphabetic
    are dropped, the rest are lower-cased, English stop words are removed,
    and the survivors are ranked by frequency.

    Args:
        text: The text to analyse.
        top_n: Maximum number of words to return.

    Returns:
        A list of up to ``top_n`` lower-cased words, most frequent first.
    """
    tokens = nltk.word_tokenize(text)
    english_stop_words = set(stopwords.words('english'))

    candidates = []
    for token in tokens:
        # Skip punctuation, numbers and mixed tokens such as "'s".
        if not token.isalpha():
            continue
        lowered = token.lower()
        if lowered not in english_stop_words:
            candidates.append(lowered)

    ranked = FreqDist(candidates).most_common(top_n)
    return [term for term, _count in ranked]
66
+
67
+
68
def create_relationship_graph(words):
    """Build a directed chain graph linking each word to the next one.

    Every word becomes a node whose id is its position in *words* and whose
    label is the word itself. Each node after the first receives an edge
    from its predecessor, labelled with the target node's position.

    Args:
        words: Iterable of words, in the order they should be chained.

    Returns:
        The populated ``graphviz.Digraph``.
    """
    chain = Digraph()
    previous_id = None

    for position, label in enumerate(words):
        node_id = str(position)
        chain.node(node_id, label)

        # The first node has no predecessor to link from.
        if previous_id is not None:
            chain.edge(previous_id, node_id, label=node_id)
        previous_id = node_id

    return chain
78
+
79
+
80
def display_relationship_graph(words):
    """Render the chain graph for *words* in the Streamlit page.

    Args:
        words: Words passed through to ``create_relationship_graph``.
    """
    st.graphviz_chart(create_relationship_graph(words))
83
+
84
+
85
+
86
+
87
  text_input = st.text_area("Enter text:", value="", height=300)
88
  text_without_timestamps = remove_timestamps(text_input)
89
 
 
168
  learning works this is model free reinforcement learning the reinforcement learning has actually been used in practice everywhere but it's
169
  '''
170
 
171
+ unit_test(unit_test_text_2)
172
+
173
+ unit_test_text_3 = '''
174
+ ort try something new add
175
+ 9:17
176
+ randomness directions and compare the result to your expectation if the result
177
+ 9:25
178
+ surprises you if you find that the results exceeded your expectation then
179
+ 9:31
180
+ change your parameters to take those actions in the future that's it this is
181
+ 9:36
182
+ the fool idea of reinforcement learning try it out see if you like it and if you do do more of that in the future and
183
+ 9:44
184
+ that's it that's literally it this is the core idea now it turns out it's not
185
+ 9:49
186
+ difficult to formalize mathematically but this is really what's going on if in a neural network
187
+
188
+ '''
189
+
190
+ unit_test(unit_test_text_3)
191
+
192
+
193
+
194
+
195
+
196
# Demo of the keyword and graph features: analyse the sample text
# `unit_test_text_2`, list its ten most frequent non-stop-words and chart them.
text_without_timestamps = remove_timestamps(unit_test_text_2)
top_words = extract_high_information_words(text_without_timestamps, top_n=10)

st.markdown("**Top 10 High Information Words:**")
st.write(top_words)

st.markdown("**Relationship Graph:**")
display_relationship_graph(top_words)
204
+
205
+