import streamlit as st
import re
import nltk
from nltk.corpus import stopwords
from nltk import FreqDist
from graphviz import Digraph

# Fetch the NLTK resources needed for tokenization and stop-word filtering.
nltk.download('punkt')
nltk.download('stopwords')

def remove_timestamps(text):
    # Strip transcript timestamps such as "1:42" that occupy their own line.
    return re.sub(r'\d{1,2}:\d{2}\n', '', text)

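# Quick illustration of the intended behavior (a sketch, not part of the app):
#   remove_timestamps("1:42\nhello world\n")  ->  "hello world\n"
# Note the pattern requires a trailing newline, so a timestamp on the very
# last line of a text with no newline after it would be kept.
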
def process_text(text):
    # Drop empty lines, then alternate bold headings with bulleted items.
    lines = text.split("\n")
    processed_lines = [line for line in lines if line]
    outline = ""
    for i, line in enumerate(processed_lines):
        if i % 2 == 0:
            outline += f"**{line}**\n"
        else:
            outline += f"- {line} 📝\n"
    return outline

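# For example, the input "Intro\nFirst point\n" would yield the outline:
#   **Intro**
#   - First point 📝
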
def create_jsonl_list(text):
    # Wrap each non-empty line in a dict, one record per line of text.
    lines = text.split("\n")
    jsonl_list = []
    for line in lines:
        if line:
            jsonl_list.append({"text": line})
    return jsonl_list

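# The function returns plain dicts; writing an actual .jsonl file would take
# one json.dumps call per record. A minimal sketch (hypothetical filename,
# not part of this app):
#   import json
#   with open("transcript.jsonl", "w") as f:
#       for record in create_jsonl_list(text):
#           f.write(json.dumps(record) + "\n")
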
def unit_test(input_text):
    # Show the cleaned text and its JSONL records for a sample transcript.
    st.write("Test Text without Timestamps:")
    test_text_without_timestamps = remove_timestamps(input_text)
    st.write(test_text_without_timestamps)
    st.write("Test JSONL List:")
    test_jsonl_list = create_jsonl_list(test_text_without_timestamps)
    st.write(test_jsonl_list)

def extract_high_information_words(text, top_n=10):
    # Tokenize, keep alphabetic tokens, drop English stop words, and return
    # the top_n most frequent words that remain.
    words = nltk.word_tokenize(text)
    words = [word.lower() for word in words if word.isalpha()]
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]
    freq_dist = FreqDist(filtered_words)
    return [word for word, _ in freq_dist.most_common(top_n)]

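# For instance (a sketch of expected behavior, not an app feature):
#   extract_high_information_words("the cat sat on the cat mat", top_n=2)
#   -> ["cat", "sat"]   # "the"/"on" are stop words; ties keep first-seen order
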
def create_relationship_graph(words):
    # Chain the words into a directed path: one node per word, each edge
    # labeled with the position of its target word.
    graph = Digraph()
    for index, word in enumerate(words):
        graph.node(str(index), word)
        if index > 0:
            graph.edge(str(index - 1), str(index), label=str(index))
    return graph

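# The result is a simple chain. For words = ["alpha", "beta", "gamma"] the
# generated DOT source looks roughly like:
#   digraph {
#       0 [label=alpha]
#       1 [label=beta]
#       0 -> 1 [label=1]
#       2 [label=gamma]
#       1 -> 2 [label=2]
#   }
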
def display_relationship_graph(words):
    graph = create_relationship_graph(words)
    st.graphviz_chart(graph)

text_input = st.text_area("Enter text:", value="", height=300)
text_without_timestamps = remove_timestamps(text_input)
st.markdown("**Text without Timestamps:**")
st.write(text_without_timestamps)

processed_text = process_text(text_without_timestamps)
st.markdown("**Markdown Outline with Emojis:**")
st.markdown(processed_text)

unit_test_text = '''
1:42
program the does very very well on your data then you will achieve the best
1:48
generalization possible with a little bit of modification you can turn it into a precise theorem
1:54
and on a very intuitive level it's easy to see what it should be the case if you
2:01
have some data and you're able to find a shorter program which generates this
2:06
data then you've essentially extracted all the all conceivable regularity from
2:11
this data into your program and then you can use these objects to make the best predictions possible like if if you have
2:19
data which is so complex but there is no way to express it as a shorter program
2:25
then it means that your data is totally random there is no way to extract any regularity from it whatsoever now there
2:32
is little known mathematical theory behind this and the proofs of these statements actually not even that hard
2:38
but the one minor slight disappointment is that it's actually not possible at
2:44
least given today's tools and understanding to find the best short program that explains or generates or
2:52
solves your problem given your data this problem is computationally intractable
'''
unit_test(unit_test_text)

unit_test_text_2 = '''
5
to talk a little bit about reinforcement learning so reinforcement learning is a framework it's a framework of evaluating
6:53
agents in their ability to achieve goals and complicated stochastic environments
6:58
you've got an agent which is plugged into an environment as shown in the figure right here and for any given
7:06
agent you can simply run it many times and compute its average reward now the
7:13
thing that's interesting about the reinforcement learning framework is that there exist interesting useful
7:20
reinforcement learning algorithms the framework existed for a long time it
7:25
became interesting once we realized that good algorithms exist now these are there are perfect algorithms but they
7:31
are good enough todo interesting things and all you want the mathematical
7:37
problem is one where you need to maximize the expected reward now one
7:44
important way in which the reinforcement learning framework is not quite complete is that it assumes that the reward is
7:50
given by the environment you see this picture the agent sends an action while
7:56
the reward sends it an observation in a both the observation and the reward backwards that's what the environment
8:01
communicates back the way in which this is not the case in the real world is that we figure out
8:11
what the reward is from the observation we reward ourselves we are not told
8:16
environment doesn't say hey here's some negative reward it's our interpretation over census that lets us determine what
8:23
the reward is and there is only one real true reward in life and this is
8:28
existence or nonexistence and everything else is a corollary of that so well what
8:35
should our agent be you already know the answer should be a neural network because whenever you want to do
8:41
something dense it's going to be a neural network and you want the agent to map observations to actions so you let
8:47
it be parametrized with a neural net and you apply learning algorithm so I want to explain to you how reinforcement
8:53
learning works this is model free reinforcement learning the reinforcement learning has actually been used in practice everywhere but it's
'''
unit_test(unit_test_text_2)

unit_test_text_3 = '''
ort try something new add
9:17
randomness directions and compare the result to your expectation if the result
9:25
surprises you if you find that the results exceeded your expectation then
9:31
change your parameters to take those actions in the future that's it this is
9:36
the fool idea of reinforcement learning try it out see if you like it and if you do do more of that in the future and
9:44
that's it that's literally it this is the core idea now it turns out it's not
9:49
difficult to formalize mathematically but this is really what's going on if in a neural network
'''
unit_test(unit_test_text_3)

# Additional analysis: surface the top words in the second transcript and
# draw a simple relationship graph over them.
text_without_timestamps = remove_timestamps(unit_test_text_2)
top_words = extract_high_information_words(text_without_timestamps, 10)
st.markdown("**Top 10 High Information Words:**")
st.write(top_words)
st.markdown("**Relationship Graph:**")
display_relationship_graph(top_words)