|
|
|
import streamlit as st |
|
import torch |
|
import torch.nn as nn |
|
import nltk |
|
from nltk.corpus import stopwords |
|
import pandas as pd |
|
import base64 |
|
|
|
|
|
# Fetch the NLTK resources this script needs only when they are not already
# installed. Streamlit re-executes the script on every widget interaction, so
# unconditional nltk.download() calls would repeat the up-to-date check (and
# its log output) on each rerun.
# "punkt_tab" is included because recent NLTK releases load the tokenizer
# models from that package rather than from "punkt".
for _resource_path in ("tokenizers/punkt", "tokenizers/punkt_tab", "corpora/stopwords"):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # The downloader takes the package id, i.e. the last path component.
        nltk.download(_resource_path.split("/")[-1])
|
|
|
|
|
def text_convolution(input_text, kernel_size=3):
    """Tokenize text, drop English stop words, and convolve the token sequence.

    Each remaining token is mapped to a number with the built-in ``hash`` and
    the resulting sequence is passed through a newly created
    ``nn.Conv1d(1, 1, kernel_size, stride=1)`` layer.

    Args:
        input_text: Raw text, tokenized with ``nltk.word_tokenize``.
        kernel_size: Width of the convolution window (default 3).

    Returns:
        A ``(output, words)`` tuple. ``output`` is the tensor produced by the
        layer for an input reshaped to ``(1, 1, len(words))``; when fewer
        tokens than the kernel width remain it is an empty ``(1, 1, 0)``
        tensor instead. ``words`` is the list of tokens kept after stop-word
        removal, in their original casing.

    Note:
        The layer is created (with default, random initialisation) on every
        call and ``hash`` of a ``str`` is salted per interpreter process, so
        the numbers are illustrative and not reproducible between runs.
    """
    # Build the stop-word set once instead of calling stopwords.words() for
    # every token; set membership is also O(1) versus scanning a list.
    stop_words = set(stopwords.words('english'))

    # Compare case-insensitively: the stop-word list is lowercase, so a
    # capitalised "The" or "And" would otherwise slip through the filter.
    words = [
        word
        for word in nltk.word_tokenize(input_text)
        if word.lower() not in stop_words
    ]

    conv_layer = nn.Conv1d(1, 1, kernel_size, stride=1)

    # Conv1d raises RuntimeError when the input is shorter than the kernel
    # (including empty input). Such input has no valid window positions, so
    # return an empty result instead of crashing. The layer normalises
    # kernel_size to a tuple, hence the [0].
    if len(words) < conv_layer.kernel_size[0]:
        return torch.empty(1, 1, 0), words

    # Shape (batch=1, channels=1, length) as required by Conv1d.
    tensor_input = torch.tensor(
        [hash(word) for word in words], dtype=torch.float
    ).view(1, 1, -1)

    return conv_layer(tensor_input), words
|
|
|
|
|
def main():
    """Render the Streamlit page for the text-convolution demo.

    Shows a file uploader and an e-mail field. Once both are provided, the
    uploaded text is decoded as UTF-8, passed to ``text_convolution``, and the
    result is displayed together with a bar chart of the 20 most frequent
    remaining tokens. The text is then appended to
    ``<sanitised e-mail>_prompts.txt`` in the working directory and a download
    link for that file is rendered.

    Problems with the upload (not UTF-8, or text the convolution cannot
    process) are reported on the page with ``st.error`` and nothing is saved.
    """
    # Function-scope imports keep this block self-contained.
    import html
    import re

    st.title("Text Convolution Demonstration")
    st.write("This app demonstrates how text convolution works. Upload a text file and see the convolution result along with a distribution plot of word tokens.")

    uploaded_file = st.file_uploader("Choose a text file (TXT only)", type=["txt"])
    # Strip so that a whitespace-only entry does not count as an e-mail.
    user_email = st.text_input("Enter your email to save your prompts:").strip()

    if uploaded_file is None or not user_email:
        return

    try:
        text_data = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return

    try:
        conv_result, words = text_convolution(text_data)
    except RuntimeError as exc:
        # PyTorch raises RuntimeError when the token sequence is shorter than
        # the convolution kernel (e.g. a nearly empty file).
        st.error(f"Could not compute the convolution: {exc}")
        return

    st.write("Convolution result:", conv_result)

    word_counts = pd.Series(words).value_counts()
    st.bar_chart(word_counts.head(20))

    # The e-mail is untrusted input that becomes part of a file path: replace
    # everything outside a conservative whitelist (notably path separators)
    # so the file can only be created inside the working directory.
    safe_stem = re.sub(r"[^A-Za-z0-9@._+-]", "_", user_email)
    user_file_name = f"{safe_stem}_prompts.txt"

    # Explicit encoding: the platform default may be unable to represent
    # characters that were just decoded from UTF-8.
    with open(user_file_name, "a", encoding="utf-8") as file:
        file.write(text_data + "\n")
    st.success(f"Your prompts have been added to {user_file_name}")

    with open(user_file_name, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    # The link is rendered as raw HTML, so escape the name before embedding it.
    link_name = html.escape(user_file_name, quote=True)
    href = f'<a href="data:file/txt;base64,{b64}" download="{link_name}">Download {link_name}</a>'
    st.markdown(href, unsafe_allow_html=True)
|
|
|
# Script entry point: render the page only when this file is run as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()
|
|