"""Streamlit demo: run a 1-D convolution over the tokens of an uploaded text file."""

import base64
import re
import zlib

import nltk
import pandas as pd
import streamlit as st
import torch
import torch.nn as nn
from nltk.corpus import stopwords

# Ensure NLTK resources are downloaded
nltk.download('punkt')
# Recent NLTK releases load the word tokenizer from 'punkt_tab' rather than
# 'punkt'; fetching both keeps word_tokenize working on old and new versions.
nltk.download('punkt_tab')
nltk.download('stopwords')


def _word_to_feature(word):
    """Map a token to a reproducible float in [0, 1).

    The built-in ``hash`` is randomized per interpreter process for strings,
    so it yields different features on every run; CRC32 is stable.
    """
    return zlib.crc32(word.encode("utf-8")) / 2 ** 32


def _prompts_file_name(user_email):
    """Build the per-user prompts file name without allowing path injection.

    Every character outside a conservative allow-list is replaced with ``_``
    so that input such as ``../../x`` cannot escape the working directory.
    """
    safe_stem = re.sub(r"[^A-Za-z0-9._@+-]", "_", user_email.strip())
    return f"{safe_stem}_prompts.txt"


# Function to perform convolution on text data
def text_convolution(input_text, kernel_size=3):
    """Tokenize *input_text*, drop English stop words and convolve the tokens.

    Each remaining token is turned into a single float feature and the
    sequence is passed through a freshly created ``nn.Conv1d(1, 1,
    kernel_size)``. The layer's weights are randomly initialized on every
    call, so the numeric output differs between calls.

    Args:
        input_text: Raw text to process.
        kernel_size: Width of the convolution window, in tokens.

    Returns:
        A ``(output, words)`` tuple where ``output`` is a tensor of shape
        ``(1, 1, len(words) - kernel_size + 1)`` and ``words`` is the list of
        tokens that survived stop-word filtering.

    Raises:
        ValueError: If fewer than ``kernel_size`` tokens remain after
            filtering (the convolution would have no valid window).
    """
    # Load the stop-word list once; a set gives O(1) membership tests.
    stop_words = set(stopwords.words('english'))
    # The stop-word list is lowercase, so compare case-insensitively.
    words = [
        word
        for word in nltk.word_tokenize(input_text)
        if word.lower() not in stop_words
    ]

    if len(words) < kernel_size:
        raise ValueError(
            f"Need at least {kernel_size} non-stop-word tokens to convolve, "
            f"but only {len(words)} found."
        )

    features = [_word_to_feature(word) for word in words]
    # Conv1d expects (batch, channels, length).
    tensor_input = torch.tensor(features, dtype=torch.float).view(1, 1, -1)
    conv_layer = nn.Conv1d(1, 1, kernel_size, stride=1)
    output = conv_layer(tensor_input)
    return output, words


# Streamlit UI
def main():
    """Render the app: upload a file, show the convolution, save and offer the prompts."""
    st.title("Text Convolution Demonstration")
    st.write("This app demonstrates how text convolution works. Upload a text file and see the convolution result along with a distribution plot of word tokens.")

    uploaded_file = st.file_uploader("Choose a text file (TXT only)", type=["txt"])
    user_email = st.text_input("Enter your email to save your prompts:")

    # Nothing to do until both inputs are provided.
    if uploaded_file is None or not user_email:
        return

    try:
        text_data = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return

    try:
        conv_result, words = text_convolution(text_data)
    except ValueError as err:
        st.error(str(err))
        return

    st.write("Convolution result:", conv_result)

    # Visualization
    word_counts = pd.Series(words).value_counts()
    st.bar_chart(word_counts.head(20))

    # Saving user prompts
    user_file_name = _prompts_file_name(user_email)
    # Explicit encoding: the platform default may be unable to encode the text.
    with open(user_file_name, "a", encoding="utf-8") as file:
        file.write(text_data + "\n")
    st.success(f"Your prompts have been added to {user_file_name}")

    # Download link for the file
    with open(user_file_name, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    # user_file_name only contains allow-listed characters (see
    # _prompts_file_name), so it is safe to embed in the HTML attribute.
    href = (
        f'<a href="data:text/plain;base64,{b64}" '
        f'download="{user_file_name}">Download {user_file_name}</a>'
    )
    st.markdown(href, unsafe_allow_html=True)


if __name__ == "__main__":
    main()