File size: 4,830 Bytes
21586f0
 
 
5defcd2
0a2414d
066fc55
820215f
0a2414d
 
 
 
 
21586f0
 
 
 
 
 
 
066fc55
21586f0
066fc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7995560
066fc55
 
820215f
7995560
 
 
 
 
 
820215f
21586f0
0a2414d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
820215f
 
 
 
 
 
 
 
 
 
 
 
0a2414d
 
21586f0
820215f
21586f0
 
 
 
066fc55
 
 
820215f
 
0a2414d
066fc55
820215f
 
0a2414d
21586f0
066fc55
820215f
 
 
066fc55
 
 
 
 
7995560
 
 
066fc55
 
21586f0
820215f
 
21586f0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import streamlit as st
import pandas as pd
import datetime
import io
import nltk
import base64
import os
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Fetch the Punkt tokenizer data that sent_tokenize relies on. This is a
# module-level call, so it runs every time the script is executed.
# NOTE(review): newer NLTK releases appear to look up the 'punkt_tab' resource
# instead of 'punkt' - confirm against the installed NLTK version.
nltk.download('punkt')

def save_text_as_file(text, file_type):
    """Write *text* to a timestamped file in the current working directory.

    The file is named ``text_file_<YYYYmmdd_HHMMSS>.<file_type>``. Note that
    the content is written verbatim whatever the extension says: no
    conversion to CSV/Markdown is performed.

    Args:
        text: The string to store.
        file_type: File extension without the leading dot (e.g. ``"md"``).

    Returns:
        The name of the file that was written.

    Raises:
        OSError: If the file cannot be created or written.
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    file_name = f"text_file_{current_time}.{file_type}"
    # Pin the encoding: without it open() uses the locale's preferred
    # encoding, which may be unable to represent pasted Unicode text and
    # would then raise UnicodeEncodeError.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(text)
    st.success(f"Text saved as {file_name}")
    return file_name

def save_list_as_excel(text):
    """Parse a ``name - description`` list and save it as an Excel workbook.

    Every non-blank line becomes one row. A line is split at the first
    occurrence of ``" - "``; lines without that separator are stored with an
    empty description. Both cells are stripped of surrounding whitespace so
    that indentation or a trailing carriage return from CRLF-pasted text does
    not leak into the spreadsheet (previously only the no-separator case was
    stripped).

    Args:
        text: Multi-line string, one list entry per line.

    Returns:
        The name of the ``character_list_<timestamp>.xlsx`` file written to
        the current working directory.
    """
    rows = []
    for line in text.split("\n"):
        if not line.strip():
            continue  # ignore blank / whitespace-only lines
        # partition() yields an empty description when the separator is
        # missing, which covers the "no description" case without branching.
        name, _, description = line.partition(" - ")
        rows.append([name.strip(), description.strip()])

    df = pd.DataFrame(rows, columns=["Character", "Description"])
    file_name = f"character_list_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    df.to_excel(file_name, index=False)
    st.success(f"Character list saved as {file_name}")
    return file_name

def get_download_link(file_path, file_type):
    """Return an HTML ``<a>`` tag that embeds a file as a base64 data URI.

    Args:
        file_path: Path of the file to embed. It is also used verbatim as the
            suggested download name and as the link text.
        file_type: Extension key selecting the MIME type (``"xlsx"``,
            ``"csv"``, ``"md"`` or ``"txt"``). Any other value falls back to
            ``application/octet-stream``.

    Returns:
        The anchor element as a string.

    Raises:
        OSError: If *file_path* cannot be opened or read.
    """
    mime_types = {
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "csv": "text/csv",
        "md": "text/markdown",
        "txt": "text/plain",
    }
    # An unrecognised file_type used to leave the link unassigned and crash
    # with UnboundLocalError; use a generic binary MIME type instead.
    mime = mime_types.get(file_type, "application/octet-stream")

    with open(file_path, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode()

    # The file handle is no longer needed once the payload is encoded.
    return f'<a href="data:{mime};base64,{b64}" download="{file_path}">Download {file_path}</a>'

def perform_nlp(text, n_topics=3, n_top_words=5):
    """Show LDA topics and a word-frequency chart for *text* in the page.

    The text is split into sentences, each sentence is treated as one
    document for a bag-of-words LDA model, and the highest-weighted words of
    every topic are written out. A bar chart of the ten most frequent
    whitespace-separated tokens follows.

    Args:
        text: The raw text to analyse.
        n_topics: Number of LDA topics to fit (defaults to the original 3).
        n_top_words: Number of words listed per topic (defaults to the
            original 5).
    """
    sentences = sent_tokenize(text)

    # Topic Modeling
    st.subheader("Topic Modeling")
    vectorizer = CountVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform(sentences)
    except ValueError:
        # CountVectorizer raises ValueError when no usable term is left
        # (e.g. the text consists only of stop words or punctuation).
        st.warning("Not enough distinct words for topic modeling.")
    else:
        lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
        lda.fit(X)
        # Looked up once; it is the same vocabulary for every topic.
        feature_names = vectorizer.get_feature_names_out()
        # components_ has one row of word weights per topic. The previous
        # code iterated lda.transform(X) instead, i.e. one row per *sentence*
        # holding topic proportions, and then used those topic indices to
        # index the vocabulary - yielding wrong words or an IndexError.
        for topic_idx, word_weights in enumerate(lda.components_):
            top_indices = word_weights.argsort()[:-n_top_words - 1:-1]
            st.write(f"Topic {topic_idx + 1}:")
            st.write(", ".join(feature_names[j] for j in top_indices))

    # Word Frequency
    word_freq = pd.Series(" ".join(sentences).split()).value_counts().head(10)
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)

def show_files_in_directory():
    """List the .md, .xlsx and .csv files of the working directory as a table.

    Each row shows the file name (wrapped in an anchor tag), its size in
    bytes and its last-modified time. When no matching file exists an
    informational message is shown instead of the table.
    """
    st.subheader("Files in Current Directory")
    files = []
    # Sorted so the table order is stable; os.listdir order is arbitrary.
    for file in sorted(os.listdir(".")):
        if not file.endswith((".md", ".xlsx", ".csv")) or not os.path.isfile(file):
            continue
        file_size = os.path.getsize(file)
        file_modified_time = datetime.datetime.fromtimestamp(
            os.path.getmtime(file)
        ).strftime("%Y-%m-%d %H:%M:%S")
        files.append({"File Name": file, "Size (bytes)": file_size, "Last Modified": file_modified_time})

    if not files:
        # An empty DataFrame has no "File Name" column, so the column access
        # below used to raise KeyError on a directory without saved files.
        st.info("No saved files found.")
        return

    files_df = pd.DataFrame(files)
    # NOTE(review): the href is a bare relative file name; whether the browser
    # can actually fetch it depends on how the app is served - confirm.
    files_df["File Name"] = files_df["File Name"].apply(lambda x: f'<a href="{x}" download>{x}</a>')
    st.write(files_df.to_html(escape=False, index=False), unsafe_allow_html=True)

def main():
    """Render the page: accept pasted text, save it, and show the results.

    On "Process Text" the input is classified as either a numbered list
    (starts with ``1.``, ``1 -`` or ``1 _`` and spans several lines) or plain
    text. Lists are saved as an Excel workbook; plain text is saved as .txt
    and, when it contains sentence punctuation, analysed with perform_nlp.
    In both cases the raw text is additionally saved with .csv and .md
    extensions. Download links are then offered for the saved files, and the
    directory listing is always shown at the bottom of the page.
    """
    st.title("AI UI for Text Processing")
    text_input = st.text_area("Paste your text here")

    if st.button("Process Text"):
        stripped = text_input.strip()
        if not stripped:
            st.warning("Please paste some text.")
        else:
            is_list = stripped.startswith(("1.", "1 -", "1 _")) and "\n" in text_input

            xlsx_name = None
            if is_list:
                xlsx_name = save_list_as_excel(text_input)
            else:
                save_text_as_file(text_input, "txt")
            # Keep the names the helper actually returns. They used to be
            # guessed by replacing ".xlsx" in the workbook name, but these
            # files are saved as "text_file_*", so the guessed paths never
            # existed and the links silently failed.
            csv_name = save_text_as_file(text_input, "csv")
            md_name = save_text_as_file(text_input, "md")

            if not is_list and any(mark in text_input for mark in ".!?"):
                perform_nlp(text_input)

            downloads = [(csv_name, "csv"), (md_name, "md")]
            if xlsx_name:
                # Only the workbook can be read back with read_excel; the
                # old code also tried it on .txt files and hid the failure.
                try:
                    df = pd.read_excel(xlsx_name)
                except (OSError, ValueError, ImportError) as exc:
                    st.error(f"Could not read {xlsx_name}: {exc}")
                else:
                    st.subheader("Saved Data")
                    st.dataframe(df)
                downloads.insert(0, (xlsx_name, "xlsx"))

            for path, kind in downloads:
                try:
                    link = get_download_link(path, kind)
                except OSError as exc:
                    # Report instead of swallowing, so a missing file is visible.
                    st.error(f"Could not prepare download for {path}: {exc}")
                else:
                    st.markdown(link, unsafe_allow_html=True)

    show_files_in_directory()

# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()