File size: 5,187 Bytes
1c744c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc3d855
1c744c7
 
bc3d855
1c744c7
bc3d855
1c744c7
bc3d855
1c744c7
bc3d855
302ccf8
1c744c7
bc3d855
1f6598f
bc3d855
1c744c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import streamlit as st
from streamlit_chat import message
import os
from utils import (
    parse_docx,
    parse_pdf,
    parse_txt,
    parse_csv,
    search_docs,
    embed_docs,
    text_to_docs,
    get_answer,
    get_sources,
    wrap_text_in_html,
)
from openai.error import OpenAIError

def clear_submit():
    """Reset the "submit" flag kept in Streamlit's session state.

    Registered as the ``on_change`` callback of the file uploader and the
    question text area, so a new upload or an edited question clears the
    flag that the chat tab checks before answering.
    """
    session = st.session_state
    session["submit"] = False

def set_openai_api_key(api_key: str):
    """Store *api_key* in Streamlit's session state under "OPENAI_API_KEY".

    The sidebar key field reads the same entry back as its initial value.
    """
    session = st.session_state
    session["OPENAI_API_KEY"] = api_key

# Page title, rendered as raw HTML so the heading can carry the attribution link.
st.markdown('<h1>File GPT 🤖<small> by <a href="https://codegpt.co">Code GPT</a></small></h1>', unsafe_allow_html=True)

# Sidebar
# `index` and `doc` are module-level because the chat tab further down passes
# `index` to `search_docs`; both stay None until a file is parsed and embedded.
index = None
doc = None
with st.sidebar:
    # API key field; pre-filled from session state so the value survives reruns.
    user_secret = st.text_input(
        "OpenAI API Key",
        type="password",
        placeholder="Paste your OpenAI API key here (sk-...)",
        help="You can get your API key from https://platform.openai.com/account/api-keys.",
        value=st.session_state.get("OPENAI_API_KEY", ""),
    )
    if user_secret:
        set_openai_api_key(user_secret)

    # A new upload resets the "submit" flag through `clear_submit`.
    uploaded_file = st.file_uploader(
        "Upload a pdf, docx, or txt file",
        type=["pdf", "docx", "txt", "csv"],
        help="Scanned documents are not supported yet!",
        on_change=clear_submit,
    )

    if uploaded_file is not None:
        # Dispatch on the lower-cased name so an upload such as "REPORT.PDF"
        # is routed to the right parser instead of being reported as
        # unsupported.
        file_name = uploaded_file.name.lower()
        if file_name.endswith(".pdf"):
            doc = parse_pdf(uploaded_file)
        elif file_name.endswith(".docx"):
            doc = parse_docx(uploaded_file)
        elif file_name.endswith(".csv"):
            doc = parse_csv(uploaded_file)
        elif file_name.endswith(".txt"):
            doc = parse_txt(uploaded_file)
        else:
            st.error("File type not supported")
            doc = None

        # Only split and embed when a parser actually produced a document;
        # previously the unsupported-type branch still fell through and
        # handed None to `text_to_docs`.
        if doc is not None:
            text = text_to_docs(doc)
            try:
                with st.spinner("Indexing document... This may take a while⏳"):
                    index = embed_docs(text)
                    st.session_state["api_key_configured"] = True
            except OpenAIError as e:
                # Surface API failures in the UI; `index` remains None.
                st.error(e._message)

# Main area: an introduction tab and the chat tab.
tab1, tab2 = st.tabs(["Intro", "Chat with the File"])
with tab1:
    # Static "about" content; nothing here depends on user input.
    st.markdown("### How does it work?")
    st.markdown('<p>Read the article to know how it works: <a target="_blank" href="https://medium.com/@dan.avila7/file-gpt-conversaci%C3%B3n-por-chat-con-un-archivo-698d17570358">Medium Article</a></p>', unsafe_allow_html=True)
    st.write("File GPT was written with the following tools:")

    # (heading, body, whether the body contains raw HTML) for each tool.
    tool_sections = (
        ("#### Code GPT", 'All code was written with the help of Code GPT. Visit https://codegpt.co to get the extension.', False),
        ("#### Streamlit", 'The design was written with <a target="_blank" href="https://streamlit.io/">Streamlit</a>.', True),
        ("#### LangChain", 'Question answering with source <a target="_blank" href="https://langchain.readthedocs.io/en/latest/use_cases/question_answering.html#adding-in-sources">Langchain QA</a>.', True),
        ("#### Embedding", '<a target="_blank" href="https://platform.openai.com/docs/guides/embeddings">Embedding</a> is done via the OpenAI API with "text-embedding-ada-002"', True),
    )
    for section_heading, section_body, allow_html in tool_sections:
        st.markdown(section_heading)
        st.write(section_body, unsafe_allow_html=allow_html)

    st.write("Please note that you must have credits in your OpenAI account to use this tool. Each file uploaded to the platform consumes credits for embedding and each query consumes credits to obtain the response.")
    st.markdown("---")

    # Credits footer; every line is written with HTML enabled.
    credit_lines = (
        'Author:  <a target="_blank" href="https://www.linkedin.com/in/daniel-avila-arias/">Daniel Avila</a>',
        'Repo: <a target="_blank" href="https://github.com/davila7/file-gpt">Github</a>',
        "This software was developed with Code GPT, for more information visit: https://codegpt.co",
    )
    for credit_line in credit_lines:
        st.write(credit_line, unsafe_allow_html=True)

with tab2:
    st.write('To obtain an API Key you must create an OpenAI account at the following link: https://openai.com/api/')
    # Chat history is kept in session state as two parallel lists:
    # 'past' holds the user's questions, 'generated' the matching answers.
    if 'generated' not in st.session_state:
        st.session_state['generated'] = []

    if 'past' not in st.session_state:
        st.session_state['past'] = []

    def get_text():
        """Render the question box and return what the user typed.

        The box is only shown once an API key has been entered in the
        sidebar; otherwise nothing is rendered and None is returned.
        """
        if user_secret:
            st.header("Ask me something about the document:")
            input_text = st.text_area("You:", on_change=clear_submit)
            return input_text
        return None
    user_input = get_text()

    button = st.button("Submit")
    # The "submit" flag keeps the answer path active on reruns that were not
    # triggered by the button itself.
    if button or st.session_state.get("submit"):
        if not user_input:
            st.error("Please enter a question!")
        elif index is None:
            # No file uploaded, or embedding failed: there is nothing to
            # search, so tell the user instead of passing None to search_docs.
            st.error("Please upload a document first!")
        else:
            st.session_state["submit"] = True
            try:
                # Both calls sit inside the try so an OpenAIError raised by
                # either is reported in the UI rather than crashing the
                # script. NOTE(review): assumes search_docs can raise
                # OpenAIError too - confirm against utils.
                sources = search_docs(index, user_input)
                answer = get_answer(sources, user_input)
            except OpenAIError as e:
                st.error(e._message)
            else:
                # Drop the trailing "SOURCES: ..." part of the model output.
                # Extract the reply first, then append to both lists, so the
                # two histories always stay the same length.
                reply = answer["output_text"].split("SOURCES: ")[0]
                st.session_state.past.append(user_input)
                st.session_state.generated.append(reply)
            # Render the conversation newest-first; within each exchange the
            # answer is shown above the question it belongs to.
            for i in reversed(range(len(st.session_state['generated']))):
                message(st.session_state["generated"][i], key=str(i))
                message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')