File size: 5,804 Bytes
51fe9d2
 
0489db2
 
 
 
51fe9d2
 
0489db2
 
 
 
 
7a7c4d5
0489db2
51fe9d2
0489db2
 
 
7a7c4d5
 
 
 
51fe9d2
0489db2
7a7c4d5
51fe9d2
0489db2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51fe9d2
0489db2
 
 
 
51fe9d2
 
 
d5bd88b
 
0489db2
 
7a7c4d5
0489db2
 
 
 
 
 
 
 
7a7c4d5
 
d5bd88b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a7c4d5
d5bd88b
 
 
 
 
 
0489db2
d5bd88b
0489db2
d5bd88b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51fe9d2
d5bd88b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import streamlit as st
from openai.error import OpenAIError
from .utils import *
from typing import Text, Union

# Whether the uploader accepts several files at once; forwarded to
# st.file_uploader(accept_multiple_files=...) in qa_main. Kept False because
# the downstream parsing/embedding pipeline handles a single document.
multiple_files = False

def clear_submit():
    """Reset the ``file_submitted`` session flag to False.

    Registered as the file uploader's ``on_change`` callback, so picking a
    new file marks the previous submission as stale.
    """
    # Streamlit session state supports attribute-style assignment, which is
    # equivalent to the key-style form.
    st.session_state.file_submitted = False

def set_openai_api_key(api_key: Text) -> bool:
    """Validates the given OpenAI API key and stores it in the session state.

    Args:
        api_key (Text): OpenAI API key entered by the user.

    Returns:
        bool: True if the key looks valid and was stored, False otherwise.
    """
    # OpenAI keys always start with "sk-", but their length varies by key
    # type (newer project keys such as "sk-proj-..." are longer than the
    # legacy 51 characters), so only check the prefix and a sane minimum
    # length instead of an exact length of 51.
    if not (api_key.startswith("sk-") and len(api_key) >= 40):
        st.error("Invalid OpenAI API key! Please provide a valid key.")
        return False

    st.session_state["OPENAI_API_KEY"] = api_key
    st.session_state["api_key_configured"] = True
    return True

def file_to_doc(file: Union[PDFFile, DocxFile, TxtFile, CodeFile]):
    """Converts an uploaded file to a document using specialized parsers.

    Args:
        file: Uploaded file object exposing a ``name`` attribute.

    Returns:
        The parsed document, or None (after showing a Streamlit error) when
        the file type is not supported.
    """
    # NOTE(review): the original text-file branch was
    # ``file.name.split["."][1] in [".txt", ...]`` — a TypeError (subscripting
    # the bound method), and even when called, split(".") strips the dot so
    # the membership test could never match. endswith with a tuple of
    # extensions handles both problems and multi-dot filenames.
    if file.name.endswith(".pdf"):
        doc = parse_pdf(file)
    elif file.name.endswith(".docx"):
        doc = parse_docx(file)
    elif file.name.endswith((".txt", ".py", ".json", ".html", ".css", ".md")):
        doc = parse_txt(file)
    else:
        st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
        doc = None

    return doc

# this function can be used to define a single doc processing pipeline
# def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:  

def qa_main():
    """Renders the chat-with-your-file page.

    Flow: (1) collect and validate the user's OpenAI API key, (2) let the
    user upload one document, parse it and embed it into a search index,
    (3) run a chat loop whose answers are grounded in the indexed document.
    Persistent values (API key, chat history, submission flag) live in
    st.session_state so they survive Streamlit reruns.
    """
    st.markdown("<h2>This app allows to chat with files!</h2>", unsafe_allow_html=True)
    # BUGFIX: repaired broken grammar in the user-facing intro sentence.
    st.write("Just upload a file and start chatting with a version of GPT4 that has read the file!")

    index = None
    doc = None

    # Each stage is gated on the previous one succeeding.
    upload_document_greenlight = False
    uploaded_processed_document_greenlight = False
    # OpenAI API Key - TODO: consider adding a key valid for everyone
    st.header("Configure OpenAI API Key")
    st.warning('Please enter your OpenAI API Key!', icon='⚠️')
    user_secret = st.text_input(
        "Insert your OpenAI API key here ([get your API key](https://platform.openai.com/account/api-keys)).",
        type="password",
        placeholder="Paste your OpenAI API key here (sk-...)",
        help="You can get your API key from https://platform.openai.com/account/api-keys.",
        value=st.session_state.get("OPENAI_API_KEY", ""),
    )
    if user_secret:
        if set_openai_api_key(user_secret):
            st.success('OpenAI API key successfully provided!', icon='✅')
            upload_document_greenlight = True

    if upload_document_greenlight:
        # File that needs to be queried
        st.header("Upload a file")
        uploaded_file = st.file_uploader(
            "Upload a pdf, docx, or txt file (scanned documents not supported)",
            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
            help="Scanned documents are not supported yet 🥲",
            on_change=clear_submit,
            accept_multiple_files=multiple_files,
        )

        # reading the uploaded file
        if uploaded_file is not None:
            # toggle internal file submission state to True
            st.session_state["file_submitted"] = True
            # parse the file using custom parsers
            doc = file_to_doc(uploaded_file)
            # BUGFIX: file_to_doc returns None for unsupported types; the
            # original code then crashed on tuple(None). Skip processing
            # instead (file_to_doc already showed the error).
            if doc is not None:
                # converts the files into a list of documents
                text = text_to_docs(text=tuple(doc))

                try:
                    with st.spinner("Indexing the document... This might take a while!"):
                        index = embed_docs(tuple(text))
                        st.session_state["api_key_configured"] = True
                except OpenAIError as e:
                    # BUGFIX: st.error's second positional argument is the
                    # icon, so the original st.error("...", e._message) never
                    # displayed the message — format it into the body.
                    st.error(f"OpenAI error encountered: {e._message}")
                else:
                    # BUGFIX: only open the chat once the index was actually
                    # built; previously this flag was set even after an
                    # OpenAIError, so the chat later called
                    # search_docs(None, ...).
                    uploaded_processed_document_greenlight = True

    if uploaded_processed_document_greenlight:
        if "messages" not in st.session_state:
            st.session_state["messages"] = []

        # replay the conversation so far on each rerun
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

        if prompt := st.chat_input("Ask the document something..."):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                # retrieving the most relevant sources
                sources = search_docs(index, prompt)
                # producing the answer, live (character by character, with a
                # cursor glyph while streaming)
                full_response = ""
                for answer_bit in get_answer(sources, prompt)["output_text"]:
                    full_response += answer_bit
                    message_placeholder.markdown(full_response + "▌")

                message_placeholder.markdown(full_response)

            st.session_state.messages.append({"role": "assistant", "content": full_response})

# This might be useful to add memory to the chatbot using a lower-level approach
# llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
# retriever = your_vector_store.as_retriever()

# # Create the multipurpose chain
# qachat = ConversationalRetrievalChain.from_llm(
#     llm=ChatOpenAI(temperature=0),
#     memory=memory,
#     retriever=retriever, 
#     return_source_documents=True
# )

# qachat("Ask your question here...")