"""Streamlit app: upload a document, index it, and chat about it via OpenAI."""

import os

import streamlit as st
from openai.error import OpenAIError
from streamlit_chat import message

from utils import (
    parse_docx,
    parse_pdf,
    parse_txt,
    parse_csv,
    parse_pptx,
    search_docs,
    embed_docs,
    text_to_docs,
    get_answer,
    parse_any,
    get_sources,
    wrap_text_in_html,
)

# Dedicated parsers keyed by lower-case file suffix. Any other suffix is
# handed to parse_any instead of being rejected.
_PARSERS_BY_SUFFIX = {
    ".pdf": parse_pdf,
    ".docx": parse_docx,
    ".csv": parse_csv,
    ".txt": parse_txt,
    ".pptx": parse_pptx,
}


def clear_submit():
    """Reset the "submit" flag in the session state to False."""
    st.session_state["submit"] = False


def set_openai_api_key(api_key: str):
    """Store *api_key* in the session state under "OPENAI_API_KEY"."""
    st.session_state["OPENAI_API_KEY"] = api_key


def _parse_uploaded_file(uploaded_file):
    """Parse *uploaded_file* with the parser matching its file suffix.

    The suffix comparison is case-insensitive so that e.g. "REPORT.PDF" is
    routed to the PDF parser. Unknown suffixes fall back to parse_any.
    """
    lowered_name = uploaded_file.name.lower()
    for suffix, parser in _PARSERS_BY_SUFFIX.items():
        if lowered_name.endswith(suffix):
            return parser(uploaded_file)
    return parse_any(uploaded_file)


def get_text():
    """Render the question input and return the text typed by the user.

    The input is only rendered once an API key has been entered (it reads
    the module-level ``user_secret``); otherwise nothing is drawn and None
    is returned.
    """
    if not user_secret:
        return None
    st.header("关于文档,你想问..?")
    return st.text_area("You:", on_change=clear_submit)


# NOTE(review): unsafe_allow_html=True suggests this title originally carried
# HTML markup; the literal is kept exactly as found - confirm against upstream.
st.markdown("\n\n和文档聊聊💬 by I-Robot.Life\n\n", unsafe_allow_html=True)

# Sidebar
index = None
doc = None
with st.sidebar:
    user_secret = st.text_input(
        "OpenAI API Key",
        type="password",
        placeholder="输入你的api-key (sk-开头)",
        help="api-key应该从官网获取 https://platform.openai.com/account/api-keys.",
        value=st.session_state.get("OPENAI_API_KEY", ""),
    )
    if user_secret:
        set_openai_api_key(user_secret)

    uploaded_file = st.file_uploader(
        "上传你的文档,可以是pdf, docx, txt,扫描、影印的pdf暂不支持",
        type=["pdf", "docx", "txt", "csv", "pptx", "js", "py", "json", "html", "css", "md"],
        help="扫描、影印的pdf暂不支持!",
        on_change=clear_submit,
    )

    if uploaded_file is not None:
        doc = _parse_uploaded_file(uploaded_file)
        text = text_to_docs(doc)
        st.write(text)
        try:
            with st.spinner("正在拼命阅读... \n你可以去接杯水再回来看看⏳"):
                index = embed_docs(text)
            st.session_state["api_key_configured"] = True
        except OpenAIError as e:
            # index stays None here; the submit handler below checks for that.
            st.error(e._message)

tab1, tab2 = st.tabs(["Intro", "Chat with the File"])

with tab1:
    st.markdown("### 使用指南")
    st.write("1,输入可用的api-key.")
    st.write('2,上传文档...等待解析完成')
    st.write('3,提问,得到回答')
    st.write('感谢Daniel Avila,感谢Github', unsafe_allow_html=True)

with tab2:
    st.write('从官网链接获取apikey link: https://openai.com/api/')

    # Chat history lives in the session state so it survives script reruns.
    for history_key in ("generated", "past"):
        if history_key not in st.session_state:
            st.session_state[history_key] = []

    user_input = get_text()

    button = st.button("Submit")
    if button or st.session_state.get("submit"):
        if not user_input:
            st.error("请输入问题")
        elif index is None:
            # No document has been indexed (nothing uploaded, or embedding
            # failed above), so there is nothing to search.
            st.error("请先上传文档并等待解析完成")
        else:
            st.session_state["submit"] = True
            try:
                # Retrieval is inside the try as well: presumably it can hit
                # the OpenAI API too - verify against utils.search_docs.
                sources = search_docs(index, user_input)
                answer = get_answer(sources, user_input)
                st.session_state.past.append(user_input)
                # Keep only the answer text that precedes the "SOURCES: " marker.
                st.session_state.generated.append(
                    answer["output_text"].split("SOURCES: ")[0]
                )
            except OpenAIError as e:
                st.error(e._message)

    if st.session_state['generated']:
        # Newest exchange first; each answer is followed by its question.
        for i in reversed(range(len(st.session_state['generated']))):
            message(st.session_state["generated"][i], key=str(i))
            message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')