# Scraped page header (not part of the program):
# awacke1's picture | Create app.py | c8fbdd5 | raw | history blame | 6.72 kB
import streamlit as st
import openai
import os
import base64
import glob
import json
import mistune
import pytz
import math
from datetime import datetime
from openai import ChatCompletion
from xml.etree import ElementTree as ET
from bs4 import BeautifulSoup
from collections import deque
# Credential for the OpenAI client, taken from the OPENAI_KEY environment variable.
openai.api_key = os.getenv('OPENAI_KEY')

# Page-level settings; this is the first Streamlit call the script makes.
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")

# Sidebar selector for the extension used when saving model responses.
menu = ["txt", "htm", "md", "py"]
choice = st.sidebar.selectbox("Output file type:", menu)
choicePrefix = "Output file type is "

# Description echoed under the selector for each known extension.
_choice_descriptions = {
    "txt": "Text File.",
    "htm": "HTML5.",
    "md": "Markdown.",
    "py": "Python Code.",
}
if choice in _choice_descriptions:
    st.sidebar.write(choicePrefix + _choice_descriptions[choice])
def generate_filename(prompt, file_type):
    """Build an output file name from the current US/Central time and the prompt.

    Args:
        prompt: Text whose alphanumeric characters (at most the first 28 of
            them) become the stem of the name.
        file_type: Extension placed after the dot (given without the dot).

    Returns:
        A string of the form ``<MMDD>_<HHMM>_<stem>.<file_type>``. The hour
        uses the 12-hour ``%I`` format and carries no AM/PM marker.
    """
    now_central = datetime.now(pytz.timezone('US/Central'))
    timestamp = now_central.strftime("%m%d_%I%M")
    stem = "".join(filter(str.isalnum, prompt))[:28]
    return f"{timestamp}_{stem}.{file_type}"
def create_file(filename, prompt, response):
    """Write a prompt/response pair to *filename*, formatted by its extension.

    Args:
        filename: Destination path. ``.txt``, ``.htm`` and ``.md`` select a
            plain-text, HTML or Markdown layout; any other extension (for
            example ``.py``) receives the response text on its own.
        prompt: The question that was sent to the model.
        response: The model's answer.

    The file is always written as UTF-8 so that non-ASCII characters in the
    prompt or response do not depend on the platform's default encoding.
    """
    if filename.endswith(".txt"):
        content = f"Prompt:\n{prompt}\nResponse:\n{response}"
    elif filename.endswith(".htm"):
        content = f"<h1>Prompt:</h1> <p>{prompt}</p> <h1>Response:</h1> <p>{response}</p>"
    elif filename.endswith(".md"):
        content = f"# Prompt:\n{prompt}\n# Response:\n{response}"
    else:
        # Previously nothing was written for other extensions, which left
        # callers holding the name of a file that did not exist.
        content = f"{response}"
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)
def truncate_document(document, length):
    """Return the leading part of *document*, sliced as ``document[:length]``."""
    head = slice(length)
    return document[head]
def divide_document(document, max_length):
    """Split *document* into consecutive slices of at most *max_length* items.

    Returns a list of slices in order; the last one may be shorter. An empty
    document yields an empty list. ``max_length`` is used as the ``range``
    step, so zero raises ``ValueError``.
    """
    sections = []
    for start in range(0, len(document), max_length):
        sections.append(document[start:start + max_length])
    return sections
def chat_with_model(prompt, document_section):
    """Ask gpt-3.5-turbo about a document section and return the reply text.

    The request carries three messages in order: a fixed system message, the
    user's *prompt*, and *document_section* sent with the ``assistant`` role.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
        {'role': 'assistant', 'content': document_section},
    ]
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def get_table_download_link(file_path):
    """Return an HTML ``<a>`` tag that downloads *file_path* via a data URI.

    Args:
        file_path: Path of an existing file to embed.

    Returns:
        An anchor whose ``href`` is a base64 ``data:`` URI of the file's bytes
        and whose ``download`` attribute and link text are the file's base
        name. ``.txt``, ``.htm`` and ``.md`` get text MIME types; anything
        else is served as ``application/octet-stream``.

    Raises:
        OSError: If the file cannot be opened.
    """
    import html  # local import: only needed for escaping the name below

    # Read raw bytes so the download is byte-exact and no decode step can
    # fail on content that is not valid in the platform's default encoding.
    with open(file_path, 'rb') as file:
        payload = file.read()
    b64 = base64.b64encode(payload).decode()

    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]
    mime_types = {
        '.txt': 'text/plain',
        '.htm': 'text/html',
        '.md': 'text/markdown',
    }
    mime_type = mime_types.get(ext, 'application/octet-stream')

    # The name is placed inside an attribute and rendered as raw HTML by the
    # callers, so quotes and angle brackets must not be passed through as-is.
    safe_name = html.escape(file_name, quote=True)
    return f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{safe_name}">{safe_name}</a>'
def CompressXML(xml_text):
    """Parse *xml_text* and drop every element whose tag contains ``Comment``.

    Args:
        xml_text: An XML document as a string.

    Returns:
        The document serialized back to a unicode string with the matching
        elements (and their subtrees) removed. The root element is never
        removed, even if its own tag matches.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_text* is not well-formed.
    """
    root = ET.fromstring(xml_text)
    # ElementTree elements carry no reference to their parent, so build the
    # child -> parent lookup once before removing anything.
    parent_of = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag:
            parent = parent_of.get(elem)
            if parent is not None:  # the root has no parent to remove it from
                parent.remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
def read_file_content(file,max_length):
    """Extract text from an uploaded file according to its MIME type.

    Args:
        file: A file-like object exposing a ``type`` attribute with the MIME
            type, plus ``read()`` (and ``getvalue()`` for plain text).
        max_length: Accepted for the caller's convenience; not used here.

    Returns:
        - JSON: ``str()`` of the parsed object.
        - HTML: the text extracted by BeautifulSoup.
        - XML: the document after passing through ``CompressXML``.
        - Markdown: the output of a mistune markdown renderer.
        - Plain text: the decoded file contents.
        - Any other type: an empty string.
    """
    kind = file.type
    if kind == "application/json":
        return str(json.load(file))
    if kind in ("text/html", "text/htm"):
        return BeautifulSoup(file, "html.parser").text
    if kind in ("application/xml", "text/xml"):
        document_root = ET.parse(file).getroot()
        return CompressXML(ET.tostring(document_root, encoding='unicode'))
    if kind in ("text/markdown", "text/md"):
        render = mistune.create_markdown()
        return render(file.read().decode())
    if kind == "text/plain":
        return file.getvalue().decode()
    return ""
def _save_and_link(name_seed, user_prompt, response):
    """Save one response to disk and add its download link to the sidebar."""
    filename = generate_filename(name_seed, choice)
    create_file(filename, user_prompt, response)
    # Only link a file that was actually written; building a link for a
    # missing file would raise and take the whole page down.
    if os.path.exists(filename):
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)


def main():
    """Render the page: question box, upload, per-section chat, saved files.

    Flow, top to bottom:
      1. Collect the question, the section length and an optional upload.
      2. Split the uploaded text into sections of the chosen length.
      3. Offer a preview button and one chat button per section, plus a
         chat button that sends all sections joined together.
      4. List every ``*.txt``, ``*.htm`` and ``*.md`` file in the working
         directory in the sidebar with a download link and a delete button.
    """
    user_prompt = st.text_area("Your question:", '', height=120)

    collength, colupload = st.columns([2, 3])  # adjust the ratio as needed
    with collength:
        # 12000 is optimal for gpt-3.5-turbo; 2x = 24000 for gpt-4;
        # 8x = 96000 for gpt-4-32k.
        max_length = st.slider("Context Section Length", min_value=1000, max_value=128000, value=12000, step=1000)
    with colupload:
        uploaded_file = st.file_uploader("Choose a file", type=["xml", "json", "html", "htm", "md", "txt"])

    document_sections = deque()
    # NOTE(review): this dict is rebuilt on every call and each index is
    # checked before it is stored, so the "already answered" branch below is
    # never taken. Persisting answers would need state that outlives the call.
    document_responses = {}
    if uploaded_file is not None:
        file_content = read_file_content(uploaded_file, max_length)
        document_sections.extend(divide_document(file_content, max_length))

    if document_sections:
        if st.button("👁️ View Upload"):
            st.markdown("**Sections of the uploaded file:**")
            for i, section in enumerate(document_sections):
                st.markdown(f"**Section {i+1}**\n{section}")
        st.markdown("**Chat with the model:**")
        for i, section in enumerate(document_sections):
            if i in document_responses:
                st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
            elif st.button(f"Chat about Section {i+1}"):
                st.write('Thinking and Reasoning with your inputs...')
                response = chat_with_model(user_prompt, section)
                st.write('Response:')
                st.write(response)
                document_responses[i] = response
                _save_and_link(f"{user_prompt}_section_{i+1}", user_prompt, response)

    if st.button('💬 Chat'):
        st.write('Thinking and Reasoning with your inputs...')
        response = chat_with_model(user_prompt, ''.join(document_sections))
        st.write('Response:')
        st.write(response)
        _save_and_link(user_prompt, user_prompt, response)

    all_files = glob.glob("*.txt") + glob.glob("*.htm") + glob.glob("*.md")
    for file in all_files:
        col1, col2 = st.sidebar.columns([4, 1])  # adjust the ratio as needed
        with col1:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)
        with col2:
            if st.button("🗑", key=file):
                os.remove(file)
                # Prefer the stable API when this Streamlit version has it;
                # fall back to the experimental name otherwise.
                rerun = getattr(st, "rerun", None) or st.experimental_rerun
                rerun()


if __name__ == "__main__":
    main()