awacke1 committed on
Commit
66bad60
·
1 Parent(s): cccc5f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -52
app.py CHANGED
@@ -1,46 +1,65 @@
1
  import streamlit as st
2
  import openai
3
  import os
 
 
4
  import json
 
 
 
5
  import requests
 
6
  from datetime import datetime
7
- from collections import deque
8
  from openai import ChatCompletion
 
 
 
9
  from audio_recorder_streamlit import audio_recorder
10
 
11
- # Initialize configurations
12
- configurations = {}
13
- config_file = "configurations.json"
14
- if os.path.exists(config_file):
15
- with open(config_file, "r") as file:
16
- configurations = json.load(file)
17
-
18
  openai.api_key = os.getenv('OPENAI_KEY')
19
- st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
20
 
 
 
21
  model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
22
 
23
- user_prompt = st.text_area(
24
- "Enter prompts, instructions & questions:",
25
- configurations.get("user_prompt", ""),
26
- height=100
27
- )
28
- system_prompt = configurations.get("system_prompt", "You are a helpful assistant.")
29
-
30
  def generate_filename(prompt, file_type):
31
- safe_date_time = datetime.now().strftime("%m%d_%I%M")
 
32
  safe_prompt = "".join(x for x in prompt if x.isalnum())[:45]
33
  return f"{safe_date_time}_{safe_prompt}.{file_type}"
34
 
35
  def chat_with_model(prompt, document_section):
36
- conversation = [{'role': 'system', 'content': system_prompt}]
 
37
  conversation.append({'role': 'user', 'content': prompt})
38
- if document_section:
39
  conversation.append({'role': 'assistant', 'content': document_section})
40
- response = openai.ChatCompletion.create(model=model_choice, messages=conversation)
41
  return response
42
-
43
- def save_and_play_audio():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  audio_bytes = audio_recorder()
45
  if audio_bytes:
46
  filename = generate_filename("Recording", "wav")
@@ -51,41 +70,141 @@ def save_and_play_audio():
51
  return None
52
 
53
  def create_file(filename, prompt, response):
54
- with open(filename, 'w') as file:
55
- file.write(f"Prompt:\n{prompt}\nResponse:\n{response}")
56
-
 
 
 
 
 
 
 
 
57
  def divide_document(document, max_length):
58
  return [document[i:i+max_length] for i in range(0, len(document), max_length)]
59
-
60
- def handle_uploaded_file(uploaded_file, max_length):
61
- file_content = uploaded_file.read().decode()
62
- return divide_document(file_content, max_length)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def main():
65
- max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
66
- uploaded_file = st.file_uploader("Add a file for context:", type=["txt"])
67
-
 
 
 
 
 
 
68
  document_sections = deque()
69
- if uploaded_file is not None:
70
- document_sections.extend(handle_uploaded_file(uploaded_file, max_length))
71
-
72
  document_responses = {}
73
- for i, section in enumerate(document_sections):
74
- if st.button(f"Chat about Section {i+1}"):
75
- response = chat_with_model(user_prompt, section)
76
- document_responses[i] = response
77
- filename = generate_filename(f"{user_prompt}_section_{i+1}", "txt")
78
- create_file(filename, user_prompt, response)
79
-
80
- if st.button('Chat'):
81
- response = chat_with_model(user_prompt, ''.join(document_sections))
82
- filename = generate_filename(user_prompt, "txt")
83
- create_file(filename, user_prompt, response)
84
-
85
- configurations["user_prompt"] = user_prompt
86
- configurations["system_prompt"] = system_prompt
87
- with open(config_file, "w") as file:
88
- json.dump(configurations, file)
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  if __name__ == "__main__":
91
- main()
 
1
  import streamlit as st
2
  import openai
3
  import os
4
+ import base64
5
+ import glob
6
  import json
7
+ import mistune
8
+ import pytz
9
+ import math
10
  import requests
11
+
12
  from datetime import datetime
 
13
  from openai import ChatCompletion
14
+ from xml.etree import ElementTree as ET
15
+ from bs4 import BeautifulSoup
16
+ from collections import deque
17
  from audio_recorder_streamlit import audio_recorder
18
 
 
 
 
 
 
 
 
19
# --- Module-level configuration -------------------------------------------
# Streamlit re-executes this script on every interaction, so these calls run
# on each rerun. The names `choice` and `model_choice` are read by the
# functions defined below.
openai.api_key = os.environ.get('OPENAI_KEY')
st.set_page_config(
    page_title="GPT Streamlit Document Reasoner",
    layout="wide",
)

# Extension used for files saved from chat responses.
menu = ["htm", "txt", "md", "py"]
choice = st.sidebar.selectbox("Output File Type:", menu)

# Chat model passed to openai.ChatCompletion.create.
model_choice = st.sidebar.radio(
    "Select Model:",
    ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'),
)
25
 
 
 
 
 
 
 
 
26
def generate_filename(prompt, file_type):
    """Build a timestamped file name derived from a prompt.

    The name has the form ``<MMDD_HHMM>_<prompt-chars>.<file_type>`` where the
    timestamp is the current US/Central time and ``<prompt-chars>`` is the
    first 45 alphanumeric characters of ``prompt``.

    Args:
        prompt: Text to derive the name from. ``None`` is treated as an
            empty string (a failed transcription yields ``None``).
        file_type: Extension to append, without the leading dot.

    Returns:
        The generated file name as a string.
    """
    central = pytz.timezone('US/Central')
    # NOTE: "%I" is the 12-hour clock and no AM/PM marker is included, so the
    # same stamp is produced twice a day.
    safe_date_time = datetime.now(central).strftime("%m%d_%I%M")
    # Keep only alphanumerics so the prompt cannot inject path separators.
    safe_prompt = "".join(ch for ch in (prompt or "") if ch.isalnum())[:45]
    return f"{safe_date_time}_{safe_prompt}.{file_type}"
31
 
32
def chat_with_model(prompt, document_section):
    """Send a prompt, plus optional document text, to the selected chat model.

    The conversation is a fixed system message, then ``prompt`` as the user
    message, then ``document_section`` as an ``assistant`` message when it is
    non-empty.

    Args:
        prompt: Content of the user message.
        document_section: Extra context text. ``None`` or ``""`` means no
            context message is added.

    Returns:
        The complete object returned by ``openai.ChatCompletion.create``.
        Callers that only want the reply text must read
        ``response['choices'][0]['message']['content']`` themselves.
    """
    conversation = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
    ]
    # Truthiness rather than len(): a None section must not raise TypeError.
    if document_section:
        conversation.append({'role': 'assistant', 'content': document_section})
    return openai.ChatCompletion.create(model=model_choice, messages=conversation)
41
+
42
def transcribe_audio(openai_key, file_path, model):
    """Transcribe an audio file via the OpenAI transcription endpoint.

    On success the raw JSON payload is written to the page, the transcript is
    also sent through ``chat_with_model`` and that response is displayed.

    Args:
        openai_key: API key placed in the ``Authorization: Bearer`` header.
        file_path: Path of the audio file to upload.
        model: Transcription model name sent in the form data.

    Returns:
        The ``text`` field of the JSON response, or ``None`` when the request
        did not return status 200 with a JSON object body.
    """
    OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
    headers = {"Authorization": f"Bearer {openai_key}"}

    # The request must be issued while the file handle is still open.
    with open(file_path, 'rb') as audio_file:
        response = requests.post(
            OPENAI_API_URL,
            headers=headers,
            files={'file': audio_file},
            data={'model': model},
        )

    # Decode the body once; an error page may not be JSON at all.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code != 200 or not isinstance(payload, dict):
        st.write(payload if payload is not None else response.text)
        st.error("Error in API call.")
        return None

    st.write(payload)
    transcript = payload.get('text')
    response2 = chat_with_model(transcript, '')
    st.write('Responses:')
    st.write(response2)
    return transcript
61
+
62
+ def save_and_play_audio(audio_recorder):
63
  audio_bytes = audio_recorder()
64
  if audio_bytes:
65
  filename = generate_filename("Recording", "wav")
 
70
  return None
71
 
72
def create_file(filename, prompt, response):
    """Write a prompt/response pair to ``filename`` in a format chosen by extension.

    ``.htm`` gets a small HTML fragment (with the text HTML-escaped), ``.md``
    gets Markdown headings, and every other extension gets the plain-text
    layout. Writing something for every extension matters because callers
    read the file back immediately to build a download link.

    Args:
        filename: Destination path; its suffix selects the layout.
        prompt: Prompt text (converted with ``str`` formatting).
        response: Response to record (converted with ``str`` formatting).
    """
    import html  # local import: only needed for the .htm layout

    if filename.endswith(".htm"):
        # Escape so that markup characters in the text cannot alter the page.
        safe_prompt = html.escape(str(prompt), quote=False)
        safe_response = html.escape(str(response), quote=False)
        content = (
            f"<h1>Prompt:</h1> <p>{safe_prompt}</p> "
            f"<h1>Response:</h1> <p>{safe_response}</p>"
        )
    elif filename.endswith(".md"):
        content = f"# Prompt:\n{prompt}\n# Response:\n{response}"
    else:
        # ".txt" and any other extension (e.g. "py") use the plain layout.
        content = f"Prompt:\n{prompt}\nResponse:\n{response}"

    with open(filename, 'w') as file:
        file.write(content)
82
def truncate_document(document, length):
    """Return the leading part of ``document`` up to ``length`` (slice semantics)."""
    prefix = document[0:length]
    return prefix
84
def divide_document(document, max_length):
    """Split ``document`` into consecutive pieces of at most ``max_length`` items.

    Returns a list of slices in order; the final slice may be shorter. An
    empty document yields an empty list.
    """
    sections = []
    for start in range(0, len(document), max_length):
        stop = start + max_length
        sections.append(document[start:stop])
    return sections
86
def get_table_download_link(file_path):
    """Return an HTML anchor that downloads ``file_path`` via a base64 data URI.

    The file is read as raw bytes, so any file (not only decodable text) can
    be linked and its content is embedded unchanged.

    Args:
        file_path: Path of the file to embed.

    Returns:
        An ``<a>`` element string whose ``href`` is a ``data:`` URI and whose
        label and ``download`` attribute are the file's base name.
    """
    import html  # local import: used to escape the name inside the markup

    with open(file_path, 'rb') as file:
        b64 = base64.b64encode(file.read()).decode()

    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]  # includes the leading dot
    mime_types = {
        '.txt': 'text/plain',
        '.htm': 'text/html',
        '.md': 'text/markdown',
    }
    # Anything unrecognised is offered as generic binary data.
    mime_type = mime_types.get(ext, 'application/octet-stream')

    # The name lands inside an attribute and element text; escape quotes too.
    safe_name = html.escape(file_name, quote=True)
    href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{safe_name}">{safe_name}</a>'
    return href
102
+
103
+
104
+
105
# Audio, transcribe, GPT:
# Runs at module level on every Streamlit rerun: record audio, transcribe it,
# then use the transcript as the chat prompt and save the exchange.
filename = save_and_play_audio(audio_recorder)
if filename is not None:
    transcription = transcribe_audio(openai.api_key, filename, "whisper-1")
    st.write(transcription)
    # transcribe_audio returns None when the API call fails; without a
    # transcript there is nothing to send to the model or to save.
    if transcription:
        gptOutput = chat_with_model(transcription, '')  # push transcript through as prompt
        filename = generate_filename(transcription, choice)
        create_file(filename, transcription, gptOutput)
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
114
+
115
+
116
+
117
+
118
+
119
def CompressXML(xml_text):
    """Return ``xml_text`` with every element whose tag contains 'Comment' removed.

    Args:
        xml_text: XML document as a string.

    Returns:
        The re-serialised XML as a unicode string. A matching root element is
        kept, because it has no parent to be removed from.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_text`` is not well-formed.
    """
    root = ET.fromstring(xml_text)
    # ElementTree elements carry no reference to their parent, so build the
    # child -> parent lookup up front.
    parent_of = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag:
            parent = parent_of.get(elem)
            if parent is not None:
                # Removing an element also drops its tail text.
                parent.remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
125
+
126
def read_file_content(file, max_length):
    """Extract text from an uploaded file according to its reported MIME type.

    Args:
        file: Uploaded file object exposing ``type`` plus file-like reads
            (``read`` / ``getvalue``).
        max_length: Accepted for the caller's convenience; not used here.

    Returns:
        JSON as the ``str`` of the parsed object, HTML as its text content,
        XML re-serialised through ``CompressXML``, Markdown as rendered by
        mistune, plain text decoded as-is, and ``""`` for any other type.
    """
    mime = file.type

    if mime == "application/json":
        return str(json.load(file))

    if mime in ("text/html", "text/htm"):
        return BeautifulSoup(file, "html.parser").text

    if mime in ("application/xml", "text/xml"):
        root = ET.parse(file).getroot()
        return CompressXML(ET.tostring(root, encoding='unicode'))

    if mime in ("text/markdown", "text/md"):
        render = mistune.create_markdown()
        return render(file.read().decode())

    if mime == "text/plain":
        return file.getvalue().decode()

    return ""
146
 
147
def _publish_response(name_seed, prompt, response):
    """Save one prompt/response pair to disk and add its download link to the sidebar."""
    saved_as = generate_filename(name_seed, choice)
    create_file(saved_as, prompt, response)
    st.sidebar.markdown(get_table_download_link(saved_as), unsafe_allow_html=True)


def main():
    """Render the page: prompt box, file upload, per-section chat and saved files.

    Flow, top to bottom:
      1. Prompt text area, section-length slider and file uploader.
      2. If a file was uploaded, split its text into sections and offer a
         viewer plus one chat button per section.
      3. A general chat button that sends the prompt with all sections joined.
      4. A sidebar list of saved files, each with a delete button.
    """
    user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)

    collength, colupload = st.columns([2,3]) # adjust the ratio as needed
    with collength:
        #max_length = 12000 - optimal for gpt35 turbo. 2x=24000 for gpt4. 8x=96000 for gpt4-32k.
        max_length = st.slider(
            "File section length for large files",
            min_value=1000,
            max_value=128000,
            value=12000,
            step=1000,
        )
    with colupload:
        uploaded_file = st.file_uploader(
            "Add a file for context:",
            type=["xml", "json", "html", "htm", "md", "txt"],
        )

    document_sections = deque()
    # NOTE(review): this dict is rebuilt on every call and each index is
    # checked before it is stored, so the "already answered" branch below
    # cannot fire within a single call.
    document_responses = {}

    if uploaded_file is not None:
        file_content = read_file_content(uploaded_file, max_length)
        document_sections.extend(divide_document(file_content, max_length))

    if document_sections:
        sections = list(document_sections)

        if st.button("👁️ View Upload"):
            st.markdown("**Sections of the uploaded file:**")
            for i, section in enumerate(sections):
                st.markdown(f"**Section {i+1}**\n{section}")

        st.markdown("**Chat with the model:**")
        for i, section in enumerate(sections):
            if i in document_responses:
                st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
                continue
            if not st.button(f"Chat about Section {i+1}"):
                continue
            st.write('Reasoning with your inputs...')
            response = chat_with_model(user_prompt, section)
            st.write('Response:')
            st.write(response)
            document_responses[i] = response
            _publish_response(f"{user_prompt}_section_{i+1}", user_prompt, response)

    if st.button('💬 Chat'):
        st.write('Reasoning with your inputs...')
        response = chat_with_model(user_prompt, ''.join(document_sections))
        st.write('Response:')
        st.write(response)
        _publish_response(user_prompt, user_prompt, response)

    # Saved files: only names whose stem is at least 20 characters long,
    # ordered by (extension, name) descending.
    candidates = [
        path for path in glob.glob("*.*")
        if len(os.path.splitext(path)[0]) >= 20
    ]
    all_files = sorted(
        candidates,
        key=lambda x: (os.path.splitext(x)[1], x),
        reverse=True,
    )

    for file in all_files:
        col1, col3 = st.sidebar.columns([5,1]) # adjust the ratio as needed
        with col1:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)
        with col3:
            if st.button("🗑", key="delete_"+file):
                os.remove(file)
                st.experimental_rerun()


if __name__ == "__main__":
    main()