Siyuan0730 committed on
Commit
45de1d9
·
1 Parent(s): 95f55b3

大幅度更新,加入session

Browse files
Files changed (1) hide show
  1. app.py +74 -46
app.py CHANGED
@@ -102,6 +102,7 @@ def courseOutlineGenerating(file_paths, num_lessons, language):
102
  course_outline = genarating_outline(summarized_materials, num_lessons, language)
103
  return course_outline
104
 
 
105
  def constructVDB(file_paths):
106
  #把KM拆解为chunks
107
 
@@ -109,7 +110,7 @@ def constructVDB(file_paths):
109
  for filename in file_paths:
110
  with open(filename, 'r') as f:
111
  content = f.read()
112
- for chunk in chunkstring(content, 1024):
113
  chunks.append(chunk)
114
  chunk_df = pd.DataFrame(chunks, columns=['chunk'])
115
 
@@ -171,12 +172,13 @@ def generateCourse(topic, materials, language):
171
  user_message = f"""You are a great AI teacher and linguist,
172
  skilled at writing informative and easy-to-understand course script based on given lesson topic and knowledge materials.
173
  You should write a course for new hands, they need detailed and vivid explaination to understand the topic.
174
- Here are general steps of creating a well-designed course. Please follow them step-by-step:
 
 
 
 
175
  Step 1. Write down the teaching purpose of the lesson initially in the script.
176
  Step 2. Write down the outline of this lesson (outline is aligned to the teaching purpose), then follow the outline to write the content. Make sure every concept in the outline is explined adequately in the course.
177
- Step 3. Review the content,add some examples (including code example) to the core concepts of this lesson, making sure examples are familiar with learner. Each core concepts should at least with one example.
178
- Step 4. Review the content again, add some analogies or metaphors to the concepts that come up frequently to make the explanation of them more easier to understand.
179
- Make sure all these steps are considered when writing the lesson script content.
180
  Your lesson topic and abstract is within the 「」 quotes, and the knowledge materials are within the 【】 brackets.
181
  lesson topic and abstract: 「{topic}」,
182
  knowledge materials related to this lesson:【{materials} 】
@@ -199,90 +201,112 @@ def decorate_user_question(user_question, retrieved_chunks_for_user):
199
  student's question: 「{user_question}」
200
  related materials:【{retrieved_chunks_for_user}】
201
  if the given materials are irrelavant to student's question, please use your own knowledge to answer the question.
202
- You need to break down the student's question first, find out what he really wants to ask, and then try to give a comprehensive answer.
203
- Start to answer the question now.
 
204
  '''
205
  return decorated_prompt
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  def app():
208
  st.title("OmniTutor v0.0.2")
209
 
 
 
 
 
 
 
 
 
 
 
 
210
  with st.sidebar:
211
  st.image("https://siyuan-harry.oss-cn-beijing.aliyuncs.com/oss://siyuan-harry/20231021212525.png")
212
  added_files = st.file_uploader('Upload .md file', type=['.md'], accept_multiple_files=True)
213
- num_lessons = st.slider('How many lessons do you want this course to have?', min_value=5, max_value=19, value=10, step=1)
214
  language = 'English'
215
  Chinese = st.checkbox('Output in Chinese')
216
  if Chinese:
217
  language = 'Chinese'
218
  btn = st.button('submit')
219
-
220
 
221
  col1, col2 = st.columns([0.6,0.4])
222
-
 
 
223
  if btn:
224
- temp_file_paths = []
225
- file_proc_state = st.text("Processing file...")
226
- for added_file in added_files:
227
- with tempfile.NamedTemporaryFile(delete=False, suffix=".md") as tmp:
228
- tmp.write(added_file.getvalue())
229
- tmp_path = tmp.name
230
- temp_file_paths.append(tmp_path)
231
- file_proc_state.text("Processing file...Done")
232
-
233
- vdb_state = st.text("Constructing vector database from provided materials...")
234
- embeddings_df, faiss_index = constructVDB(temp_file_paths)
235
- vdb_state.text("Constructing vector database from provided materials...Done")
236
 
237
- outline_generating_state = st.text("Generating Course Oueline...")
238
- course_outline_list = courseOutlineGenerating(temp_file_paths, num_lessons, language)
239
- outline_generating_state.text("Generating Course Oueline...Done")
240
 
241
  with col1:
 
 
 
 
242
  #把课程大纲打印出来
243
  course_outline_string = ''
244
  lessons_count = 0
245
- for outline in course_outline_list:
246
  lessons_count += 1
247
- course_outline_string += f"{lessons_count}." + outline[0] + '\n'
248
  course_outline_string += '\n' + outline[1] + '\n\n'
249
  #time.sleep(1)
250
  with st.expander("Check the course outline", expanded=False):
251
- st.write(course_outline_string)
252
 
253
  count_generating_content = 0
254
- for lesson in course_outline_list:
255
  count_generating_content += 1
256
  content_generating_state = st.text(f"Writing content for lesson {count_generating_content}...")
257
- retrievedChunksList = searchVDB(lesson, embeddings_df, faiss_index)
258
  courseContent = generateCourse(lesson, retrievedChunksList, language)
259
  content_generating_state.text(f"Writing content for lesson {count_generating_content}...Done")
260
  #st.text_area("Course Content", value=courseContent)
261
  with st.expander(f"Learn the lesson {count_generating_content} ", expanded=False):
262
  st.markdown(courseContent)
263
-
264
- user_question = st.chat_input("Enter your questions when learning...")
265
-
266
- with col2:
267
  st.caption(''':blue[AI Assistant]: Ask this TA any questions related to this course and get direct answers. :sunglasses:''')
268
  # Set a default model
269
 
270
  with st.chat_message("assistant"):
271
  st.write("Hello👋, how can I help you today? 😄")
272
- if "openai_model" not in st.session_state:
273
- st.session_state["openai_model"] = "gpt-3.5-turbo"
274
-
275
- # Initialize chat history
276
- if "messages" not in st.session_state:
277
- st.session_state.messages = []
278
-
279
- # Display chat messages from history on app rerun
280
- for message in st.session_state.messages:
281
- with st.chat_message(message["role"]):
282
- st.markdown(message["content"])
283
  #这里的session.state就是保存了这个对话会话的一些基本信息和设置
284
  if user_question:
285
- retrieved_chunks_for_user = searchVDB(user_question, embeddings_df, faiss_index)
 
286
  prompt = decorate_user_question(user_question, retrieved_chunks_for_user)
287
  st.session_state.messages.append({"role": "user", "content": prompt})
288
  with st.chat_message("user"):
@@ -300,7 +324,11 @@ def app():
300
  message_placeholder.markdown(full_response + "▌")
301
  message_placeholder.markdown(full_response)
302
  st.session_state.messages.append({"role": "assistant", "content": full_response})
 
 
303
 
304
 
 
 
305
  if __name__ == "__main__":
306
  app()
 
102
  course_outline = genarating_outline(summarized_materials, num_lessons, language)
103
  return course_outline
104
 
105
+ @st.cache_data
106
  def constructVDB(file_paths):
107
  #把KM拆解为chunks
108
 
 
110
  for filename in file_paths:
111
  with open(filename, 'r') as f:
112
  content = f.read()
113
+ for chunk in chunkstring(content, 730):
114
  chunks.append(chunk)
115
  chunk_df = pd.DataFrame(chunks, columns=['chunk'])
116
 
 
172
  user_message = f"""You are a great AI teacher and linguist,
173
  skilled at writing informative and easy-to-understand course script based on given lesson topic and knowledge materials.
174
  You should write a course for new hands, they need detailed and vivid explaination to understand the topic.
175
+ A high-quality course should meet requirements below:
176
+ (1) Contains enough facts, data and figures to be convincing
177
+ (2) The internal narrative is layered and logical, not a simple pile of items
178
+ Make sure all these requirements are considered when writing the lesson script content.
179
+ Please follow this procedure step-by-step when disgning the course:
180
  Step 1. Write down the teaching purpose of the lesson initially in the script.
181
  Step 2. Write down the outline of this lesson (outline is aligned to the teaching purpose), then follow the outline to write the content. Make sure every concept in the outline is explined adequately in the course.
 
 
 
182
  Your lesson topic and abstract is within the 「」 quotes, and the knowledge materials are within the 【】 brackets.
183
  lesson topic and abstract: 「{topic}」,
184
  knowledge materials related to this lesson:【{materials} 】
 
201
  student's question: 「{user_question}」
202
  related materials:【{retrieved_chunks_for_user}】
203
  if the given materials are irrelavant to student's question, please use your own knowledge to answer the question.
204
+ You need to break down the student's question first, find out what he really wants to ask, and then try your best to give a comprehensive answer.
205
+ The language you're answering in should aligned with what student is using.
206
+ Now you're talking to the student. Please answer.
207
  '''
208
  return decorated_prompt
209
 
210
@st.cache_data
def initialize_app(added_files, num_lessons, language):
    """Build the vector database and the course outline from uploaded files.

    Every uploaded file is copied into a temporary ``.md`` file so that
    ``constructVDB`` and ``courseOutlineGenerating`` can be handed file
    paths. Progress is reported through three placeholders, which are
    cleared again before the function returns.

    Args:
        added_files: Iterable of uploaded-file objects; each must provide
            ``getvalue()`` returning the file content to write.
        num_lessons: Number of lessons, forwarded to
            ``courseOutlineGenerating``.
        language: Output language, forwarded to ``courseOutlineGenerating``.

    Returns:
        A tuple ``(embeddings_df, faiss_index, course_outline_list)``.
    """
    import os  # local import: only needed for temp-file cleanup below

    temp_file_paths = []
    try:
        file_proc_state = st.empty()
        file_proc_state.text("Processing file...")
        for added_file in added_files:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".md") as tmp:
                # Track the path before writing so a failed write is still
                # cleaned up in the ``finally`` clause.
                temp_file_paths.append(tmp.name)
                tmp.write(added_file.getvalue())
        file_proc_state.text("Processing file...Done")

        vdb_state = st.empty()
        vdb_state.text("Constructing vector database from provided materials...")
        embeddings_df, faiss_index = constructVDB(temp_file_paths)
        vdb_state.text("Constructing vector database from provided materials...Done")

        outline_generating_state = st.empty()
        outline_generating_state.text("Generating Course Outline...")
        course_outline_list = courseOutlineGenerating(temp_file_paths, num_lessons, language)
        outline_generating_state.text("Generating Course Outline...Done")

        # Remove the transient progress messages once everything is ready.
        file_proc_state.empty()
        vdb_state.empty()
        outline_generating_state.empty()

        return embeddings_df, faiss_index, course_outline_list
    finally:
        # The files were created with delete=False, so they must be removed
        # explicitly or they accumulate on every cache miss.
        # NOTE(review): assumes neither helper needs the files after it
        # returns (constructVDB reads them eagerly) -- confirm for
        # courseOutlineGenerating.
        for path in temp_file_paths:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup; a missing/locked temp file must not
                # mask the real result or exception.
                pass
237
+
238
  def app():
239
  st.title("OmniTutor v0.0.2")
240
 
241
+ if "openai_model" not in st.session_state:
242
+ st.session_state["openai_model"] = "gpt-3.5-turbo"
243
+ # Initialize chat history
244
+ if "messages" not in st.session_state:
245
+ st.session_state.messages = []
246
+
247
+ # Display chat messages from history on app rerun - 这部分不用session,利用好rerun
248
+ for message in st.session_state.messages:
249
+ with st.chat_message(message["role"]):
250
+ st.markdown(message["content"])
251
+
252
  with st.sidebar:
253
  st.image("https://siyuan-harry.oss-cn-beijing.aliyuncs.com/oss://siyuan-harry/20231021212525.png")
254
  added_files = st.file_uploader('Upload .md file', type=['.md'], accept_multiple_files=True)
255
+ num_lessons = st.slider('How many lessons do you want this course to have?', min_value=3, max_value=14, value=5, step=1)
256
  language = 'English'
257
  Chinese = st.checkbox('Output in Chinese')
258
  if Chinese:
259
  language = 'Chinese'
260
  btn = st.button('submit')
 
261
 
262
  col1, col2 = st.columns([0.6,0.4])
263
+
264
+ user_question = st.chat_input("Enter your questions when learning...")
265
+
266
  if btn:
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
+ if "embeddings_df" and "faiss_index" and "course_outline_list" not in st.session_state:
269
+ st.session_state.embeddings_df, st.session_state.faiss_index, st.session_state.course_outline_list = initialize_app(added_files, num_lessons, language)
270
+ #embeddings_df, faiss_index, course_outline_list = initialize_app(added_files, num_lessons, language)
271
 
272
  with col1:
273
+ st.text("Processing file...Done")
274
+ st.text("Constructing vector database from provided materials...Done")
275
+ st.text("Generating Course Outline...Done")
276
+
277
  #把课程大纲打印出来
278
  course_outline_string = ''
279
  lessons_count = 0
280
+ for outline in st.session_state.course_outline_list:
281
  lessons_count += 1
282
+ course_outline_string += f"{lessons_count}." + outline[0]
283
  course_outline_string += '\n' + outline[1] + '\n\n'
284
  #time.sleep(1)
285
  with st.expander("Check the course outline", expanded=False):
286
+ st.write(course_outline_string)
287
 
288
  count_generating_content = 0
289
+ for lesson in st.session_state.course_outline_list:
290
  count_generating_content += 1
291
  content_generating_state = st.text(f"Writing content for lesson {count_generating_content}...")
292
+ retrievedChunksList = searchVDB(lesson, st.session_state.embeddings_df, st.session_state.faiss_index)
293
  courseContent = generateCourse(lesson, retrievedChunksList, language)
294
  content_generating_state.text(f"Writing content for lesson {count_generating_content}...Done")
295
  #st.text_area("Course Content", value=courseContent)
296
  with st.expander(f"Learn the lesson {count_generating_content} ", expanded=False):
297
  st.markdown(courseContent)
298
+
299
+ with col2:
 
 
300
  st.caption(''':blue[AI Assistant]: Ask this TA any questions related to this course and get direct answers. :sunglasses:''')
301
  # Set a default model
302
 
303
  with st.chat_message("assistant"):
304
  st.write("Hello👋, how can I help you today? 😄")
305
+
 
 
 
 
 
 
 
 
 
 
306
  #这里的session.state就是保存了这个对话会话的一些基本信息和设置
307
  if user_question:
308
+ retrieved_chunks_for_user = searchVDB(user_question, st.session_state.embeddings_df, st.session_state.faiss_index)
309
+ #retrieved_chunks_for_user = []
310
  prompt = decorate_user_question(user_question, retrieved_chunks_for_user)
311
  st.session_state.messages.append({"role": "user", "content": prompt})
312
  with st.chat_message("user"):
 
324
  message_placeholder.markdown(full_response + "▌")
325
  message_placeholder.markdown(full_response)
326
  st.session_state.messages.append({"role": "assistant", "content": full_response})
327
+
328
+
329
 
330
 
331
+
332
+
333
  if __name__ == "__main__":
334
  app()