Spaces:

Siyuan0730
/

OmniTutor

Running

App Files Files Community

Siyuan0730 committed on Oct 30, 2023

Commit

45de1d9

1 Parent(s): 95f55b3

大幅度更新，加入session

Browse files

Files changed (1) hide show

app.py +74 -46

app.py CHANGED Viewed

@@ -102,6 +102,7 @@ def courseOutlineGenerating(file_paths, num_lessons, language):
     course_outline = genarating_outline(summarized_materials, num_lessons, language)
     return course_outline
 def constructVDB(file_paths):
 #把KM拆解为chunks
@@ -109,7 +110,7 @@ def constructVDB(file_paths):
     for filename in file_paths:
         with open(filename, 'r') as f:
             content = f.read()
-            for chunk in chunkstring(content, 1024):
                 chunks.append(chunk)
     chunk_df = pd.DataFrame(chunks, columns=['chunk'])
@@ -171,12 +172,13 @@ def generateCourse(topic, materials, language):
     user_message = f"""You are a great AI teacher and linguist,
             skilled at writing informative and easy-to-understand course script based on given lesson topic and knowledge materials.
             You should write a course for new hands, they need detailed and vivid explaination to understand the topic.
-            Here are general steps of creating a well-designed course. Please follow them step-by-step:
             Step 1. Write down the teaching purpose of the lesson initially in the script.
             Step 2. Write down the outline of this lesson (outline is aligned to the teaching purpose), then follow the outline to write the content. Make sure every concept in the outline is explined adequately in the course.
-            Step 3. Review the content,add some examples (including code example) to the core concepts of this lesson, making sure examples are familiar with learner. Each core concepts should at least with one example.
-            Step 4. Review the content again, add some analogies or metaphors to the concepts that come up frequently to make the explanation of them more easier to understand.
-            Make sure all these steps are considered when writing the lesson script content.
             Your lesson topic and abstract is within the 「」 quotes, and the knowledge materials are within the 【】 brackets.
             lesson topic and abstract: 「{topic}」,
             knowledge materials related to this lesson：【{materials} 】
@@ -199,90 +201,112 @@ def decorate_user_question(user_question, retrieved_chunks_for_user):
     student's question: 「{user_question}」
     related materials:【{retrieved_chunks_for_user}】
     if the given materials are irrelavant to student's question, please use your own knowledge to answer the question.
-    You need to break down the student's question first, find out what he really wants to ask, and then try to give a comprehensive answer.
-    Start to answer the question now.
     '''
     return decorated_prompt
 def app():
     st.title("OmniTutor v0.0.2")
     with st.sidebar:
         st.image("https://siyuan-harry.oss-cn-beijing.aliyuncs.com/oss://siyuan-harry/20231021212525.png")
         added_files = st.file_uploader('Upload .md file', type=['.md'], accept_multiple_files=True)
-        num_lessons = st.slider('How many lessons do you want this course to have?', min_value=5, max_value=19, value=10, step=1)
         language = 'English'
         Chinese = st.checkbox('Output in Chinese')
         if Chinese:
             language = 'Chinese'
         btn = st.button('submit')
     col1, col2 = st.columns([0.6,0.4])
     if btn:
-        temp_file_paths = []
-        file_proc_state = st.text("Processing file...")
-        for added_file in added_files:
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".md") as tmp:
-                tmp.write(added_file.getvalue())
-                tmp_path = tmp.name
-                temp_file_paths.append(tmp_path)
-        file_proc_state.text("Processing file...Done")
-        vdb_state = st.text("Constructing vector database from provided materials...")
-        embeddings_df, faiss_index = constructVDB(temp_file_paths)
-        vdb_state.text("Constructing vector database from provided materials...Done")
-        outline_generating_state = st.text("Generating Course Oueline...")
-        course_outline_list = courseOutlineGenerating(temp_file_paths, num_lessons, language)
-        outline_generating_state.text("Generating Course Oueline...Done")
         with col1:
             #把课程大纲打印出来
             course_outline_string = ''
             lessons_count = 0
-            for outline in course_outline_list:
                 lessons_count += 1
-                course_outline_string += f"{lessons_count}." + outline[0] + '\n'
                 course_outline_string += '\n' + outline[1] + '\n\n'
                 #time.sleep(1)
             with st.expander("Check the course outline", expanded=False):
-                        st.write(course_outline_string)
             count_generating_content = 0
-            for lesson in course_outline_list:
                 count_generating_content += 1
                 content_generating_state = st.text(f"Writing content for lesson {count_generating_content}...")
-                retrievedChunksList = searchVDB(lesson, embeddings_df, faiss_index)
                 courseContent = generateCourse(lesson, retrievedChunksList, language)
                 content_generating_state.text(f"Writing content for lesson {count_generating_content}...Done")
                 #st.text_area("Course Content", value=courseContent)
                 with st.expander(f"Learn the lesson {count_generating_content} ", expanded=False):
                     st.markdown(courseContent)
-        user_question = st.chat_input("Enter your questions when learning...")
-        with col2:
             st.caption(''':blue[AI Assistant]: Ask this TA any questions related to this course and get direct answers. :sunglasses:''')
                 # Set a default model
             with st.chat_message("assistant"):
                 st.write("Hello👋, how can I help you today? 😄")
-            if "openai_model" not in st.session_state:
-                st.session_state["openai_model"] = "gpt-3.5-turbo"
-            # Initialize chat history
-            if "messages" not in st.session_state:
-                st.session_state.messages = []
-            # Display chat messages from history on app rerun
-            for message in st.session_state.messages:
-                with st.chat_message(message["role"]):
-                    st.markdown(message["content"])
             #这里的session.state就是保存了这个对话会话的一些基本信息和设置
             if user_question:
-                retrieved_chunks_for_user = searchVDB(user_question, embeddings_df, faiss_index)
                 prompt = decorate_user_question(user_question, retrieved_chunks_for_user)
                 st.session_state.messages.append({"role": "user", "content": prompt})
                 with st.chat_message("user"):
@@ -300,7 +324,11 @@ def app():
                         message_placeholder.markdown(full_response + "▌")
                     message_placeholder.markdown(full_response)
                 st.session_state.messages.append({"role": "assistant", "content": full_response})
 if __name__ == "__main__":
     app()

     course_outline = genarating_outline(summarized_materials, num_lessons, language)
     return course_outline
+@st.cache_data
 def constructVDB(file_paths):
 #把KM拆解为chunks
     for filename in file_paths:
         with open(filename, 'r') as f:
             content = f.read()
+            for chunk in chunkstring(content, 730):
                 chunks.append(chunk)
     chunk_df = pd.DataFrame(chunks, columns=['chunk'])
     user_message = f"""You are a great AI teacher and linguist,
             skilled at writing informative and easy-to-understand course script based on given lesson topic and knowledge materials.
             You should write a course for new hands, they need detailed and vivid explaination to understand the topic.
+            A high-quality course should meet requirements below:
+            (1) Contains enough facts, data and figures to be convincing
+            (2) The internal narrative is layered and logical, not a simple pile of items
+            Make sure all these requirements are considered when writing the lesson script content.
+            Please follow this procedure step-by-step when disgning the course:
             Step 1. Write down the teaching purpose of the lesson initially in the script.
             Step 2. Write down the outline of this lesson (outline is aligned to the teaching purpose), then follow the outline to write the content. Make sure every concept in the outline is explined adequately in the course.
             Your lesson topic and abstract is within the 「」 quotes, and the knowledge materials are within the 【】 brackets.
             lesson topic and abstract: 「{topic}」,
             knowledge materials related to this lesson：【{materials} 】
     student's question: 「{user_question}」
     related materials:【{retrieved_chunks_for_user}】
     if the given materials are irrelavant to student's question, please use your own knowledge to answer the question.
+    You need to break down the student's question first, find out what he really wants to ask, and then try your best to give a comprehensive answer.
+    The language you're answering in should aligned with what student is using.
+    Now you're talking to the student. Please answer.
     '''
     return decorated_prompt
+@st.cache_data  # NOTE(review): the cached body also issues st.* UI calls and returns the FAISS index; confirm st.cache_data suits both
+def initialize_app(added_files, num_lessons, language):  # Copy the uploads to temp .md files, build the vector DB, generate the outline; returns all three results.
+    temp_file_paths = []  # paths of the temp copies, passed to both helpers below
+    file_proc_state = st.empty()  # placeholder for the file-processing status line
+    file_proc_state.text("Processing file...")
+    for added_file in added_files:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".md") as tmp:  # delete=False: file stays on disk after the with-block; NOTE(review): nothing in this function removes it afterwards
+            tmp.write(added_file.getvalue())  # write the upload's contents into the temp file
+            tmp_path = tmp.name
+            temp_file_paths.append(tmp_path)
+    file_proc_state.text("Processing file...Done")
+    vdb_state = st.empty()  # placeholder for the vector-database status line
+    vdb_state.text("Constructing vector database from provided materials...")
+    embeddings_df, faiss_index = constructVDB(temp_file_paths)  # helper defined elsewhere in the file; unpacked as a 2-tuple here
+    vdb_state.text("Constructing vector database from provided materials...Done")
+    outline_generating_state = st.empty()  # placeholder for the outline-generation status line
+    outline_generating_state.text("Generating Course Outline...")
+    course_outline_list = courseOutlineGenerating(temp_file_paths, num_lessons, language)  # outline is generated from the same temp files
+    outline_generating_state.text("Generating Course Outline...Done")
+    file_proc_state.empty()  # clear all three status lines once every step has finished
+    vdb_state.empty()
+    outline_generating_state.empty()
+    return embeddings_df, faiss_index, course_outline_list  # same order the caller unpacks into st.session_state
 def app():
     st.title("OmniTutor v0.0.2")
+    if "openai_model" not in st.session_state:
+        st.session_state["openai_model"] = "gpt-3.5-turbo"
+        # Initialize chat history
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    # Display chat messages from history on app rerun - 这部分不用session，利用好rerun
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
     with st.sidebar:
         st.image("https://siyuan-harry.oss-cn-beijing.aliyuncs.com/oss://siyuan-harry/20231021212525.png")
         added_files = st.file_uploader('Upload .md file', type=['.md'], accept_multiple_files=True)
+        num_lessons = st.slider('How many lessons do you want this course to have?', min_value=3, max_value=14, value=5, step=1)
         language = 'English'
         Chinese = st.checkbox('Output in Chinese')
         if Chinese:
             language = 'Chinese'
         btn = st.button('submit')
     col1, col2 = st.columns([0.6,0.4])
+    user_question = st.chat_input("Enter your questions when learning...")
     if btn:
+        if "embeddings_df" and "faiss_index" and "course_outline_list" not in st.session_state:
+            st.session_state.embeddings_df, st.session_state.faiss_index, st.session_state.course_outline_list = initialize_app(added_files, num_lessons, language)
+        #embeddings_df, faiss_index, course_outline_list = initialize_app(added_files, num_lessons, language)
         with col1:
+            st.text("Processing file...Done")
+            st.text("Constructing vector database from provided materials...Done")
+            st.text("Generating Course Outline...Done")
             #把课程大纲打印出来
             course_outline_string = ''
             lessons_count = 0
+            for outline in st.session_state.course_outline_list:
                 lessons_count += 1
+                course_outline_string += f"{lessons_count}." + outline[0]
                 course_outline_string += '\n' + outline[1] + '\n\n'
                 #time.sleep(1)
             with st.expander("Check the course outline", expanded=False):
+                st.write(course_outline_string)
             count_generating_content = 0
+            for lesson in st.session_state.course_outline_list:
                 count_generating_content += 1
                 content_generating_state = st.text(f"Writing content for lesson {count_generating_content}...")
+                retrievedChunksList = searchVDB(lesson, st.session_state.embeddings_df, st.session_state.faiss_index)
                 courseContent = generateCourse(lesson, retrievedChunksList, language)
                 content_generating_state.text(f"Writing content for lesson {count_generating_content}...Done")
                 #st.text_area("Course Content", value=courseContent)
                 with st.expander(f"Learn the lesson {count_generating_content} ", expanded=False):
                     st.markdown(courseContent)
+    with col2:
             st.caption(''':blue[AI Assistant]: Ask this TA any questions related to this course and get direct answers. :sunglasses:''')
                 # Set a default model
             with st.chat_message("assistant"):
                 st.write("Hello👋, how can I help you today? 😄")
             #这里的session.state就是保存了这个对话会话的一些基本信息和设置
             if user_question:
+                retrieved_chunks_for_user = searchVDB(user_question, st.session_state.embeddings_df, st.session_state.faiss_index)
+                #retrieved_chunks_for_user = []
                 prompt = decorate_user_question(user_question, retrieved_chunks_for_user)
                 st.session_state.messages.append({"role": "user", "content": prompt})
                 with st.chat_message("user"):
                         message_placeholder.markdown(full_response + "▌")
                     message_placeholder.markdown(full_response)
                 st.session_state.messages.append({"role": "assistant", "content": full_response})
 if __name__ == "__main__":
     app()