SearchGPT

Running

App Files Community

Shreyas094 committed on Jul 6, 2024

Commit

d23826b

verified ·

1 Parent(s): f080583

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -120

app.py CHANGED Viewed

@@ -210,104 +210,71 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
         print(f"Result {i}:")
         print(f"  Link: {result['link']}")
         if result['text']:
-            print(f"  Text: {result['text'][:100]}...")  # Print first 100 characters
         else:
-            print("  Text: None")
-    print("End of search results")
-    if not all_results:
-        print("No search results found. Returning a default message.")
-        return [{"link": None, "text": "No information found in the web search results."}]
     return all_results
-def ask_question(question, temperature, top_p, repetition_penalty, web_search):
     global conversation_history
-    if not question:
-        return "Please enter a question."
-    if question in memory_database and not web_search:
-        answer = memory_database[question]
     else:
-        model = get_model(temperature, top_p, repetition_penalty)
-        embed = get_embeddings()
-        if web_search:
-            search_results = google_search(question)
-            context_str = "\n".join([result["text"] for result in search_results if result["text"]])
-            # Convert web search results to Document format
-            web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
-            # Check if the FAISS database exists
-            if os.path.exists("faiss_database"):
-                database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-                database.add_documents(web_docs)
-            else:
-                database = FAISS.from_documents(web_docs, embed)
-                database.save_local("faiss_database")
-            prompt_template = """
-            Answer the question based on the following web search results:
-            Web Search Results:
-            {context}
-            Current Question: {question}
-            If the web search results don't contain relevant information, state that the information is not available in the search results.
-            Provide a concise and direct answer to the question without mentioning the web search or these instructions:
-            """
-            prompt_val = ChatPromptTemplate.from_template(prompt_template)
-            formatted_prompt = prompt_val.format(context=context_str, question=question)
-        else:
-            # Check if the FAISS database exists
-            if os.path.exists("faiss_database"):
-                database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-            else:
-                return "No FAISS database found. Please upload documents to create the vector store."
-            history_str = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in conversation_history])
-            if is_related_to_history(question, conversation_history):
-                context_str = "No additional context needed. Please refer to the conversation history."
-            else:
-                retriever = database.as_retriever()
-                relevant_docs = retriever.get_relevant_documents(question)
-                context_str = "\n".join([doc.page_content for doc in relevant_docs])
-            prompt_val = ChatPromptTemplate.from_template(prompt)
-            formatted_prompt = prompt_val.format(history=history_str, context=context_str, question=question)
-        answer = generate_chunked_response(model, formatted_prompt)
-        answer = re.split(r'Question:|Current Question:', answer)[-1].strip()
-        # Remove any remaining prompt instructions from the answer
-        answer_lines = answer.split('\n')
-        answer = '\n'.join(line for line in answer_lines if not line.startswith('If') and not line.startswith('Provide'))
-        if not web_search:
-            memory_database[question] = answer
-    if not web_search:
-        conversation_history = manage_conversation_history(question, answer, conversation_history)
     return answer
-def update_vectors(files, use_recursive_splitter):
-    if not files:
-        return "Please upload at least one PDF file."
-    embed = get_embeddings()
-    total_chunks = 0
-    for file in files:
-        if use_recursive_splitter:
-            data = load_and_split_document_recursive(file)
-        else:
-            data = load_and_split_document_basic(file)
-        create_or_update_database(data, embed)
-        total_chunks += len(data)
-    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
 def extract_db_to_excel():
     embed = get_embeddings()
@@ -338,47 +305,46 @@ def export_memory_db_to_excel():
     return excel_path
-# Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Chat with your PDF documents")
-    with gr.Row():
-        file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
-        update_button = gr.Button("Update Vector Store")
-        use_recursive_splitter = gr.Checkbox(label="Use Recursive Text Splitter", value=False)
-    update_output = gr.Textbox(label="Update Status")
-    update_button.click(update_vectors, inputs=[file_input, use_recursive_splitter], outputs=update_output)
-    with gr.Row():
-        with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="Conversation")
-            question_input = gr.Textbox(label="Ask a question about your documents")
-            submit_button = gr.Button("Submit")
-        with gr.Column(scale=1):
-            temperature_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
-            top_p_slider = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1)
-            repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
-            web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
-    def chat(question, history):
-        answer = ask_question(question, temperature_slider.value, top_p_slider.value, repetition_penalty_slider.value, web_search_checkbox.value)
-        history.append((question, answer))
-        return "", history
-    submit_button.click(chat, inputs=[question_input, chatbot], outputs=[question_input, chatbot])
-    extract_button = gr.Button("Extract Database to Excel")
-    excel_output = gr.File(label="Download Excel File")
-    extract_button.click(extract_db_to_excel, inputs=[], outputs=excel_output)
-    export_memory_button = gr.Button("Export Memory Database to Excel")
-    memory_excel_output = gr.File(label="Download Memory Excel File")
-    export_memory_button.click(export_memory_db_to_excel, inputs=[], outputs=memory_excel_output)
-    clear_button = gr.Button("Clear Cache")
-    clear_output = gr.Textbox(label="Cache Status")
-    clear_button.click(clear_cache, inputs=[], outputs=clear_output)
-if __name__ == "__main__":
-    demo.launch()

         print(f"Result {i}:")
         print(f"  Link: {result['link']}")
         if result['text']:
+            print(f"  Text: {result['text'][:100]}...")  # Display the first 100 characters of the text for brevity
         else:
+            print("  No text extracted")
     return all_results
+def process_question(question, documents, history, temperature, top_p, repetition_penalty):
     global conversation_history
+    embeddings = get_embeddings()
+    # Check the memory database for similar questions
+    for prev_question, prev_answer in memory_database.items():
+        similarity = get_similarity(question, prev_question)
+        if similarity > 0.7:
+            return prev_answer
+    # Load the FAISS vector store if it exists
+    if os.path.exists("faiss_database"):
+        db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
+        relevant_docs = db.similarity_search(question, k=3)
     else:
+        relevant_docs = []
+    if len(relevant_docs) == 0:
+        # Perform web search and update the vector store
+        web_search_results = google_search(question, num_results=5)
+        web_docs = [Document(page_content=res["text"] or "", metadata={"source": res["link"]}) for res in web_search_results if res["text"]]
+        if web_docs:
+            # Update the FAISS vector store with new documents
+            create_or_update_database(web_docs, embeddings)
+            # Reload the updated FAISS store and retrieve relevant documents
+            db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
+            relevant_docs = db.similarity_search(question, k=3)
+    context = "\n\n".join([doc.page_content for doc in relevant_docs])
+    if is_related_to_history(question, history):
+        context = "None"
+    else:
+        history_text = "\n".join([f"Q: {h['question']}\nA: {h['answer']}" for h in history])
+        context = context if context else "None"
+    prompt_text = ChatPromptTemplate(
+        input_variables=["history", "context", "question"],
+        template=prompt
+    ).format(history=history_text, context=context, question=question)
+    model = get_model(temperature, top_p, repetition_penalty)
+    answer = generate_chunked_response(model, prompt_text)
+    conversation_history = manage_conversation_history(question, answer, history)
+    memory_database[question] = answer
     return answer
+def process_uploaded_file(file, is_recursive):
+    if is_recursive:
+        data = load_and_split_document_recursive(file)
+    else:
+        data = load_and_split_document_basic(file)
+    embeddings = get_embeddings()
+    create_or_update_database(data, embeddings)
+    return "File processed and data added to the vector database."
 def extract_db_to_excel():
     embed = get_embeddings()
     return excel_path
 with gr.Blocks() as demo:
+    with gr.Tab("Upload PDF"):
+        with gr.Row():
+            pdf_file = gr.File(label="Upload PDF")
+        with gr.Row():
+            recursive_check = gr.Checkbox(label="Use Recursive Text Splitter")
+            upload_button = gr.Button("Upload and Process")
+        with gr.Row():
+            upload_output = gr.Textbox(label="Upload Output")
+    with gr.Tab("Ask Questions"):
+        with gr.Row():
+            question = gr.Textbox(label="Your Question")
+        with gr.Row():
+            temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
+            top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")
+            repetition_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, label="Repetition Penalty")
+        with gr.Row():
+            ask_button = gr.Button("Ask")
+        with gr.Row():
+            answer = gr.Textbox(label="Answer")
+    with gr.Tab("Clear Cache"):
+        with gr.Row():
+            clear_button = gr.Button("Clear Cache")
+        with gr.Row():
+            clear_output = gr.Textbox(label="Clear Output")
+    with gr.Tab("Export Data"):
+        with gr.Row():
+            export_db_button = gr.Button("Export Database to Excel")
+            export_db_output = gr.Textbox(label="Export Output")
+        with gr.Row():
+            export_memory_button = gr.Button("Export Memory DB to Excel")
+            export_memory_output = gr.Textbox(label="Export Output")
+    upload_button.click(process_uploaded_file, [pdf_file, recursive_check], upload_output)
+    ask_button.click(process_question, [question, pdf_file, recursive_check, temperature, top_p, repetition_penalty], answer)
+    clear_button.click(clear_cache, [], clear_output)
+    export_db_button.click(extract_db_to_excel, [], export_db_output)
+    export_memory_button.click(export_memory_db_to_excel, [], export_memory_output)
+demo.launch()