Shreyas094 committed on
Commit 487fdcd · verified · 1 Parent(s): 5999644

Update app.py

Files changed (1)
  1. app.py +98 -111
app.py CHANGED
@@ -20,8 +20,9 @@ huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")

 MODELS = [
-    "mistralai/Mistral-7B-Instruct-v0.3",
+    "google/gemma-2-9b",
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "mistralai/Mistral-7B-Instruct-v0.3",
     "microsoft/Phi-3-mini-4k-instruct"
 ]

@@ -77,53 +78,76 @@ def update_vectors(files, parser):

     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."

-def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=5, temperature=0.2, should_stop=False):
+def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, stop_clicked=None):
     print(f"Starting generate_chunked_response with {num_calls} calls")
     client = InferenceClient(model, token=huggingface_token)
-    full_response = ""
+    full_responses = []
     messages = [{"role": "user", "content": prompt}]

     for i in range(num_calls):
         print(f"Starting API call {i+1}")
-        if should_stop:
+        if (isinstance(stop_clicked, gr.State) and stop_clicked.value) or stop_clicked:
             print("Stop clicked, breaking loop")
             break
         try:
+            response = ""
             for message in client.chat_completion(
                 messages=messages,
                 max_tokens=max_tokens,
                 temperature=temperature,
                 stream=True,
             ):
-                if should_stop:
+                if (isinstance(stop_clicked, gr.State) and stop_clicked.value) or stop_clicked:
                     print("Stop clicked during streaming, breaking")
                     break
                 if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                     chunk = message.choices[0].delta.content
-                    full_response += chunk
-            print(f"API call {i+1} completed")
+                    response += chunk
+            print(f"API call {i+1} response: {response[:100]}...")
+            full_responses.append(response)
         except Exception as e:
             print(f"Error in generating response: {str(e)}")

-    # Clean up the response
-    clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
+    # Combine responses and clean up
+    combined_response = " ".join(full_responses)
+    clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', combined_response, flags=re.DOTALL)
     clean_response = clean_response.replace("Using the following context:", "").strip()
     clean_response = clean_response.replace("Using the following context from the PDF documents:", "").strip()

-    # Remove duplicate paragraphs and sentences
-    paragraphs = clean_response.split('\n\n')
+    # Split the response into main content and sources
+    parts = re.split(r'\n\s*Sources:\s*\n', clean_response, flags=re.IGNORECASE, maxsplit=1)
+    main_content = parts[0].strip()
+    sources = parts[1].strip() if len(parts) > 1 else ""
+
+    # Process main content
+    paragraphs = main_content.split('\n\n')
     unique_paragraphs = []
     for paragraph in paragraphs:
         if paragraph not in unique_paragraphs:
-            sentences = paragraph.split('. ')
             unique_sentences = []
+            sentences = paragraph.split('. ')
             for sentence in sentences:
                 if sentence not in unique_sentences:
                     unique_sentences.append(sentence)
             unique_paragraphs.append('. '.join(unique_sentences))

-    final_response = '\n\n'.join(unique_paragraphs)
+    final_content = '\n\n'.join(unique_paragraphs)

+    # Process sources
+    if sources:
+        source_lines = sources.split('\n')
+        unique_sources = []
+        for line in source_lines:
+            if line.strip() and line not in unique_sources:
+                unique_sources.append(line)
+        final_sources = '\n'.join(unique_sources)
+        final_response = f"{final_content}\n\nSources:\n{final_sources}"
+    else:
+        final_response = final_content
+
+    # Remove any content after the sources
+    final_response = re.sub(r'(Sources:.*?)(?:\n\n|\Z).*', r'\1', final_response, flags=re.DOTALL)
+
     print(f"Final clean response: {final_response[:100]}...")
     return final_response

@@ -137,104 +161,82 @@ class CitingSources(BaseModel):
         ...,
         description="List of sources to cite. Should be an URL of the source."
     )
-def chatbot_interface(message, history, use_web_search, model, temperature, num_calls):
-    if not message.strip():
-        return "", history
-
-    history = history + [(message, "")]
-
-    try:
-        if use_web_search:
-            for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
-                history[-1] = (message, f"{main_content}\n\n{sources}")
-                yield history
-        else:
-            for partial_response in get_response_from_pdf(message, model, num_calls=num_calls, temperature=temperature):
-                history[-1] = (message, partial_response)
-                yield history
-    except gr.CancelledError:
-        yield history

-def retry_last_response(history, use_web_search, model, temperature, num_calls):
-    if not history:
-        return history
-
-    last_user_msg = history[-1][0]
-    history = history[:-1]  # Remove the last response
-
-    return chatbot_interface(last_user_msg, history, use_web_search, model, temperature, num_calls)
-
-def respond(message, history, model, temperature, num_calls, use_web_search):
-    if use_web_search:
-        for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
-            yield f"{main_content}\n\n{sources}"
-    else:
-        for partial_response, _ in get_response_from_pdf(message, model, num_calls=num_calls, temperature=temperature):
-            yield partial_response
-
-def get_response_with_search(query, model, num_calls=5, temperature=0.2):
+def get_response_with_search(query, model, num_calls=3, temperature=0.2, stop_clicked=None):
     search_results = duckduckgo_search(query)
     context = "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
                         for result in search_results if 'body' in result)

-    prompt = f"""Using the following context:
+    prompt = f"""<s>[INST] Using the following context:
 {context}
 Write a detailed and complete research document that fulfills the following user request: '{query}'
-After writing the document, please provide a list of sources used in your response."""
+After writing the document, please provide a list of sources used in your response. [/INST]"""

-    client = InferenceClient(model, token=huggingface_token)
+    generated_text = generate_chunked_response(prompt, model, num_calls=num_calls, temperature=temperature, stop_clicked=stop_clicked)
+
+    # Clean the response
+    clean_text = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', generated_text, flags=re.DOTALL)
+    clean_text = clean_text.replace("Using the following context:", "").strip()

-    main_content = ""
-    for i in range(num_calls):
-        for message in client.chat_completion(
-            messages=[{"role": "user", "content": prompt}],
-            max_tokens=1000,
-            temperature=temperature,
-            stream=True,
-        ):
-            if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                chunk = message.choices[0].delta.content
-                main_content += chunk
-                yield main_content, ""  # Yield partial main content without sources
+    # Split the content and sources
+    parts = clean_text.split("Sources:", 1)
+    main_content = parts[0].strip()
+    sources = parts[1].strip() if len(parts) > 1 else ""
+
+    return main_content, sources

-def get_response_from_pdf(query, model, num_calls=5, temperature=0.2):
+def get_response_from_pdf(query, model, num_calls=3, temperature=0.2, stop_clicked=None):
     embed = get_embeddings()
     if os.path.exists("faiss_database"):
         database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
     else:
-        yield "No documents available. Please upload PDF documents to answer questions."
-        return
+        return "No documents available. Please upload PDF documents to answer questions."

     retriever = database.as_retriever()
     relevant_docs = retriever.get_relevant_documents(query)
     context_str = "\n".join([doc.page_content for doc in relevant_docs])

-    prompt = f"""Using the following context from the PDF documents:
+    prompt = f"""<s>[INST] Using the following context from the PDF documents:
 {context_str}
-Write a detailed and complete response that fully answers the following user question.
-Ensure your response covers all relevant information and is not cut off: '{query}'
-If the response is long, please continue until you have provided a comprehensive answer."""
+Write a detailed and complete response that answers the following user question: '{query}'
+Do not include a list of sources in your response. [/INST]"""

-    client = InferenceClient(model, token=huggingface_token)
-
-    response = ""
-    for i in range(num_calls):
-        for message in client.chat_completion(
-            messages=[{"role": "user", "content": prompt}],
-            max_tokens=2000,
-            temperature=temperature,
-            stream=True,
-        ):
-            if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                chunk = message.choices[0].delta.content
-                response += chunk
-                yield response, ""  # Yield accumulated response with an empty string for consistency
+    generated_text = generate_chunked_response(prompt, model, num_calls=num_calls, temperature=temperature, stop_clicked=stop_clicked)

-def vote(data: gr.LikeData):
-    if data.liked:
-        print(f"You upvoted this response: {data.value}")
+    # Clean the response
+    clean_text = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', generated_text, flags=re.DOTALL)
+    clean_text = clean_text.replace("Using the following context from the PDF documents:", "").strip()
+
+    return clean_text
+
+def chatbot_interface(message, history, use_web_search, model, temperature):
+    if not message.strip():  # Check if the message is empty or just whitespace
+        return history
+
+    if use_web_search:
+        main_content, sources = get_response_with_search(message, model, temperature)
+        formatted_response = f"{main_content}\n\nSources:\n{sources}"
     else:
-        print(f"You downvoted this response: {data.value}")
+        response = get_response_from_pdf(message, model, temperature)
+        formatted_response = response
+
+    # Check if the last message in history is the same as the current message
+    if history and history[-1][0] == message:
+        # Replace the last response instead of adding a new one
+        history[-1] = (message, formatted_response)
+    else:
+        # Add the new message-response pair
+        history.append((message, formatted_response))
+
+    return history
+
+
+def respond(message, history, model, temperature, num_calls, use_web_search):
+    if use_web_search:
+        main_content, sources = get_response_with_search(message, model, num_calls=num_calls, temperature=temperature)
+        return f"{main_content}\n\nSources:\n{sources}"
+    else:
+        return get_response_from_pdf(message, model, num_calls=num_calls, temperature=temperature)

 css = """
 /* Add your custom CSS here */
@@ -245,34 +247,18 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[1]),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
-        gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
+        gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of API Calls"),
         gr.Checkbox(label="Use Web Search", value=False)
     ],
     title="AI-powered Web Search and PDF Chat Assistant",
     description="Chat with your PDFs or use web search to answer questions.",
-    theme=gr.themes.Soft(
-        primary_hue="orange",
-        secondary_hue="amber",
-        neutral_hue="gray",
-        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
-    ).set(
-        body_background_fill_dark="#0c0505",
-        block_background_fill_dark="#0c0505",
-        block_border_width="1px",
-        block_title_background_fill_dark="#1b0f0f",
-        input_background_fill_dark="#140b0b",
-        button_secondary_background_fill_dark="#140b0b",
-        border_color_accent_dark="#1b0f0f",
-        border_color_primary_dark="#1b0f0f",
-        background_fill_secondary_dark="#0c0505",
-        color_accent_soft_dark="transparent",
-        code_background_fill_dark="#140b0b"
-    ),
+    theme=gr.themes.Soft(),
     css=css,
     examples=[
-        ["Tell me about the contents of the uploaded PDFs."],
-        ["What are the main topics discussed in the documents?"],
-        ["Can you summarize the key points from the PDFs?"]
+        ["What are the latest developments in AI?"],
+        ["Tell me about recent updates on GitHub"],
+        ["What are the best hotels in Galapagos, Ecuador?"],
+        ["Summarize recent advancements in Python programming"],
    ],
     cache_examples=False,
     analytics_enabled=False,
@@ -281,6 +267,7 @@ demo = gr.ChatInterface(
 # Add file upload functionality
 with demo:
     gr.Markdown("## Upload PDF Documents")
+
     with gr.Row():
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
         parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
@@ -302,4 +289,4 @@ with demo:
 )

 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch(share=True)
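
For orientation, a minimal sketch of calling the reworked generate_chunked_response(); the prompt text and the stop values below are illustrative assumptions, not part of this commit.

# Hypothetical usage sketch, assuming it runs inside app.py where MODELS and
# generate_chunked_response are already defined. stop_clicked is the new parameter:
# a falsy value lets all num_calls API calls run; a truthy value makes the function
# break out of the loop (the same check is also applied while streaming).
prompt = "<s>[INST] Summarize the key points of the uploaded documents. [/INST]"

text = generate_chunked_response(prompt, MODELS[1], num_calls=3, stop_clicked=False)    # never interrupted
print(text[:200])

aborted = generate_chunked_response(prompt, MODELS[1], num_calls=3, stop_clicked=True)  # stops before the first call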