Spaces:

ibrahim313
/

ChatwithPDF

Runtime error

App Files Files Community

ibrahim313 committed on Aug 23, 2024

Commit

855467f

verified ·

1 Parent(s): d451b60

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -113

app.py CHANGED Viewed

@@ -1,132 +1,81 @@
 import os
-import base64
-import gc
-import tempfile
 import gradio as gr
 from llama_index.core import Settings
-from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.llms.cohere import Cohere
 from llama_index.embeddings.cohere import CohereEmbedding
 from llama_index.postprocessor.cohere_rerank import CohereRerank
-from llama_index.core import PromptTemplate
-# Your Cohere API Key
-API_KEY = "ziEpsRreaJzBi5HUDap7gMecJWXX69O26Hf71Kxo"
-# Global query engine
-query_engine = None
-# Function to reset chat
-def reset_chat():
-    gc.collect()
-# Function to display PDF file
-def display_pdf(file):
-    try:
-        base64_pdf = base64.b64encode(file.read()).decode("utf-8")
-        pdf_display = f"""<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600px" type="application/pdf">
-                        </iframe>"""
-        return pdf_display
-    except Exception as e:
-        return f"Error displaying PDF: {e}"
-# Function to process PDF and generate a query engine
-def process_pdf(uploaded_file):
-    global query_engine  # Use global to modify the global query_engine variable
-    if not uploaded_file:
-        return None, "No file uploaded. Please upload a PDF file."
-    if not uploaded_file.name.lower().endswith(".pdf"):
-        return None, "Invalid file type. Please upload a PDF file."
-    try:
-        with tempfile.TemporaryDirectory() as temp_dir:
-            file_path = os.path.join(temp_dir, uploaded_file.name)
-            with open(file_path, "wb") as f:
-                f.write(uploaded_file.read())
-            # Creating an index over loaded data
-            loader = SimpleDirectoryReader(
                 input_dir=temp_dir,
                 required_exts=[".pdf"],
                 recursive=True
             )
-            docs = loader.load_data()
-            # Setting up LLM & embedding model
-            llm = Cohere(api_key=API_KEY, model="command")
-            embed_model = CohereEmbedding(
-                cohere_api_key=API_KEY,
-                model_name="embed-english-v3.0",
-                input_type="search_query",
-            )
-            Settings.embed_model = embed_model
-            index = VectorStoreIndex.from_documents(docs, show_progress=True)
-            # Create a cohere reranker
-            cohere_rerank = CohereRerank(api_key=API_KEY)
-            # Create the query engine
-            Settings.llm = llm
-            query_engine = index.as_query_engine(streaming=True, node_postprocessors=[cohere_rerank])
-            # Customizing prompt template
-            qa_prompt_tmpl_str = (
-                "Context information is below.\n"
-                "---------------------\n"
-                "{context_str}\n"
-                "---------------------\n"
-                "Given the context information above, I want you to think step by step to answer the query in a crisp manner. "
-                "If you don't know the answer, say 'I don't know!'.\n"
-                "Query: {query_str}\n"
-                "Answer: "
-            )
-            qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
-            query_engine.update_prompts(
-                {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
-            )
-            return query_engine, display_pdf(uploaded_file)
-    except Exception as e:
-        return None, f"An error occurred during PDF processing: {e}"
-# Function to handle chat queries
-def chat_with_pdf(prompt):
-    if not query_engine:
-        return "Please upload and process a PDF file first."
-    try:
-        full_response = ""
-        streaming_response = query_engine.query(prompt)
-        for chunk in streaming_response.response_gen:
-            full_response += chunk
-        return full_response
-    except Exception as e:
-        return f"An error occurred during the query process: {e}"
-# Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("# 🔍 Searchable Document Chatbot")
-    gr.Markdown("Upload your PDF document and start asking questions.")
-    pdf_file = gr.File(label="Upload your PDF file", file_types=[".pdf"])
-    pdf_preview = gr.HTML(label="PDF Preview")
-    process_button = gr.Button("Process PDF")
-    chat_input = gr.Textbox(label="Ask a question")
-    chat_output = gr.Textbox(label="Chat Response")
-    process_button.click(fn=process_pdf, inputs=pdf_file, outputs=pdf_preview)
-    chat_input.submit(fn=chat_with_pdf, inputs=chat_input, outputs=chat_output)
-    gr.Markdown("Made with ❤️ by Muhammad Ibrahim Qasmi")
-demo.launch()

 import os
+import nest_asyncio
 import gradio as gr
+from dotenv import load_dotenv
+from IPython.display import Markdown, display
 from llama_index.core import Settings
+from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader
 from llama_index.llms.cohere import Cohere
 from llama_index.embeddings.cohere import CohereEmbedding
 from llama_index.postprocessor.cohere_rerank import CohereRerank
+# allows nested access to the event loop
+nest_asyncio.apply()
+# Pull variables from a local .env file (if one exists) into the environment,
+# so the Cohere key never has to be written into this file.
+load_dotenv()
+# Cohere API key, read from the COHERE_API_KEY environment variable.
+# Create one at: https://dashboard.cohere.com/api-keys
+# NOTE(review): a key was previously hard-coded on this line; treat it as
+# leaked and revoke it in the Cohere dashboard.
+API_KEY = os.getenv("COHERE_API_KEY")
+if not API_KEY:
+    # Fail at startup with an actionable message instead of on the first query.
+    raise RuntimeError(
+        "COHERE_API_KEY is not set. Define it as an environment variable "
+        "(or Space secret) or in a .env file."
+    )
+# setup llm & embedding model
+llm = Cohere(api_key=API_KEY, model="command-r-plus")
+embed_model = CohereEmbedding(
+    cohere_api_key=API_KEY,
+    model_name="embed-english-v3.0",
+    input_type="search_query",
+)
+# Function to load data from uploaded PDF
+def process_pdfs(pdf_files):
+    """Index the uploaded PDFs and return a reranking query engine over them.
+
+    Args:
+        pdf_files: Iterable of uploads. Each item is either a filesystem path
+            (``str`` / ``os.PathLike``) or an object exposing ``.name`` and,
+            when ``.name`` is not an existing file, ``.read()``.
+
+    Returns:
+        A ``(index, query_engine)`` tuple: the ``VectorStoreIndex`` built from
+        the uploads and a query engine that post-processes retrieved nodes
+        with ``CohereRerank``.
+
+    Raises:
+        ValueError: If ``pdf_files`` is empty or ``None``.
+    """
+    # Function-scope imports: only this function needs them.
+    import shutil
+    import tempfile
+
+    if not pdf_files:
+        raise ValueError("No PDF files were uploaded.")
+
+    # A fresh directory per call: a fixed, persistent directory would keep the
+    # PDFs of earlier calls and silently mix them into this index.
+    with tempfile.TemporaryDirectory() as temp_dir:
+        for position, file in enumerate(pdf_files):
+            if isinstance(file, (str, os.PathLike)):
+                source = os.fspath(file)
+            else:
+                source = file.name
+            # Join only the base name: if ``source`` is an absolute path,
+            # os.path.join() would drop temp_dir and the write below would
+            # truncate the upload itself. The position prefix keeps two
+            # uploads with the same file name from overwriting each other.
+            target = os.path.join(
+                temp_dir, f"{position}_{os.path.basename(source)}"
+            )
+            if os.path.isfile(source):
+                shutil.copyfile(source, target)
+            else:
+                # No file on disk behind the name: fall back to the stream.
+                with open(target, 'wb') as f:
+                    f.write(file.read())
+        # Load data from the temporary directory
+        loader = SimpleDirectoryReader(
+            input_dir=temp_dir,
+            required_exts=[".pdf"],
+            recursive=True,
+        )
+        docs = loader.load_data()
+        # Create an index over loaded data
+        Settings.embed_model = embed_model
+        index = VectorStoreIndex.from_documents(docs, show_progress=True)
+    # Create a cohere reranker
+    cohere_rerank = CohereRerank(api_key=API_KEY)
+    # Create the query engine, where we use a cohere reranker on the fetched nodes
+    Settings.llm = llm
+    query_engine = index.as_query_engine(node_postprocessors=[cohere_rerank])
+    return index, query_engine
+# Query function
+def query_pdfs(pdf_files, question):
+    """Answer ``question`` against the given PDFs and return the reply as text.
+
+    The uploads are passed to ``process_pdfs`` on every call; only the query
+    engine from its result is used here.
+    """
+    _, engine = process_pdfs(pdf_files)
+    return str(engine.query(question))
+# Create Gradio interface
+# Components are taken from the top-level ``gr`` namespace: the legacy
+# ``gr.inputs`` module is deprecated/removed in current Gradio releases, and
+# multi-file upload is selected with ``file_count="multiple"`` (``File`` has no
+# ``multiple`` argument).
+iface = gr.Interface(
+    fn=query_pdfs,
+    inputs=[
+        gr.File(
+            label="Upload PDF Files",
+            file_count="multiple",
+            file_types=[".pdf"],
+        ),
+        gr.Textbox(
+            label="Ask a Question",
+            placeholder="Enter your question here...",
+        ),
+    ],
+    outputs="text",
+    title="PDF Query System",
+    description="Upload PDF files and ask questions to extract information from them.",
+)
+# Start the web UI only when this file is run as a script.
+if __name__ == "__main__":
+    iface.launch()