import os
import base64
import gc
import tempfile

import gradio as gr

from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core import PromptTemplate
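
# Assumed dependencies (the standard LlamaIndex integration packages; pin
# versions as needed):
#   pip install gradio cohere llama-index llama-index-llms-cohere \
#       llama-index-embeddings-cohere llama-index-postprocessor-cohere-rerank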

# Read the Cohere API key from the environment instead of hardcoding it
API_KEY = os.environ.get("COHERE_API_KEY")

# Global query engine
query_engine = None

# Reset chat state (currently only triggers garbage collection; not wired into the UI)
def reset_chat():
    gc.collect()

# Function to render a PDF inline from its raw bytes
def display_pdf(file_bytes):
    try:
        base64_pdf = base64.b64encode(file_bytes).decode("utf-8")
        # Note: some browsers restrict data: URIs in iframes, so the inline
        # preview may not render everywhere.
        pdf_display = (
            f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
            'width="100%" height="600px" type="application/pdf"></iframe>'
        )
        return pdf_display
    except Exception as e:
        return f"Error displaying PDF: {e}"

# Function to process the PDF and build the global query engine
def process_pdf(uploaded_file):
    global query_engine  # Use global to modify the global query_engine variable

    if not uploaded_file:
        return "No file uploaded. Please upload a PDF file."

    # Depending on the Gradio version, gr.File passes either a path string
    # or a file-like object; normalize to a path on disk.
    src_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name

    if not src_path.lower().endswith(".pdf"):
        return "Invalid file type. Please upload a PDF file."

    try:
        # Read the upload once so the bytes can be reused for the preview
        with open(src_path, "rb") as f:
            file_bytes = f.read()

        with tempfile.TemporaryDirectory() as temp_dir:
            file_path = os.path.join(temp_dir, os.path.basename(src_path))
            with open(file_path, "wb") as f:
                f.write(file_bytes)

            # Load the PDF from the temporary directory
            loader = SimpleDirectoryReader(
                input_dir=temp_dir,
                required_exts=[".pdf"],
                recursive=True
            )
            docs = loader.load_data()

            # Setting up LLM & embedding model
            llm = Cohere(api_key=API_KEY, model="command")
            embed_model = CohereEmbedding(
                cohere_api_key=API_KEY,
                model_name="embed-english-v3.0",
                # Use "search_document" when embedding documents for the index;
                # "search_query" is intended for query-time embeddings.
                input_type="search_document",
            )

            Settings.embed_model = embed_model
            index = VectorStoreIndex.from_documents(docs, show_progress=True)

            # Create a Cohere reranker to reorder retrieved nodes by relevance
            cohere_rerank = CohereRerank(api_key=API_KEY)

            # Create the query engine
            Settings.llm = llm
            query_engine = index.as_query_engine(streaming=True, node_postprocessors=[cohere_rerank])

            # Customizing prompt template
            qa_prompt_tmpl_str = (
                "Context information is below.\n"
                "---------------------\n"
                "{context_str}\n"
                "---------------------\n"
                "Given the context information above, I want you to think step by step to answer the query in a crisp manner. "
                "If you don't know the answer, say 'I don't know!'.\n"
                "Query: {query_str}\n"
                "Answer: "
            )
            qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

            query_engine.update_prompts(
                {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
            )
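            # query_engine.get_prompts() can be used here to confirm the
            # custom template override took effect.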

            # Return only the preview HTML; the query engine is stored globally
            return display_pdf(file_bytes)
    except Exception as e:
        return f"An error occurred during PDF processing: {e}"

# Function to handle chat queries
def chat_with_pdf(prompt):
    if not query_engine:
        return "Please upload and process a PDF file first."
    
    try:
        # Accumulate the streamed chunks into a single response string
        full_response = ""
        streaming_response = query_engine.query(prompt)

        for chunk in streaming_response.response_gen:
            full_response += chunk

        return full_response
    except Exception as e:
        return f"An error occurred during the query process: {e}"

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Searchable Document Chatbot")
    gr.Markdown("Upload your PDF document and start asking questions.")

    pdf_file = gr.File(label="Upload your PDF file", file_types=[".pdf"])
    pdf_preview = gr.HTML(label="PDF Preview")

    process_button = gr.Button("Process PDF")
    
    chat_input = gr.Textbox(label="Ask a question")
    chat_output = gr.Textbox(label="Chat Response")
    
    process_button.click(fn=process_pdf, inputs=pdf_file, outputs=pdf_preview)
    chat_input.submit(fn=chat_with_pdf, inputs=chat_input, outputs=chat_output)

    gr.Markdown("Made with ❤️ by Muhammad Ibrahim Qasmi")

demo.launch()