import os
import base64
import gc
import tempfile

import gradio as gr

from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core import PromptTemplate
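
# Assumed dependencies (the standard LlamaIndex integration packages; pin
# versions as needed):
#   pip install gradio cohere llama-index llama-index-llms-cohere \
#       llama-index-embeddings-cohere llama-index-postprocessor-cohere-rerank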

# Read the Cohere API key from the environment instead of hardcoding it
API_KEY = os.environ.get("COHERE_API_KEY")

# Global query engine
query_engine = None

# Reset chat state (currently only triggers garbage collection; not wired into the UI)
def reset_chat():
    gc.collect()

# Function to render a PDF inline from its raw bytes
def display_pdf(file_bytes):
    try:
        base64_pdf = base64.b64encode(file_bytes).decode("utf-8")
        # Note: some browsers restrict data: URIs in iframes, so the inline
        # preview may not render everywhere.
        pdf_display = (
            f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
            'width="100%" height="600px" type="application/pdf"></iframe>'
        )
        return pdf_display
    except Exception as e:
        return f"Error displaying PDF: {e}"

# Function to process the PDF and build the global query engine
def process_pdf(uploaded_file):
    global query_engine  # Use global to modify the global query_engine variable

    if not uploaded_file:
        return "No file uploaded. Please upload a PDF file."

    # Depending on the Gradio version, gr.File passes either a path string
    # or a file-like object; normalize to a path on disk.
    src_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name

    if not src_path.lower().endswith(".pdf"):
        return "Invalid file type. Please upload a PDF file."

    try:
        # Read the upload once so the bytes can be reused for the preview
        with open(src_path, "rb") as f:
            file_bytes = f.read()

        with tempfile.TemporaryDirectory() as temp_dir:
            file_path = os.path.join(temp_dir, os.path.basename(src_path))
            with open(file_path, "wb") as f:
                f.write(file_bytes)

            # Load the PDF from the temporary directory
            loader = SimpleDirectoryReader(
                input_dir=temp_dir,
                required_exts=[".pdf"],
                recursive=True
            )
            docs = loader.load_data()

            # Setting up LLM & embedding model
            llm = Cohere(api_key=API_KEY, model="command")
            embed_model = CohereEmbedding(
                cohere_api_key=API_KEY,
                model_name="embed-english-v3.0",
                # Use "search_document" when embedding documents for the index;
                # "search_query" is intended for query-time embeddings.
                input_type="search_document",
            )

            Settings.embed_model = embed_model
            index = VectorStoreIndex.from_documents(docs, show_progress=True)

            # Create a Cohere reranker to reorder retrieved nodes by relevance
            cohere_rerank = CohereRerank(api_key=API_KEY)

            # Create the query engine
            Settings.llm = llm
            query_engine = index.as_query_engine(streaming=True, node_postprocessors=[cohere_rerank])

            # Customizing prompt template
            qa_prompt_tmpl_str = (
                "Context information is below.\n"
                "---------------------\n"
                "{context_str}\n"
                "---------------------\n"
                "Given the context information above, I want you to think step by step to answer the query in a crisp manner. "
                "If you don't know the answer, say 'I don't know!'.\n"
                "Query: {query_str}\n"
                "Answer: "
            )
            qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

            query_engine.update_prompts(
                {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
            )
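            # query_engine.get_prompts() can be used here to confirm the
            # custom template override took effect.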

            # Return only the preview HTML; the query engine is stored globally
            return display_pdf(file_bytes)
    except Exception as e:
        return f"An error occurred during PDF processing: {e}"

# Function to handle chat queries
def chat_with_pdf(prompt):
    if not query_engine:
        return "Please upload and process a PDF file first."
    
    try:
        # Accumulate the streamed chunks into a single response string
        full_response = ""
        streaming_response = query_engine.query(prompt)

        for chunk in streaming_response.response_gen:
            full_response += chunk

        return full_response
    except Exception as e:
        return f"An error occurred during the query process: {e}"

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Searchable Document Chatbot")
    gr.Markdown("Upload your PDF document and start asking questions.")

    pdf_file = gr.File(label="Upload your PDF file", file_types=[".pdf"])
    pdf_preview = gr.HTML(label="PDF Preview")

    process_button = gr.Button("Process PDF")
    
    chat_input = gr.Textbox(label="Ask a question")
    chat_output = gr.Textbox(label="Chat Response")
    
    process_button.click(fn=process_pdf, inputs=pdf_file, outputs=pdf_preview)
    chat_input.submit(fn=chat_with_pdf, inputs=chat_input, outputs=chat_output)

    gr.Markdown("Made with ❤️ by Muhammad Ibrahim Qasmi")

demo.launch()