# ChatwithPDF / app.py
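"""Gradio app for chatting with an uploaded PDF via LlamaIndex and Cohere.

Uploaded PDFs are indexed with Cohere embeddings, retrieved chunks are
reranked with Cohere Rerank, and answers are generated (streamed) by the
Cohere "command" model.
"""
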
import os
import base64
import gc
import tempfile
import gradio as gr
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core import PromptTemplate

# Cohere API key, read from the environment rather than hard-coded in the source
API_KEY = os.getenv("COHERE_API_KEY")

# Global query engine, populated once a PDF has been processed
query_engine = None

# Function to reset chat
def reset_chat():
    gc.collect()

# Function to display the PDF inline as a base64-encoded iframe
def display_pdf(file_path):
    try:
        with open(file_path, "rb") as f:
            base64_pdf = base64.b64encode(f.read()).decode("utf-8")
        return (
            f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
            'width="100%" height="600px" type="application/pdf"></iframe>'
        )
    except Exception as e:
        return f"Error displaying PDF: {e}"

# Function to process the uploaded PDF and build a query engine over it
def process_pdf(uploaded_file):
    global query_engine  # store the engine so chat_with_pdf can reuse it

    if not uploaded_file:
        return "No file uploaded. Please upload a PDF file."

    # Gradio passes either a file path string or a file-like object with a .name attribute
    source_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
    if not source_path.lower().endswith(".pdf"):
        return "Invalid file type. Please upload a PDF file."

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Copy the upload into a scratch directory for SimpleDirectoryReader
            file_path = os.path.join(temp_dir, os.path.basename(source_path))
            with open(source_path, "rb") as src, open(file_path, "wb") as dst:
                dst.write(src.read())

            # Load the PDF and create an index over it
            loader = SimpleDirectoryReader(
                input_dir=temp_dir,
                required_exts=[".pdf"],
                recursive=True,
            )
            docs = loader.load_data()

            # Set up the LLM and embedding model
            llm = Cohere(api_key=API_KEY, model="command")
            embed_model = CohereEmbedding(
                cohere_api_key=API_KEY,
                model_name="embed-english-v3.0",
                input_type="search_query",
            )
            Settings.embed_model = embed_model
            Settings.llm = llm

            index = VectorStoreIndex.from_documents(docs, show_progress=True)

            # Create a Cohere reranker and a streaming query engine
            cohere_rerank = CohereRerank(api_key=API_KEY)
            query_engine = index.as_query_engine(
                streaming=True, node_postprocessors=[cohere_rerank]
            )

            # Customize the prompt template
            qa_prompt_tmpl_str = (
                "Context information is below.\n"
                "---------------------\n"
                "{context_str}\n"
                "---------------------\n"
                "Given the context information above, I want you to think step by step "
                "to answer the query in a crisp manner. "
                "If you don't know the answer, say 'I don't know!'.\n"
                "Query: {query_str}\n"
                "Answer: "
            )
            qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
            query_engine.update_prompts(
                {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
            )

            # Return only the preview HTML; the click handler has a single output
            return display_pdf(file_path)
    except Exception as e:
        return f"An error occurred during PDF processing: {e}"

# Function to handle chat queries
def chat_with_pdf(prompt):
    if not query_engine:
        return "Please upload and process a PDF file first."
    try:
        full_response = ""
        streaming_response = query_engine.query(prompt)
        for chunk in streaming_response.response_gen:
            full_response += chunk
        return full_response
    except Exception as e:
        return f"An error occurred during the query process: {e}"

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Searchable Document Chatbot")
    gr.Markdown("Upload your PDF document and start asking questions.")

    pdf_file = gr.File(label="Upload your PDF file", file_types=[".pdf"])
    pdf_preview = gr.HTML(label="PDF Preview")
    process_button = gr.Button("Process PDF")
    chat_input = gr.Textbox(label="Ask a question")
    chat_output = gr.Textbox(label="Chat Response")

    process_button.click(fn=process_pdf, inputs=pdf_file, outputs=pdf_preview)
    chat_input.submit(fn=chat_with_pdf, inputs=chat_input, outputs=chat_output)

    gr.Markdown("Made with ❤️ by Muhammad Ibrahim Qasmi")

demo.launch()
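
# To run locally (assumes the Cohere key is exported as COHERE_API_KEY,
# the environment variable this script reads above):
#   export COHERE_API_KEY="your-key"
#   python app.py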