# ChatwithPDF / app.py
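"""Gradio app for chatting with an uploaded PDF via LlamaIndex and Cohere.

Uploaded PDFs are indexed with Cohere embeddings, retrieved chunks are
reranked with Cohere Rerank, and answers are generated (streamed) by the
Cohere "command" model.
"""
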
import os
import base64
import gc
import tempfile
import gradio as gr
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core import PromptTemplate

# Cohere API key, read from the environment rather than hard-coded in the source
API_KEY = os.getenv("COHERE_API_KEY")

# Global query engine, populated once a PDF has been processed
query_engine = None

# Function to reset chat
def reset_chat():
    gc.collect()

# Function to display the PDF inline as a base64-encoded iframe
def display_pdf(file_path):
    try:
        with open(file_path, "rb") as f:
            base64_pdf = base64.b64encode(f.read()).decode("utf-8")
        return (
            f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
            'width="100%" height="600px" type="application/pdf"></iframe>'
        )
    except Exception as e:
        return f"Error displaying PDF: {e}"

# Function to process the uploaded PDF and build a query engine over it
def process_pdf(uploaded_file):
    global query_engine  # store the engine so chat_with_pdf can reuse it

    if not uploaded_file:
        return "No file uploaded. Please upload a PDF file."

    # Gradio passes either a file path string or a file-like object with a .name attribute
    source_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
    if not source_path.lower().endswith(".pdf"):
        return "Invalid file type. Please upload a PDF file."

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Copy the upload into a scratch directory for SimpleDirectoryReader
            file_path = os.path.join(temp_dir, os.path.basename(source_path))
            with open(source_path, "rb") as src, open(file_path, "wb") as dst:
                dst.write(src.read())

            # Load the PDF and create an index over it
            loader = SimpleDirectoryReader(
                input_dir=temp_dir,
                required_exts=[".pdf"],
                recursive=True,
            )
            docs = loader.load_data()

            # Set up the LLM and embedding model
            llm = Cohere(api_key=API_KEY, model="command")
            embed_model = CohereEmbedding(
                cohere_api_key=API_KEY,
                model_name="embed-english-v3.0",
                input_type="search_query",
            )
            Settings.embed_model = embed_model
            Settings.llm = llm

            index = VectorStoreIndex.from_documents(docs, show_progress=True)

            # Create a Cohere reranker and a streaming query engine
            cohere_rerank = CohereRerank(api_key=API_KEY)
            query_engine = index.as_query_engine(
                streaming=True, node_postprocessors=[cohere_rerank]
            )

            # Customize the prompt template
            qa_prompt_tmpl_str = (
                "Context information is below.\n"
                "---------------------\n"
                "{context_str}\n"
                "---------------------\n"
                "Given the context information above, I want you to think step by step "
                "to answer the query in a crisp manner. "
                "If you don't know the answer, say 'I don't know!'.\n"
                "Query: {query_str}\n"
                "Answer: "
            )
            qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
            query_engine.update_prompts(
                {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
            )

            # Return only the preview HTML; the click handler has a single output
            return display_pdf(file_path)
    except Exception as e:
        return f"An error occurred during PDF processing: {e}"

# Function to handle chat queries
def chat_with_pdf(prompt):
    if not query_engine:
        return "Please upload and process a PDF file first."
    try:
        full_response = ""
        streaming_response = query_engine.query(prompt)
        for chunk in streaming_response.response_gen:
            full_response += chunk
        return full_response
    except Exception as e:
        return f"An error occurred during the query process: {e}"

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Searchable Document Chatbot")
    gr.Markdown("Upload your PDF document and start asking questions.")

    pdf_file = gr.File(label="Upload your PDF file", file_types=[".pdf"])
    pdf_preview = gr.HTML(label="PDF Preview")
    process_button = gr.Button("Process PDF")
    chat_input = gr.Textbox(label="Ask a question")
    chat_output = gr.Textbox(label="Chat Response")

    process_button.click(fn=process_pdf, inputs=pdf_file, outputs=pdf_preview)
    chat_input.submit(fn=chat_with_pdf, inputs=chat_input, outputs=chat_output)

    gr.Markdown("Made with ❤️ by Muhammad Ibrahim Qasmi")

demo.launch()
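
# To run locally (assumes the Cohere key is exported as COHERE_API_KEY,
# the environment variable this script reads above):
#   export COHERE_API_KEY="your-key"
#   python app.py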