# ChatwithPDF / app.py
# Source: Hugging Face Space by ibrahim313, commit 855467f ("Update app.py", verified), 2.57 kB.
import os
import nest_asyncio
import gradio as gr
from dotenv import load_dotenv
from IPython.display import Markdown, display
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.postprocessor.cohere_rerank import CohereRerank
# allows nested access to the event loop
nest_asyncio.apply()

# Pull settings from a local .env file (if one exists) into the environment.
load_dotenv()

# SECURITY: a Cohere API key used to be hard-coded on this line. Any key that
# has been committed to source control must be treated as compromised and
# revoked. Supply a fresh key through the COHERE_API_KEY environment variable
# (a .env file or a deployment secret); CO_API_KEY is accepted as a fallback.
# Keys are created at: https://dashboard.cohere.com/api-keys
API_KEY = os.getenv("COHERE_API_KEY") or os.getenv("CO_API_KEY")
if not API_KEY:
    # Fail at start-up with an actionable message instead of surfacing an
    # authentication error on the first user query.
    raise RuntimeError(
        "Cohere API key not found. Set the COHERE_API_KEY environment "
        "variable (or add it to a .env file) before starting the app."
    )

# setup llm & embedding model
llm = Cohere(api_key=API_KEY, model="command-r-plus")
embed_model = CohereEmbedding(
    cohere_api_key=API_KEY,
    model_name="embed-english-v3.0",
    # NOTE(review): this same embedder is installed as Settings.embed_model
    # and therefore also embeds the documents at index time. Confirm how the
    # installed CohereEmbedding version applies input_type to documents.
    input_type="search_query",
)
def _stage_upload(upload, dest_dir, position):
    """Copy one uploaded item into *dest_dir* and return the new file path.

    *upload* may be a filesystem path (``str`` / ``os.PathLike``) or a
    file-like object exposing ``.name`` and/or ``.read()``. *position* is the
    item's index in the upload list and is only used to disambiguate uploads
    that share the same base name.

    Raises:
        ValueError: if the item is neither a readable path nor file-like.
    """
    import shutil

    if isinstance(upload, (str, os.PathLike)):
        source_path = os.fspath(upload)
    else:
        source_path = getattr(upload, "name", None)
    if not isinstance(source_path, str):
        # e.g. a file object opened from a descriptor, whose .name is an int
        source_path = None

    # Only the base name is kept: joining an absolute path onto dest_dir
    # would discard dest_dir and point straight back at the source file.
    base_name = os.path.basename(source_path) if source_path else ""
    if not base_name:
        base_name = "upload.pdf"
    target_path = os.path.join(dest_dir, base_name)
    if os.path.exists(target_path):
        # Two uploads with the same name: keep both instead of overwriting.
        target_path = os.path.join(dest_dir, f"{position}_{base_name}")

    if source_path and os.path.isfile(source_path):
        shutil.copyfile(source_path, target_path)
    elif hasattr(upload, "read"):
        with open(target_path, "wb") as out_file:
            out_file.write(upload.read())
    else:
        raise ValueError(f"Cannot read uploaded file: {upload!r}")
    return target_path


# Function to load data from uploaded PDF
def process_pdfs(pdf_files):
    """Index the uploaded PDFs and return ``(index, query_engine)``.

    The uploads are copied into a private temporary directory, loaded with
    ``SimpleDirectoryReader`` (PDF files only), embedded into a
    ``VectorStoreIndex`` using the module-level ``embed_model``, and exposed
    through a query engine that post-processes retrieved nodes with a Cohere
    reranker and answers with the module-level ``llm``.

    Args:
        pdf_files: iterable of uploaded items, each a path or a file-like
            object with ``.name`` / ``.read()``.

    Returns:
        A ``(index, query_engine)`` tuple.

    Raises:
        ValueError: if *pdf_files* is empty or ``None``, or an item cannot
            be read.
    """
    import tempfile

    if not pdf_files:
        raise ValueError("No PDF files were provided.")

    # A fresh directory per call keeps PDFs from earlier requests out of this
    # index, and the context manager removes the copies afterwards. The
    # documents returned by load_data() are held in memory, so the directory
    # is no longer needed once loading has finished.
    with tempfile.TemporaryDirectory(prefix="chat_with_pdf_") as temp_dir:
        for position, upload in enumerate(pdf_files):
            _stage_upload(upload, temp_dir, position)

        # Load data from the temporary directory
        loader = SimpleDirectoryReader(
            input_dir=temp_dir,
            required_exts=[".pdf"],
            recursive=True,
        )
        docs = loader.load_data()

    # Create an index over loaded data
    Settings.embed_model = embed_model
    index = VectorStoreIndex.from_documents(docs, show_progress=True)

    # Create a cohere reranker
    cohere_rerank = CohereRerank(api_key=API_KEY)

    # Create the query engine, where we use a cohere reranker on the fetched nodes
    Settings.llm = llm
    query_engine = index.as_query_engine(node_postprocessors=[cohere_rerank])
    return index, query_engine
# Query function
def query_pdfs(pdf_files, question):
    """Answer *question* using the content of the uploaded PDFs.

    Args:
        pdf_files: the uploaded PDF items, passed through to ``process_pdfs``.
        question: the user's question as text.

    Returns:
        The query engine's response converted to ``str``, or a short prompt
        asking for the missing input when no files or no question were given.
    """
    # Validate cheaply first: indexing triggers embedding and LLM API calls,
    # so there is no point starting it without both inputs.
    if not pdf_files:
        return "Please upload at least one PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # Only the query engine is needed here; the index itself is not used.
    _, query_engine = process_pdfs(pdf_files)
    response = query_engine.query(question)
    return str(response)
# Create Gradio interface
# Uses the top-level gr.File / gr.Textbox components: the legacy gr.inputs.*
# namespace is deprecated, and its File component has no `multiple` argument
# (multi-file upload is selected with file_count="multiple").
# `type` is left at the library default because the accepted values differ
# between Gradio major versions.
iface = gr.Interface(
    fn=query_pdfs,
    inputs=[
        gr.File(
            label="Upload PDF Files",
            file_count="multiple",
            file_types=[".pdf"],
        ),
        gr.Textbox(
            label="Ask a Question",
            placeholder="Enter your question here...",
        ),
    ],
    outputs="text",
    title="PDF Query System",
    description="Upload PDF files and ask questions to extract information from them.",
)

if __name__ == "__main__":
    iface.launch()