File size: 2,924 Bytes
ac04873
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a47322
 
ac04873
 
 
 
 
 
 
 
 
 
c12a4ac
 
 
ac04873
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c12a4ac
ac04873
 
 
 
 
 
 
 
 
 
 
 
 
c12a4ac
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import gradio as gr
import asyncio
from langchain_core.prompts import PromptTemplate
from langchain_community.output_parsers.rail_parser import GuardrailsOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.chains.question_answering import load_qa_chain  # Import load_qa_chain


async def initialize(file_path, question):
    """Answer a question about a PDF with Gemini and cite where it came from.

    The PDF is loaded and split into chunks, all chunks are passed to a
    "stuff" question-answering chain, and the model's answer is returned
    together with a best-effort page citation and a ``file://`` link to the
    document.

    Args:
        file_path: Filesystem path of the PDF to query.
        question: Natural-language question to ask about the document.

    Returns:
        The answer text followed by ``(Source: ...)`` and a Markdown-style
        document link, or an error message when ``file_path`` does not exist.
    """
    # Guard clause first: nothing below is useful without a readable file, and
    # there is no reason to configure an API client for a missing upload.
    if not os.path.exists(file_path):
        return "Error: Unable to process the document. Please ensure the PDF file is valid."

    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    prompt_template = """Understand the question answer to the question as precise as possible using the provided context. 
                        If the answer is not contained in the context, say "answer not available in context" \n\n
                          Context: \n {context}?\n
                          Question: \n {question} \n
                          Answer:
                        """
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    pdf_loader = PyPDFLoader(file_path)
    pages = pdf_loader.load_and_split()
    stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

    # The stuff chain fills {context} itself from input_documents, so a
    # caller-supplied "context" value would be overwritten; only the documents
    # and the question are passed.
    result = await stuff_chain.ainvoke({"input_documents": pages, "question": question})

    # ainvoke returns a mapping of inputs and outputs; formatting it directly
    # would dump the whole dict (documents included) into the UI.
    if isinstance(result, dict):
        answer_text = result.get("output_text", result)
    else:
        answer_text = result

    # Best-effort citation: list the pages whose text literally contains the
    # question. An empty question would match everything, so it is skipped.
    needle = question.strip().lower()
    page_labels = []
    for index, chunk in enumerate(pages):
        if needle and needle in chunk.page_content.lower():
            # load_and_split yields chunks, not pages; prefer the loader's
            # zero-based "page" metadata and fall back to the chunk index.
            page_number = chunk.metadata.get("page", index) + 1
            page_labels.append(f"Page {page_number}")
    # Several chunks can come from one page; keep first-seen order, no repeats.
    sources = list(dict.fromkeys(page_labels))

    if sources:
        source_str = f" (Source: {', '.join(sources)})"
    else:
        source_str = " (Source: Not found in specific page)"

    # Link back to the document on the local filesystem.
    file_name = os.path.basename(file_path)
    source_link = f"[{file_name}](file://{os.path.abspath(file_path)})"
    return f"{answer_text} {source_str} - [Document: {source_link}]"


# Gradio components used by the interface built at the bottom of this file.
# The two inputs are listed there in the order [input_file, input_question],
# matching pdf_qa's (file, question) parameters.
input_file = gr.File(label="Upload PDF File")  # document upload widget
input_question = gr.Textbox(label="Ask about the document")  # free-text question
output_text = gr.Textbox(label="Answer - GeminiPro")  # displays the string returned by pdf_qa

async def pdf_qa(file, question):
    """Gradio callback: answer ``question`` about the uploaded PDF.

    Args:
        file: The uploaded file as delivered by the file component. Either an
            object exposing the path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.
        question: The question typed by the user.

    Returns:
        The answer string produced by ``initialize``, or an error message
        when no file was uploaded.
    """
    # Submitting the form without an upload passes None; report it instead of
    # failing with AttributeError on ``file.name``.
    if file is None:
        return "Error: No PDF uploaded. Please upload a PDF file and try again."

    # Tolerate both upload shapes: an object carrying ``.name`` or a bare path.
    file_path = getattr(file, "name", file)
    return await initialize(file_path, question)

# Assemble the web UI around pdf_qa, then start serving it.
demo = gr.Interface(
    fn=pdf_qa,
    inputs=[input_file, input_question],
    outputs=output_text,
    title="PDF Question Answering System",
    description="Upload a PDF file and ask questions about the content.",
)

# share=True asks Gradio to expose a public link in addition to the local one.
demo.launch(share=True)