File size: 3,421 Bytes
ac04873
 
 
 
 
 
 
 
 
 
 
 
 
 
9105bd6
f32ba7f
9105bd6
 
 
 
 
 
 
 
 
ac04873
 
 
9105bd6
ac04873
 
 
f32ba7f
 
 
 
 
 
ac04873
c12a4ac
f32ba7f
c12a4ac
84df10e
f32ba7f
 
fd9a79e
ac04873
f32ba7f
fd9a79e
ac04873
fd9a79e
 
ac04873
 
 
f32ba7f
ac04873
 
9105bd6
ac04873
 
 
 
 
 
 
 
 
 
 
 
c12a4ac
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import gradio as gr
import asyncio
from langchain_core.prompts import PromptTemplate
from langchain_community.output_parsers.rail_parser import GuardrailsOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.chains.question_answering import load_qa_chain  # Import load_qa_chain

async def initialize(file_path, question):
    """Answer ``question`` about the PDF at ``file_path`` with Gemini.

    The PDF is loaded and split into chunks, every chunk is handed to a
    LangChain "stuff" question-answering chain, and the model's answer is
    returned together with the pages whose text literally contains it.

    Args:
        file_path: Path of the PDF on disk. ``None``, an empty string or a
            non-existent path yields the error message instead of raising.
        question: The user's question, passed to the prompt unchanged.

    Returns:
        A string of the form
        ``"<answer>  (Source: ...) - [Document: [name](file://path)]"``,
        or an ``"Error: ..."`` message when the document cannot be used.
    """
    error_message = "Error: Unable to process the document. Please ensure the PDF file is valid."

    # Check the file before touching the model so a bad path costs nothing
    # and never fails on missing API credentials.
    if not file_path or not os.path.exists(file_path):
        return error_message

    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)

    # Prompt template for precise answers
    prompt_template = """Answer the question precisely and concisely using the provided context. Avoid any additional commentary or system messages.
                          If the answer is not contained in the context, respond with "answer not available in context".
                          
                          Context:
                          {context}
                          
                          Question:
                          {question}
                          
                          Answer:
                        """
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    pdf_loader = PyPDFLoader(file_path)
    # load_and_split() is synchronous; run it in the default executor so the
    # event loop can keep serving other requests while the PDF is parsed.
    loop = asyncio.get_running_loop()
    pages = await loop.run_in_executor(None, pdf_loader.load_and_split)
    if not pages:
        # Nothing could be extracted, so there is no context to ask about.
        return error_message

    # Load the question-answering chain
    stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

    # The "stuff" chain builds {context} itself from input_documents, so no
    # separately assembled context string is passed (it would be overwritten).
    chain_result = await stuff_chain.ainvoke({"input_documents": pages, "question": question})
    answer = chain_result.get('output_text', '').strip()

    # Identify the pages that contain the answer
    relevant_pages = _matching_page_labels(answer, pages)
    if relevant_pages:
        source_str = f" (Source: {', '.join(relevant_pages)})"
    else:
        source_str = " (Source: Not found in specific page)"

    # Create a clickable link for the document
    file_name = os.path.basename(file_path)
    source_link = f"[{file_name}](file://{os.path.abspath(file_path)})"
    return f"{answer} {source_str} - [Document: {source_link}]"


def _matching_page_labels(answer, pages):
    """Return ordered, de-duplicated ``"Page N"`` labels for chunks containing ``answer``.

    The match is a case-insensitive substring test. An empty answer matches
    nothing (a bare ``"" in text`` test would otherwise flag every page).
    The page number comes from each chunk's ``metadata["page"]`` (0-based, as
    written by the PDF loader) because one PDF page may be split into several
    chunks; the chunk index is only a fallback when that key is absent.
    """
    needle = answer.lower()
    if not needle:
        return []

    labels = []
    for index, chunk in enumerate(pages):
        if needle not in chunk.page_content.lower():
            continue
        label = f"Page {chunk.metadata.get('page', index) + 1}"
        if label not in labels:
            labels.append(label)
    return labels

# Gradio components: the PDF upload, the question box, and the answer box.
input_file = gr.File(
    label="Upload PDF File",
)
input_question = gr.Textbox(
    label="Ask about the document",
)
output_text = gr.Textbox(
    label="Answer - GeminiPro",
)

async def pdf_qa(file, question):
    """Gradio callback: answer ``question`` about the uploaded ``file``.

    Args:
        file: Value supplied by the file-upload component. ``None`` when
            nothing was uploaded; otherwise either a path string or an object
            exposing the path as ``.name``.
        question: Text typed by the user.

    Returns:
        The answer string produced by ``initialize``, or an ``"Error: ..."``
        message when the file or the question is missing.
    """
    if file is None:
        return "Error: Please upload a PDF file before asking a question."
    if not question or not question.strip():
        return "Error: Please enter a question about the document."

    # NOTE(review): the upload value is presumably a tempfile-like wrapper or
    # a plain path string depending on the Gradio version; handle both.
    file_path = getattr(file, "name", file)
    return await initialize(file_path, question)

# Build the interface around pdf_qa, then launch it with share=True to
# enable a public link.
demo = gr.Interface(
    fn=pdf_qa,
    inputs=[input_file, input_question],
    outputs=output_text,
    title="PDF Question Answering System",
    description="Upload a PDF file and ask questions about the content.",
)
demo.launch(share=True)