import os

import gradio as gr
from langchain.chains.question_answering import load_qa_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
# Answer a question about the uploaded PDF using Gemini Pro
def initialize(pdf_file, question):
    try:
        # Access the uploaded file information from Gradio
        file_info = pdf_file
        # Check if a file was uploaded
        if file_info is not None:
            # Gradio's File component supplies a temp-file object whose .name
            # attribute holds the full path (or, in newer versions, the path itself)
            file_path = file_info.name if hasattr(file_info, "name") else file_info
            if os.path.exists(file_path):
                # Process the PDF, keeping only the first 30 pages so the
                # stuffed prompt stays within the model's context window
                pdf_loader = PyPDFLoader(file_path)
                pages = pdf_loader.load_and_split()
                context_docs = pages[:30]
                # Read the Gemini API key from the environment; set
                # GOOGLE_API_KEY before launching the app
                api_key = os.getenv("GOOGLE_API_KEY")
                # Prompt template for formatting context and question
                prompt_template = """Answer the question as precisely as possible using the provided context. If the answer is not contained in the context, say "answer not available in context".

Context:
{context}

Question:
{question}

Answer:
"""
                prompt = PromptTemplate(
                    template=prompt_template,
                    input_variables=["context", "question"],
                )
                # Load Gemini Pro through LangChain's chat wrapper so the model
                # is a Runnable, which is what load_qa_chain expects
                model = ChatGoogleGenerativeAI(
                    model="gemini-pro",
                    temperature=0.3,
                    google_api_key=api_key,
                )
                # Prepare the input data: the "stuff" chain fills {context}
                # from input_documents and {question} from the question key
                input_data = {
                    "input_documents": context_docs,
                    "question": question,
                }
                # Generate the answer using load_qa_chain
                stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
                stuff_answer = stuff_chain(input_data, return_only_outputs=True)
                # Extract the answer
                generated_answer = stuff_answer["output_text"]
                return generated_answer
            else:
                return "Error: The uploaded file could not be found."
        else:
            return "Error: No PDF file was uploaded."
    except Exception as e:
        return f"An error occurred: {e}"  # Generic error handling
# Create a Gradio interface
interface = gr.Interface(
    fn=initialize,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Question"),
    ],
    outputs="text",
    title="GeminiPro Q&A Bot",
    description="Ask questions about the uploaded PDF document.",
)
# Launch the interface
interface.launch()
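
# Optional launch variants (standard Gradio arguments):
#   interface.launch(share=True)        # expose a temporary public URL
#   interface.launch(server_port=7860)  # pin the port explicitly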