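# Streamlit app: upload a PDF, enter a question, and get an answer generated
# by the himmeow/vi-gemma-2b-RAG model from the document's extracted text.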
import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from io import BytesIO
# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
model = AutoModelForCausalLM.from_pretrained(
    "himmeow/vi-gemma-2b-RAG",
    device_map="auto",
    torch_dtype=torch.float16  # Use FP16 for faster computation if supported
)
# Use GPU if available
if torch.cuda.is_available():
    model.to("cuda")
# Streamlit app layout
st.set_page_config(page_title="PDF Query App", page_icon=":book:", layout="wide")
st.title("PDF Query App")
st.sidebar.title("Upload File and Query")
# Sidebar: File Upload
uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type="pdf")
# Sidebar: Query Input
query = st.sidebar.text_input("Enter your query:")
# Sidebar: Submit Button
if st.sidebar.button("Submit"):
    if uploaded_file and query:
        # Read the PDF file and collect the text of every page
        pdf_text = ""
        with BytesIO(uploaded_file.read()) as file:
            reader = PdfReader(file)
            for page in reader.pages:
                text = page.extract_text()
                if text:  # extract_text() can return None for pages without text
                    pdf_text += text + "\n"
        # Define the prompt format for the model
        prompt = f"""
{pdf_text}
Please answer the question: {query}
"""
        # Truncate the input if it is too long for the model
        max_input_length = 2048  # Adjust based on the model's maximum context length
        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
        # Move the inputs to the GPU if available
        if torch.cuda.is_available():
            input_ids = input_ids.to("cuda")
        # Generate text using the model
        outputs = model.generate(
            **input_ids,
            max_new_tokens=250,  # Limit the number of generated tokens for faster results
            no_repeat_ngram_size=3,  # Prevent repetition
            num_beams=2,  # Use beam search with few beams for faster results
        )
        # Decode and display the results
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write(response)
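# To try the app locally (assuming this file is saved as app.py and that
# streamlit, PyPDF2, transformers, torch, and accelerate are installed):
#   streamlit run app.py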