import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from io import BytesIO
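# Streamlit app: upload a PDF, enter a question, and get an answer generated by the vi-gemma-2b-RAG model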
# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
model = AutoModelForCausalLM.from_pretrained(
"himmeow/vi-gemma-2b-RAG",
device_map="auto",
torch_dtype=torch.float16 # Use FP16 for faster computation if supported
)
# Use GPU if available
if torch.cuda.is_available():
model.to("cuda")
# Streamlit app layout
st.set_page_config(page_title="πŸ“„ PDF Query App", page_icon=":book:", layout="wide")
st.title("πŸ“„ PDF Query App")
st.sidebar.title("Upload File and Query")
# Sidebar: File Upload
uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type="pdf")
# Sidebar: Query Input
query = st.sidebar.text_input("Enter your query:")
# Sidebar: Submit Button
if st.sidebar.button("Submit"):
    if uploaded_file and query:
        # Read the PDF file
        pdf_text = ""
        with BytesIO(uploaded_file.read()) as file:
            reader = PdfReader(file)
            for page in reader.pages:
                text = page.extract_text() or ""  # extract_text() can return None for image-only pages
                pdf_text += text + "\n"
        # Define the prompt format for the model
        prompt = f"""
{pdf_text}
Please answer the question: {query}
"""
        # Truncate the prompt if it is too long for the model
        max_input_length = 2048  # Adjust based on the model's maximum context length
        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
        # Move the tokenized inputs to the GPU if available
        if torch.cuda.is_available():
            input_ids = input_ids.to("cuda")
        # Generate text using the model
        outputs = model.generate(
            **input_ids,
            max_new_tokens=250,  # Limit the number of generated tokens for faster results
            no_repeat_ngram_size=3,  # Prevent repetition
            num_beams=2,  # Use beam search with fewer beams for faster results
        )
        # Decode and display the results
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write(response)