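# Streamlit app: upload a PDF, enter a question, and get an answer generated
# by the himmeow/vi-gemma-2b-RAG model from the document's extracted text.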
import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from io import BytesIO
# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
model = AutoModelForCausalLM.from_pretrained(
    "himmeow/vi-gemma-2b-RAG",
    device_map="auto",
    torch_dtype=torch.float16  # Use FP16 for faster computation if supported
)
# Use GPU if available
if torch.cuda.is_available():
    model.to("cuda")
# Streamlit app layout
st.set_page_config(page_title="PDF Query App", page_icon=":book:", layout="wide")
st.title("PDF Query App")
st.sidebar.title("Upload File and Query")
# Sidebar: File Upload
uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type="pdf")
# Sidebar: Query Input
query = st.sidebar.text_input("Enter your query:")
# Sidebar: Submit Button
if st.sidebar.button("Submit"):
    if uploaded_file and query:
        # Read the PDF file and collect the text of every page
        pdf_text = ""
        with BytesIO(uploaded_file.read()) as file:
            reader = PdfReader(file)
            for page in reader.pages:
                text = page.extract_text()
                if text:  # extract_text() can return None for pages without text
                    pdf_text += text + "\n"
        # Define the prompt format for the model
        prompt = f"""
{pdf_text}
Please answer the question: {query}
"""
        # Truncate the input if it is too long for the model
        max_input_length = 2048  # Adjust based on the model's maximum context length
        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
        # Move the inputs to the GPU if available
        if torch.cuda.is_available():
            input_ids = input_ids.to("cuda")
        # Generate text using the model
        outputs = model.generate(
            **input_ids,
            max_new_tokens=250,  # Limit the number of generated tokens for faster results
            no_repeat_ngram_size=3,  # Prevent repetition
            num_beams=2,  # Use beam search with few beams for faster results
        )
        # Decode and display the results
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write(response)
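# To try the app locally (assuming this file is saved as app.py and that
# streamlit, PyPDF2, transformers, torch, and accelerate are installed):
#   streamlit run app.py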