import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from io import BytesIO
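# Streamlit app: upload a PDF, enter a question, and get an answer generated by the vi-gemma-2b-RAG model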
# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
model = AutoModelForCausalLM.from_pretrained(
"himmeow/vi-gemma-2b-RAG",
device_map="auto",
torch_dtype=torch.float16 # Use FP16 for faster computation if supported
)
# Use GPU if available
if torch.cuda.is_available():
model.to("cuda")
# Streamlit app layout
st.set_page_config(page_title="πŸ“„ PDF Query App", page_icon=":book:", layout="wide")
st.title("πŸ“„ PDF Query App")
st.sidebar.title("Upload File and Query")
# Sidebar: File Upload
uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type="pdf")
# Sidebar: Query Input
query = st.sidebar.text_input("Enter your query:")
# Sidebar: Submit Button
if st.sidebar.button("Submit"):
    if uploaded_file and query:
        # Read the PDF file
        pdf_text = ""
        with BytesIO(uploaded_file.read()) as file:
            reader = PdfReader(file)
            for page in reader.pages:
                text = page.extract_text() or ""  # extract_text() can return None for image-only pages
                pdf_text += text + "\n"
        # Define the prompt format for the model
        prompt = f"""
{pdf_text}
Please answer the question: {query}
"""
        # Truncate the prompt if it is too long for the model
        max_input_length = 2048  # Adjust based on the model's maximum context length
        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
        # Move the tokenized inputs to the GPU if available
        if torch.cuda.is_available():
            input_ids = input_ids.to("cuda")
        # Generate text using the model
        outputs = model.generate(
            **input_ids,
            max_new_tokens=250,  # Limit the number of generated tokens for faster results
            no_repeat_ngram_size=3,  # Prevent repetition
            num_beams=2,  # Use beam search with fewer beams for faster results
        )
        # Decode and display the results
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write(response)