Spaces:
Sleeping
Sleeping
File size: 3,218 Bytes
69bf39e 5e9dd30 69bf39e 5a93818 5e9dd30 198dc13 5e9dd30 e93e1aa 5e9dd30 e93e1aa 5a93818 e93e1aa 5e9dd30 5a93818 5e9dd30 5a93818 e93e1aa 198dc13 5e9dd30 198dc13 e93e1aa 5e9dd30 198dc13 6feb14e 5e9dd30 e4b0e31 5e9dd30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the tokenizer and causal-LM checkpoint once at app start-up.
tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
model = AutoModelForCausalLM.from_pretrained(
    "himmeow/vi-gemma-2b-RAG",
    device_map="auto",          # accelerate dispatches weights (GPU if available)
    torch_dtype=torch.bfloat16,
)
# NOTE(review): device_map="auto" already places the model on the GPU when one
# is present; a subsequent model.to("cuda") on an accelerate-dispatched model
# is redundant and can raise a RuntimeError, so device placement is left to
# device_map alone.
# Configure the page before any other Streamlit call.
st.set_page_config(page_title="RAG PDF Chatbot", layout="wide")

# Sidebar: upload control plus creator credit.
st.sidebar.title("π PDF Upload")
uploaded_files = st.sidebar.file_uploader(
    "Upload PDF files", type=["pdf"], accept_multiple_files=True
)

# Gradient background for the sidebar (injected CSS).
_SIDEBAR_CSS = """
<style>
.sidebar .sidebar-content {
background: linear-gradient(135deg, #ff9a9e, #fad0c4 40%, #fad0c4 60%, #ff9a9e);
color: white;
}
</style>
"""
st.sidebar.markdown(_SIDEBAR_CSS, unsafe_allow_html=True)

st.sidebar.markdown(
    "\n### Created by: [Engr. Hamesh Raj]"
    "(https://www.linkedin.com/in/datascientisthameshraj/)\n"
)
# Main page: centered title and a gradient body background.
st.markdown(
    "\n<h1 style='text-align: center; color: #ff6f61;'>π RAG PDF Chatbot</h1>\n",
    unsafe_allow_html=True,
)

_BODY_CSS = """
<style>
body {
background: linear-gradient(135deg, #89f7fe 0%, #66a6ff 100%);
}
</style>
"""
st.markdown(_BODY_CSS, unsafe_allow_html=True)

# Query input and its submit trigger.
query = st.text_input("Enter your query here:")
submit_button = st.button("Submit")
# Persist (question, answer) pairs across Streamlit reruns.
st.session_state.setdefault('chat_history', [])
def extract_text_from_pdfs(files):
    """Concatenate the text of every page of every uploaded PDF.

    Args:
        files: iterable of uploaded file-like objects accepted by PdfReader.

    Returns:
        A single string with a newline appended after each page's text.
    """
    text = ""
    for uploaded_file in files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned images); guard against TypeError on concatenation.
            text += (page.extract_text() or "") + "\n"
    return text
# Handle the query submission.
if submit_button and query:
    if uploaded_files:
        # Gather context from every uploaded PDF.
        pdf_text = extract_text_from_pdfs(uploaded_files)

        # RAG-style prompt: context first, then the user's question.
        prompt = f"""
Based on the following context/document:
{pdf_text}
Please answer the question: {query}
"""

        # Tokenize; move the tensors to the GPU when one is available.
        input_ids = tokenizer(prompt, return_tensors="pt")
        if torch.cuda.is_available():
            input_ids = input_ids.to("cuda")

        # Generate a bounded response with light repetition control.
        outputs = model.generate(
            **input_ids,
            max_new_tokens=500,
            no_repeat_ngram_size=5,
        )

        # Decode, drop special tokens, trim surrounding whitespace.
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        clean_response = response.strip()

        # Record the exchange for the history view.
        st.session_state.chat_history.append((query, clean_response))
    else:
        # Without this branch, pdf_text is unbound and the prompt f-string
        # raises NameError; tell the user what is missing instead.
        st.warning("Please upload at least one PDF file before submitting a query.")
# Render the conversation so far, oldest exchange first.
# (A stray scrape artifact trailing the final st.write call was removed.)
if st.session_state.chat_history:
    for question, answer in st.session_state.chat_history:
        st.markdown(f"**Question:** {question}")
        st.markdown(f"**Answer:** {answer}")
        st.write("---")