File size: 3,218 Bytes
69bf39e
5e9dd30
69bf39e
 
 
5a93818
 
 
 
 
 
 
 
 
 
 
 
5e9dd30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198dc13
5e9dd30
 
 
 
 
e93e1aa
5e9dd30
 
e93e1aa
5a93818
 
 
e93e1aa
5e9dd30
5a93818
 
5e9dd30
 
5a93818
e93e1aa
198dc13
5e9dd30
198dc13
e93e1aa
5e9dd30
198dc13
6feb14e
 
5e9dd30
e4b0e31
 
5e9dd30
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the tokenizer and model once per server process.
# st.cache_resource prevents re-downloading / re-initializing the multi-GB
# checkpoint on every Streamlit rerun (the whole script re-executes on each
# user interaction, so unguarded top-level loading is a serious defect).
@st.cache_resource
def load_model_and_tokenizer():
    """Return (tokenizer, model) for the himmeow/vi-gemma-2b-RAG checkpoint.

    Note: device_map="auto" already places the model on the best available
    device (GPU when present), so the original explicit model.to("cuda")
    was redundant and can conflict with accelerate's sharded placement.
    """
    tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
    model = AutoModelForCausalLM.from_pretrained(
        "himmeow/vi-gemma-2b-RAG",
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    return tokenizer, model

tokenizer, model = load_model_and_tokenizer()

# Page-level configuration; must be the first Streamlit command executed.
st.set_page_config(page_title="RAG PDF Chatbot", layout="wide")

# Sidebar: file uploader, injected gradient styling, and author credit.
with st.sidebar:
    st.title("📁 PDF Upload")
    uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

    # Multicolor gradient background for the sidebar (raw CSS injection).
    st.markdown("""
    <style>
    .sidebar .sidebar-content {
        background: linear-gradient(135deg, #ff9a9e, #fad0c4 40%, #fad0c4 60%, #ff9a9e);
        color: white;
    }
    </style>
    """, unsafe_allow_html=True)

    st.markdown("""
### Created by: [Engr. Hamesh Raj](https://www.linkedin.com/in/datascientisthameshraj/)
""")

# ---- Main page chrome ----

# Centered, colored page title (raw HTML).
_TITLE_HTML = """
    <h1 style='text-align: center; color: #ff6f61;'>📜 RAG PDF Chatbot</h1>
    """
st.markdown(_TITLE_HTML, unsafe_allow_html=True)

# Gradient background for the main content area (raw CSS injection).
_MAIN_BG_CSS = """
    <style>
    body {
        background: linear-gradient(135deg, #89f7fe 0%, #66a6ff 100%);
    }
    </style>
    """
st.markdown(_MAIN_BG_CSS, unsafe_allow_html=True)

# Query input and submit control (order matters for page layout).
query = st.text_input("Enter your query here:")
submit_button = st.button("Submit")

# (question, answer) pairs persisted across Streamlit reruns.
st.session_state.setdefault('chat_history', [])

# Function to extract text from PDF files
def extract_text_from_pdfs(files):
    """Concatenate the extractable text of every page of every uploaded PDF.

    Args:
        files: iterable of uploaded file-like objects accepted by PdfReader.

    Returns:
        One string, with "\n" appended after each page's text. An empty
        iterable yields "".
    """
    parts = []
    for uploaded_file in files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            # extract_text() returns None for pages with no extractable
            # text (e.g. scanned images); the original `None + "\n"`
            # raised TypeError. Also use join instead of quadratic +=.
            parts.append((page.extract_text() or "") + "\n")
    return "".join(parts)

# Handle the query submission
if submit_button and query:
    if not uploaded_files:
        # The original silently did nothing when no PDFs were uploaded;
        # give the user explicit feedback instead.
        st.warning("Please upload at least one PDF file before submitting a query.")
    else:
        # Extract text from uploaded PDFs
        pdf_text = extract_text_from_pdfs(uploaded_files)

        # Prompt template: document context first, then the user's question.
        prompt = f"""
        Based on the following context/document:
        {pdf_text}
        Please answer the question: {query}
        """

        # Tokenize; the BatchEncoding carries input_ids + attention_mask.
        inputs = tokenizer(prompt, return_tensors="pt")

        # Move inputs to the model's device — works for CPU and GPU alike,
        # replacing the hard-coded "cuda" check.
        # NOTE(review): assumes the model is not sharded across devices
        # (model.device is well-defined for a single-device placement).
        inputs = inputs.to(model.device)

        # Generate the response (greedy decoding with a repetition guard).
        outputs = model.generate(
            **inputs,
            max_new_tokens=500,
            no_repeat_ngram_size=5,
        )

        # Decode ONLY the newly generated tokens. Decoding outputs[0] in
        # full — as the original did — repeats the whole prompt (including
        # the entire PDF text) inside every answer shown to the user.
        prompt_len = inputs["input_ids"].shape[1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        clean_response = response.strip()

        # Update chat history
        st.session_state.chat_history.append((query, clean_response))

# Render the running Q&A transcript, oldest entry first.
history = st.session_state.chat_history
if history:
    for question, answer in history:
        st.markdown(f"**Question:** {question}")
        st.markdown(f"**Answer:** {answer}")
        st.write("---")