Spaces:

datascientist22
/

rag-pdfQA-chatbot

Sleeping

App Files Community

datascientist22 committed on Sep 3, 2024

Commit

7bdff6e

verified ·

1 Parent(s): 7ff270d

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -34

app.py CHANGED Viewed

@@ -47,20 +47,25 @@ if 'chat_history' not in st.session_state:
     st.session_state.chat_history = []
 # Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
-model = AutoModelForCausalLM.from_pretrained("himmeow/vi-gemma-2b-RAG")
-# Use GPU if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = model.to(device)
 # Function to extract text from PDF files
 def extract_text_from_pdfs(files):
     text = ""
     for uploaded_file in files:
-        reader = PdfReader(uploaded_file)
-        for page in reader.pages:
-            text += page.extract_text() + "\n"
     return text
 # Handle the query submission
@@ -73,33 +78,35 @@ if submit_button:
         try:
             # Extract text from uploaded PDFs
             pdf_text = extract_text_from_pdfs(uploaded_files)
-            # Prepare the input prompt
-            prompt = f"""
-            Based on the following context/document:
-            {pdf_text}
-            Please answer the question: {query}
-            """
-            # Encode the input text
-            input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-            # Generate the response
-            outputs = model.generate(
-                input_ids=input_ids,
-                max_new_tokens=500,
-                no_repeat_ngram_size=5,
-            )
-            # Decode the response and clean it
-            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            clean_response = response.strip()
-            # Update chat history
-            st.session_state.chat_history.append((query, clean_response))
         except Exception as e:
-            st.error(f"An error occurred: {e}")
 # Display chat history
 if st.session_state.chat_history:

     st.session_state.chat_history = []
 # Load the tokenizer and model
+try:
+    tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
+    model = AutoModelForCausalLM.from_pretrained("himmeow/vi-gemma-2b-RAG")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = model.to(device)
+except Exception as e:
+    st.error(f"Error loading model or tokenizer: {e}")
+    st.stop()
 # Function to extract text from PDF files
 def extract_text_from_pdfs(files):
     text = ""
     for uploaded_file in files:
+        try:
+            reader = PdfReader(uploaded_file)
+            for page in reader.pages:
+                text += page.extract_text() + "\n"
+        except Exception as e:
+            st.error(f"Error reading PDF file: {e}")
     return text
 # Handle the query submission
         try:
             # Extract text from uploaded PDFs
             pdf_text = extract_text_from_pdfs(uploaded_files)
+            if not pdf_text.strip():
+                st.warning("⚠️ No text found in the uploaded PDFs.")
+            else:
+                # Prepare the input prompt
+                prompt = f"""
+                Based on the following context/document:
+                {pdf_text}
+                Please answer the question: {query}
+                """
+                # Encode the input text
+                inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=tokenizer.model_max_length)
+                # Generate the response
+                outputs = model.generate(
+                    input_ids=inputs['input_ids'].to(device),
+                    max_new_tokens=500,
+                    no_repeat_ngram_size=5,
+                )
+                # Decode the response and clean it
+                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+                clean_response = response.strip()
+                # Update chat history
+                st.session_state.chat_history.append((query, clean_response))
         except Exception as e:
+            st.error(f"An error occurred during processing: {e}")
 # Display chat history
 if st.session_state.chat_history: