Spaces:

raksama19
/

Test-Dolphin-PDF

Runtime error

App Files Files Community

raksama19 committed on Jul 15

Commit

d6f868f

verified ·

1 Parent(s): d3e09b2

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -13

app.py CHANGED Viewed

@@ -318,8 +318,9 @@ except Exception as e:
 # Initialize embedding model for RAG
 try:
     print("Loading embedding model...")
-    embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-    print("✅ Embedding model loaded successfully")
 except Exception as e:
     print(f"❌ Error loading embedding model: {e}")
     embedding_model = None
@@ -368,7 +369,7 @@ embedding_model = None
 # chatbot_model is initialized above
-def chunk_document(text, chunk_size=500, overlap=50):
     """Split document into overlapping chunks for RAG"""
     words = text.split()
     chunks = []
@@ -386,19 +387,27 @@ def create_embeddings(chunks):
         return None
     try:
-        embeddings = embedding_model.encode(chunks)
-        return embeddings
     except Exception as e:
         print(f"Error creating embeddings: {e}")
         return None
-def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
     """Retrieve most relevant chunks for a question"""
     if embedding_model is None or embeddings is None:
-        return chunks[:3]  # Fallback to first 3 chunks
     try:
-        question_embedding = embedding_model.encode([question])
         similarities = cosine_similarity(question_embedding, embeddings)[0]
         # Get top-k most similar chunks
@@ -408,7 +417,7 @@ def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
         return relevant_chunks
     except Exception as e:
         print(f"Error retrieving chunks: {e}")
-        return chunks[:3]  # Fallback
 def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
     """Main processing function for uploaded PDF"""
@@ -452,10 +461,17 @@ def get_processed_markdown():
 def clear_all():
     """Clear all data and hide results tab"""
-    global processed_markdown, show_results_tab
     processed_markdown = ""
     show_results_tab = False
-    return None, "✅ Ready to process your PDF", gr.Tabs(visible=False)
 # Create Gradio interface
@@ -660,14 +676,23 @@ with gr.Blocks(
             input_len = inputs["input_ids"].shape[-1]
             with torch.inference_mode():
                 generation = chatbot_model.generate(
                     **inputs,
-                    max_new_tokens=300,
                     do_sample=False,
                     temperature=0.7,
-                    pad_token_id=chatbot_processor.tokenizer.pad_token_id
                 )
                 generation = generation[0][input_len:]
             response = chatbot_processor.decode(generation, skip_special_tokens=True)

 # Initialize embedding model for RAG
 try:
     print("Loading embedding model...")
+    # Force CPU for embedding model to save GPU memory
+    embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+    print("✅ Embedding model loaded successfully (CPU)")
 except Exception as e:
     print(f"❌ Error loading embedding model: {e}")
     embedding_model = None
 # chatbot_model is initialized above
+def chunk_document(text, chunk_size=300, overlap=30):
     """Split document into overlapping chunks for RAG"""
     words = text.split()
     chunks = []
         return None
     try:
+        # Process in smaller batches to avoid memory issues
+        batch_size = 32
+        embeddings = []
+        for i in range(0, len(chunks), batch_size):
+            batch = chunks[i:i + batch_size]
+            batch_embeddings = embedding_model.encode(batch, show_progress_bar=False)
+            embeddings.extend(batch_embeddings)
+        return np.array(embeddings)
     except Exception as e:
         print(f"Error creating embeddings: {e}")
         return None
+def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
     """Retrieve most relevant chunks for a question"""
     if embedding_model is None or embeddings is None:
+        return chunks[:2]  # Fallback to first 2 chunks (reduced from 3)
     try:
+        question_embedding = embedding_model.encode([question], show_progress_bar=False)
         similarities = cosine_similarity(question_embedding, embeddings)[0]
         # Get top-k most similar chunks
         return relevant_chunks
     except Exception as e:
         print(f"Error retrieving chunks: {e}")
+        return chunks[:2]  # Fallback
 def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
     """Main processing function for uploaded PDF"""
 def clear_all():
     """Clear all data and hide results tab"""
+    global processed_markdown, show_results_tab, document_chunks, document_embeddings
     processed_markdown = ""
     show_results_tab = False
+    document_chunks = []
+    document_embeddings = None
+    # Clear GPU memory
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    return None, "", gr.Tabs(visible=False)
 # Create Gradio interface
             input_len = inputs["input_ids"].shape[-1]
             with torch.inference_mode():
+                # Clear cache before generation
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
                 generation = chatbot_model.generate(
                     **inputs,
+                    max_new_tokens=200,  # Reduced from 300 to save memory
                     do_sample=False,
                     temperature=0.7,
+                    pad_token_id=chatbot_processor.tokenizer.pad_token_id,
+                    use_cache=False  # Disable KV cache to save memory
                 )
                 generation = generation[0][input_len:]
+                # Clear cache after generation
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
             response = chatbot_processor.decode(generation, skip_special_tokens=True)