raksama19 committed
Commit 12128c1 · verified · 1 Parent(s): d6f868f

Update app.py

Files changed (1)
  1. app.py +32 -30
app.py CHANGED
@@ -11,9 +11,16 @@ import numpy as np
 from PIL import Image
 from transformers import AutoProcessor, VisionEncoderDecoderModel, Gemma3nForConditionalGeneration, pipeline
 import torch
-from sentence_transformers import SentenceTransformer
-import numpy as np
-from sklearn.metrics.pairwise import cosine_similarity
+try:
+    from sentence_transformers import SentenceTransformer
+    import numpy as np
+    from sklearn.metrics.pairwise import cosine_similarity
+    RAG_DEPENDENCIES_AVAILABLE = True
+except ImportError as e:
+    print(f"RAG dependencies not available: {e}")
+    print("Please install: pip install sentence-transformers scikit-learn")
+    RAG_DEPENDENCIES_AVAILABLE = False
+    SentenceTransformer = None
 import os
 import tempfile
 import uuid
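Reviewer note: the guarded import above exposes a module flag, RAG_DEPENDENCIES_AVAILABLE, that the rest of app.py can check before touching any RAG code path. A minimal sketch of that gating pattern follows; build_retriever is a hypothetical helper for illustration, not part of the commit.

```python
# Sketch only: gating optional RAG features on the flag introduced above.
# build_retriever is hypothetical; app.py gates its embedding-model setup instead.
try:
    from sentence_transformers import SentenceTransformer
    RAG_DEPENDENCIES_AVAILABLE = True
except ImportError:
    RAG_DEPENDENCIES_AVAILABLE = False
    SentenceTransformer = None

def build_retriever():
    """Return an embedding model when RAG deps are installed, else None."""
    if not RAG_DEPENDENCIES_AVAILABLE:
        return None  # callers fall back to non-RAG behaviour
    return SentenceTransformer('all-MiniLM-L6-v2')
```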
@@ -316,13 +323,20 @@ except Exception as e:
     model_status = f"❌ Model failed to load: {str(e)}"
 
 # Initialize embedding model for RAG
-try:
-    print("Loading embedding model...")
-    # Force CPU for embedding model to save GPU memory
-    embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
-    print("✅ Embedding model loaded successfully (CPU)")
-except Exception as e:
-    print(f"❌ Error loading embedding model: {e}")
+if RAG_DEPENDENCIES_AVAILABLE:
+    try:
+        print("Loading embedding model for RAG...")
+        # Use GPU for embedding model with 24GB VRAM
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
+        print(f"✅ Embedding model loaded successfully ({device})")
+    except Exception as e:
+        print(f"❌ Error loading embedding model: {e}")
+        import traceback
+        traceback.print_exc()
+        embedding_model = None
+else:
+    print("❌ RAG dependencies not available")
     embedding_model = None
 
 # Initialize chatbot model
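Reviewer note: the embedding model is no longer pinned to CPU; it now follows CUDA availability. A quick sanity-check sketch of the device-aware load and a test encode; nothing below comes from the commit beyond the model name and the device logic.

```python
# Sketch: device-aware load plus a test encode. all-MiniLM-L6-v2 produces
# 384-dimensional embeddings; encode() returns a numpy array by default.
import torch
from sentence_transformers import SentenceTransformer

device = "cuda" if torch.cuda.is_available() else "cpu"
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
vectors = model.encode(["hello world"], show_progress_bar=False)
print(vectors.shape)  # (1, 384)
```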
@@ -369,7 +383,7 @@ embedding_model = None
 # chatbot_model is initialized above
 
 
-def chunk_document(text, chunk_size=300, overlap=30):
+def chunk_document(text, chunk_size=500, overlap=50):
     """Split document into overlapping chunks for RAG"""
     words = text.split()
     chunks = []
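Reviewer note: only the chunking defaults change here (300/30 words up to 500/50 words), i.e. larger chunks with proportionally larger overlap. The hunk cuts off before the loop, so the overlapping word-window chunker sketched below is an assumption about the rest of the function, not the commit's code.

```python
def chunk_document_sketch(text, chunk_size=500, overlap=50):
    """Overlapping word-window chunking consistent with the new defaults."""
    words = text.split()
    chunks = []
    step = chunk_size - overlap          # advance 450 words per chunk
    for start in range(0, len(words), step):
        chunk = " ".join(words[start:start + chunk_size])
        if chunk:
            chunks.append(chunk)
        if start + chunk_size >= len(words):
            break                        # last window already covers the tail
    return chunks
```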
@@ -387,8 +401,8 @@ def create_embeddings(chunks):
         return None
 
     try:
-        # Process in smaller batches to avoid memory issues
-        batch_size = 32
+        # Process in larger batches with 24GB GPU
+        batch_size = 64
        embeddings = []
 
         for i in range(0, len(chunks), batch_size):
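Reviewer note: only the batch size changes (32 to 64). For context, here is a self-contained sketch of the batched encode loop this tunes; the real create_embeddings uses the module-level embedding_model and its own error handling, so the parametrized version below is an assumption.

```python
import numpy as np

def create_embeddings_sketch(chunks, model, batch_size=64):
    """Encode chunks batch by batch; returns an (n_chunks, dim) array or None."""
    if model is None or not chunks:
        return None
    parts = []
    for i in range(0, len(chunks), batch_size):
        batch = chunks[i:i + batch_size]
        parts.append(model.encode(batch, show_progress_bar=False))
    return np.vstack(parts)
```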
@@ -401,10 +415,10 @@ def create_embeddings(chunks):
         print(f"Error creating embeddings: {e}")
         return None
 
-def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
+def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
     """Retrieve most relevant chunks for a question"""
     if embedding_model is None or embeddings is None:
-        return chunks[:2]  # Fallback to first 2 chunks (reduced from 3)
+        return chunks[:3]  # Fallback to first 3 chunks
 
     try:
         question_embedding = embedding_model.encode([question], show_progress_bar=False)
@@ -417,7 +431,7 @@ def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
         return relevant_chunks
     except Exception as e:
         print(f"Error retrieving chunks: {e}")
-        return chunks[:2]  # Fallback
+        return chunks[:3]  # Fallback
 
 def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
     """Main processing function for uploaded PDF"""
@@ -467,10 +481,6 @@ def clear_all():
     document_chunks = []
     document_embeddings = None
 
-    # Clear GPU memory
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-
     return None, "", gr.Tabs(visible=False)
 
 
@@ -676,23 +686,15 @@ with gr.Blocks(
         input_len = inputs["input_ids"].shape[-1]
 
         with torch.inference_mode():
-            # Clear cache before generation
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-
             generation = chatbot_model.generate(
                 **inputs,
-                max_new_tokens=200,  # Reduced from 300 to save memory
+                max_new_tokens=400,  # Increased for 24GB GPU
                 do_sample=False,
                 temperature=0.7,
                 pad_token_id=chatbot_processor.tokenizer.pad_token_id,
-                use_cache=False  # Disable KV cache to save memory
+                use_cache=True  # Enable KV cache with more VRAM
             )
             generation = generation[0][input_len:]
-
-            # Clear cache after generation
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
 
         response = chatbot_processor.decode(generation, skip_special_tokens=True)
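Reviewer note: max_new_tokens goes up and the KV cache is re-enabled, trading VRAM for much faster decoding; the manual empty_cache() calls around generation are dropped. One observation: with do_sample=False generation is greedy, so the temperature=0.7 argument has no effect (recent transformers versions log a warning about it). A minimal sketch of an equivalent warning-free call, reusing the objects already defined in app.py:

```python
# Sketch only: same greedy-decoding setup without the unused temperature argument.
# chatbot_model, chatbot_processor and inputs are the objects built earlier in app.py.
with torch.inference_mode():
    output = chatbot_model.generate(
        **inputs,
        max_new_tokens=400,
        do_sample=False,   # greedy decoding; sampling parameters are ignored
        use_cache=True,    # KV cache: more VRAM, much faster token generation
        pad_token_id=chatbot_processor.tokenizer.pad_token_id,
    )
reply = chatbot_processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
```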
 
 