Update app.py
app.py
CHANGED
@@ -15,10 +15,11 @@ try:
     from sentence_transformers import SentenceTransformer
     import numpy as np
     from sklearn.metrics.pairwise import cosine_similarity
+    import google.generativeai as genai
     RAG_DEPENDENCIES_AVAILABLE = True
 except ImportError as e:
     print(f"RAG dependencies not available: {e}")
-    print("Please install: pip install sentence-transformers scikit-learn")
+    print("Please install: pip install sentence-transformers scikit-learn google-generativeai")
     RAG_DEPENDENCIES_AVAILABLE = False
     SentenceTransformer = None
 import os
@@ -320,21 +321,32 @@ hf_token = os.getenv('HF_TOKEN')
 # Don't load models initially - load them on demand
 model_status = "✅ Models ready (Dynamic loading)"
 
-# Initialize embedding model
+# Initialize embedding model and Gemini API
 if RAG_DEPENDENCIES_AVAILABLE:
     try:
         print("Loading embedding model for RAG...")
-        # Use CPU for embedding model to save GPU memory for main models
         embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
         print("✅ Embedding model loaded successfully (CPU)")
+
+        # Initialize Gemini API
+        gemini_api_key = os.getenv('GEMINI_API_KEY')
+        if gemini_api_key:
+            genai.configure(api_key=gemini_api_key)
+            gemini_model = genai.GenerativeModel('gemma-3n-e4b-it')
+            print("✅ Gemini API configured successfully")
+        else:
+            print("❌ GEMINI_API_KEY not found in environment")
+            gemini_model = None
     except Exception as e:
-        print(f"❌ Error loading embedding model: {e}")
+        print(f"❌ Error loading models: {e}")
         import traceback
         traceback.print_exc()
         embedding_model = None
+        gemini_model = None
 else:
     print("❌ RAG dependencies not available")
     embedding_model = None
+    gemini_model = None
 
 # Model management functions
 def load_dolphin_model():
@@ -371,59 +383,29 @@ def unload_dolphin_model():
         torch.cuda.empty_cache()
     print("✅ DOLPHIN model unloaded")
 
-def load_chatbot_model():
-    """
-    global chatbot_model, chatbot_processor, current_model
-
-    if current_model == "chatbot":
-        return chatbot_model, chatbot_processor
+def initialize_gemini_model():
+    """Initialize Gemini API model"""
+    global gemini_model
 
-
-
+    if gemini_model is not None:
+        return gemini_model
 
     try:
-
-
-
-
-
-
-
-
-
-
-
-        chatbot_processor = AutoProcessor.from_pretrained(
-            "google/gemma-3n-e4b-it",
-            token=hf_token
-        )
-
-        current_model = "chatbot"
-        print("✅ Gemma chatbot model loaded")
-        return chatbot_model, chatbot_processor
-    else:
-        print("❌ No HF_TOKEN found")
-        return None, None
+        gemini_api_key = os.getenv('GEMINI_API_KEY')
+        if not gemini_api_key:
+            print("❌ GEMINI_API_KEY not found in environment")
+            return None
+
+        print("Initializing Gemini API...")
+        genai.configure(api_key=gemini_api_key)
+        gemini_model = genai.GenerativeModel('gemma-3n-e4b-it')
+        print("✅ Gemini API model ready")
+        return gemini_model
     except Exception as e:
-        print(f"❌ Error loading chatbot model: {e}")
+        print(f"❌ Error initializing Gemini model: {e}")
         import traceback
         traceback.print_exc()
-        return None
-
-def unload_chatbot_model():
-    """Unload chatbot model to free memory"""
-    global chatbot_model, chatbot_processor, current_model
-
-    if chatbot_model is not None:
-        print("Unloading Gemma chatbot model...")
-        del chatbot_model, chatbot_processor
-        chatbot_model = None
-        chatbot_processor = None
-        if current_model == "chatbot":
-            current_model = None
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        print("✅ Gemma chatbot model unloaded")
+        return None
 
 
 # Global state for managing tabs
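The lazy initialize_gemini_model() above uses only public google-generativeai calls (configure, GenerativeModel, generate_content). Below is a minimal standalone sketch for checking that path outside the Space — not part of this commit — assuming GEMINI_API_KEY is exported (for example as a Space secret) and reusing the model name from the diff.

# Standalone sanity check for the Gemini API path used in this commit.
# Assumes GEMINI_API_KEY is set; the model name is copied from the diff above.
import os
import google.generativeai as genai

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise SystemExit("Set GEMINI_API_KEY before running this check")

genai.configure(api_key=api_key)
model = genai.GenerativeModel("gemma-3n-e4b-it")

# One short prompt is enough to confirm the key and the model name are usable.
response = model.generate_content("Reply with the single word: ready")
print(response.text)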
@@ -431,12 +413,10 @@ processed_markdown = ""
 show_results_tab = False
 document_chunks = []
 document_embeddings = None
-embedding_model = None
 
-# Global model state
+# Global model state
 dolphin_model = None
-chatbot_model = None
-chatbot_processor = None
+gemini_model = None
 current_model = None  # Track which model is currently loaded
 
 
@@ -518,9 +498,8 @@ def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
         document_embeddings = create_embeddings(document_chunks)
         print(f"Created {len(document_chunks)} chunks")
 
-        #
+        # Keep DOLPHIN model loaded for GPU usage
         progress(0.95, desc="Preparing chatbot...")
-        unload_dolphin_model()
 
         show_results_tab = True
         progress(1.0, desc="PDF processed successfully!")
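The hunk above calls create_embeddings() and retrieve_relevant_chunks(), which are defined elsewhere in app.py and not shown in this diff. The following is a hypothetical sketch of what they plausibly look like, based only on the imports the file declares (sentence-transformers, numpy, scikit-learn's cosine_similarity); the real implementations may differ.

# Hypothetical RAG helpers; names match the calls in the diff, bodies are assumptions.
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

embedding_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")

def create_embeddings(chunks):
    # Encode each markdown chunk into a dense vector on CPU.
    return embedding_model.encode(chunks, convert_to_numpy=True)

def retrieve_relevant_chunks(query, chunks, embeddings, top_k=3):
    # Embed the query and rank chunks by cosine similarity; top_k is an assumed default.
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)
    scores = cosine_similarity(query_embedding, embeddings)[0]
    top_indices = np.argsort(scores)[::-1][:top_k]
    return [chunks[i] for i in top_indices]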
@@ -549,11 +528,10 @@ def clear_all():
     document_chunks = []
     document_embeddings = None
 
-    # Unload
+    # Unload DOLPHIN model
     unload_dolphin_model()
-    unload_chatbot_model()
 
-    return None, "
+    return None, "", gr.Tabs(visible=False)
 
 
 # Create Gradio interface
@@ -608,12 +586,14 @@ with gr.Blocks(
     # Home Tab
     with gr.TabItem("🏠 Home", id="home"):
         embedding_status = "✅ RAG ready" if embedding_model else "❌ RAG not loaded"
+        gemini_status = "✅ Gemini API ready" if gemini_model else "❌ Gemini API not configured"
         current_status = f"Currently loaded: {current_model or 'None'}"
         gr.Markdown(
             "# Scholar Express\n"
-            "### Upload a research paper to get a web-friendly version
+            "### Upload a research paper to get a web-friendly version and an AI chatbot powered by Gemini API. DOLPHIN model runs on GPU for optimal performance.\n"
             f"**System:** {model_status}\n"
             f"**RAG System:** {embedding_status}\n"
+            f"**Gemini API:** {gemini_status}\n"
             f"**Status:** {current_status}"
         )
 
@@ -648,7 +628,7 @@ with gr.Blocks(
 
             # Status output (hidden during processing)
             status_output = gr.Markdown(
-                "
+                "",
                 elem_classes="status-message"
             )
 
@@ -685,7 +665,7 @@ with gr.Blocks(
                 send_btn = gr.Button("Send", variant="primary", scale=1)
 
             gr.Markdown(
-                "*Ask questions about your processed document. The AI uses RAG (Retrieval-Augmented Generation) to find relevant sections and provide accurate answers.*",
+                "*Ask questions about your processed document. The AI uses RAG (Retrieval-Augmented Generation) with Gemini API to find relevant sections and provide accurate answers.*",
                 elem_id="chat-notice"
             )
 
@@ -714,7 +694,7 @@ with gr.Blocks(
         outputs=[chat_tab]
     )
 
-    # Chatbot functionality
+    # Chatbot functionality with Gemini API
    def chatbot_response(message, history):
        if not message.strip():
            return history
@@ -723,61 +703,42 @@ with gr.Blocks(
            return history + [[message, "❌ Please process a PDF document first before asking questions."]]

        try:
-            #
-            model, processor = load_chatbot_model()
+            # Initialize Gemini model
+            model = initialize_gemini_model()

-            if model is None
-                return history + [[message, "❌ Failed to
+            if model is None:
+                return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]

-            # Use RAG to get relevant chunks
+            # Use RAG to get relevant chunks from markdown
            if document_chunks and len(document_chunks) > 0:
                relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings)
                context = "\n\n".join(relevant_chunks)
            else:
                # Fallback to truncated document if RAG fails
-                context = processed_markdown[:
-
-            # Create chat messages with shorter context
-            messages = [
-                {
-                    "role": "system",
-                    "content": [{"type": "text", "text": "You are a helpful assistant. Answer questions about the document concisely."}]
-                },
-                {
-                    "role": "user",
-                    "content": [{"type": "text", "text": f"Context:\n{context}\n\nQ: {message}"}]
-                }
-            ]
+                context = processed_markdown[:2000] + "..." if len(processed_markdown) > 2000 else processed_markdown

-            #
-
-
-
-
-
-
-
-
-            input_len = inputs["input_ids"].shape[-1]
+            # Create prompt for Gemini
+            prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
+
+Context from the document:
+{context}
+
+Question: {message}
+
+Please provide a clear and helpful answer based on the context provided."""

-
-
-                **inputs,
-                max_new_tokens=300,  # Can be higher now with single model
-                do_sample=False,
-                temperature=0.7,
-                pad_token_id=processor.tokenizer.pad_token_id,
-                use_cache=True,  # Can enable cache with single model
-                num_beams=1
-            )
-            generation = generation[0][input_len:]
+            # Generate response using Gemini API
+            response = model.generate_content(prompt)

-
+            response_text = response.text if hasattr(response, 'text') else str(response)

-            return history + [[message,
+            return history + [[message, response_text]]

        except Exception as e:
            error_msg = f"❌ Error generating response: {str(e)}"
+            print(f"Full error: {e}")
+            import traceback
+            traceback.print_exc()
            return history + [[message, error_msg]]

    send_btn.click(
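The hunk cuts off inside the final send_btn.click( call, so the event wiring is not visible in this diff. Below is a hypothetical, self-contained sketch of how chatbot_response is typically hooked up in Gradio; the component names msg_input and chatbot are stand-ins, not taken from app.py.

# Minimal Gradio wiring sketch (assumed names: msg_input, chatbot, send_btn).
import gradio as gr

def chatbot_response(message, history):
    # Stand-in for the RAG + Gemini handler defined in the diff; echoes the input.
    return history + [[message, f"(echo) {message}"]]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()  # pair-format history, matching the [[message, reply]] usage above
    msg_input = gr.Textbox(placeholder="Ask about the document...")
    send_btn = gr.Button("Send", variant="primary")

    # chatbot_response returns the full updated history, so the Chatbot component
    # is both an input (current history) and the output.
    send_btn.click(fn=chatbot_response, inputs=[msg_input, chatbot], outputs=[chatbot])
    # A second handler clears the textbox after sending.
    send_btn.click(fn=lambda: "", inputs=None, outputs=[msg_input])

if __name__ == "__main__":
    demo.launch()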