raksama19 committed on
Commit
4789fc5
·
verified ·
1 Parent(s): dcbaa35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -50
app.py CHANGED
@@ -17,6 +17,7 @@ try:
17
  import numpy as np
18
  from sklearn.metrics.pairwise import cosine_similarity
19
  import google.generativeai as genai
 
20
  RAG_DEPENDENCIES_AVAILABLE = True
21
  except ImportError as e:
22
  print(f"RAG dependencies not available: {e}")
@@ -338,21 +339,21 @@ if RAG_DEPENDENCIES_AVAILABLE:
338
  gemini_api_key = os.getenv('GEMINI_API_KEY')
339
  if gemini_api_key:
340
  genai.configure(api_key=gemini_api_key)
341
- gemini_model = genai.GenerativeModel('gemma-3n-e4b-it')
342
  print("βœ… Gemini API configured successfully")
343
  else:
344
  print("❌ GEMINI_API_KEY not found in environment")
345
- gemini_model = None
346
  except Exception as e:
347
  print(f"❌ Error loading models: {e}")
348
  import traceback
349
  traceback.print_exc()
350
  embedding_model = None
351
- gemini_model = None
352
  else:
353
  print("❌ RAG dependencies not available")
354
  embedding_model = None
355
- gemini_model = None
356
 
357
  # Model management functions
358
  def load_dolphin_model():
@@ -388,12 +389,12 @@ def unload_dolphin_model():
388
  torch.cuda.empty_cache()
389
  print("βœ… DOLPHIN model unloaded")
390
 
391
- def initialize_gemini_model():
392
- """Initialize Gemini API model"""
393
- global gemini_model
394
 
395
- if gemini_model is not None:
396
- return gemini_model
397
 
398
  try:
399
  gemini_api_key = os.getenv('GEMINI_API_KEY')
@@ -401,35 +402,41 @@ def initialize_gemini_model():
401
  print("❌ GEMINI_API_KEY not found in environment")
402
  return None
403
 
404
- print("Initializing Gemini API...")
405
- genai.configure(api_key=gemini_api_key)
406
- gemini_model = genai.GenerativeModel('gemma-3n-e4b-it')
407
- print("βœ… Gemini API model ready (gemma-3n-e4b-it)")
408
- return gemini_model
409
  except Exception as e:
410
- print(f"❌ Error initializing Gemini model: {e}")
411
  import traceback
412
  traceback.print_exc()
413
  return None
414
 
415
 
416
  def generate_alt_text_for_image(pil_image):
417
- """Generate alt text for an image using Gemma 3n model"""
418
  try:
419
- # Initialize Gemini model
420
- model = initialize_gemini_model()
421
- if model is None:
422
- print("❌ Gemini model not initialized for alt text generation")
423
  return "Image description unavailable"
424
 
425
  # Debug: Check image format and properties
426
  print(f"πŸ” Image format: {pil_image.format}, mode: {pil_image.mode}, size: {pil_image.size}")
427
 
428
- # Ensure image is in RGB mode (required for Gemini API)
429
  if pil_image.mode != 'RGB':
430
  print(f"Converting image from {pil_image.mode} to RGB")
431
  pil_image = pil_image.convert('RGB')
432
 
 
 
 
 
 
 
 
433
  # Create a detailed prompt for alt text generation
434
  prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
435
 
@@ -441,36 +448,23 @@ Focus on:
441
 
442
  Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
443
 
444
- print(f"πŸ” Generating alt text for image with Gemma 3n...")
445
-
446
- # Generate alt text using Gemini API with proper multimodal input
447
- # Pass the PIL image directly - Gemini API handles PIL Image objects
448
- response = model.generate_content([prompt, pil_image])
 
 
 
449
 
450
  print(f"πŸ“‘ API response received: {type(response)}")
451
- print(f"πŸ“‘ Response attributes: {dir(response)}")
452
 
453
  if hasattr(response, 'text') and response.text:
454
  alt_text = response.text.strip()
455
  print(f"βœ… Alt text generated: {alt_text[:100]}...")
456
  else:
457
  print(f"❌ No text in response. Response: {response}")
458
- # Try to access response differently
459
- if hasattr(response, 'candidates') and response.candidates:
460
- candidate = response.candidates[0]
461
- if hasattr(candidate, 'content') and candidate.content:
462
- if hasattr(candidate.content, 'parts') and candidate.content.parts:
463
- alt_text = candidate.content.parts[0].text.strip()
464
- print(f"βœ… Alt text from candidates: {alt_text[:100]}...")
465
- else:
466
- print(f"❌ No parts in content")
467
- return "Image description unavailable"
468
- else:
469
- print(f"❌ No content in candidate")
470
- return "Image description unavailable"
471
- else:
472
- print(f"❌ No candidates in response")
473
- return "Image description unavailable"
474
 
475
  # Clean up the alt text
476
  alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')
@@ -498,7 +492,7 @@ document_embeddings = None
498
 
499
  # Global model state
500
  dolphin_model = None
501
- gemini_model = None
502
  current_model = None # Track which model is currently loaded
503
 
504
 
@@ -668,7 +662,7 @@ with gr.Blocks(
668
  # Home Tab
669
  with gr.TabItem("🏠 Home", id="home"):
670
  embedding_status = "βœ… RAG ready" if embedding_model else "❌ RAG not loaded"
671
- gemini_status = "βœ… Gemini API ready" if gemini_model else "❌ Gemini API not configured"
672
  current_status = f"Currently loaded: {current_model or 'None'}"
673
  gr.Markdown(
674
  "# Scholar Express - Alt Text Enhanced\n"
@@ -786,11 +780,11 @@ with gr.Blocks(
786
  return history + [[message, "❌ Please process a PDF document first before asking questions."]]
787
 
788
  try:
789
- # Initialize Gemini model
790
- model = initialize_gemini_model()
791
 
792
- if model is None:
793
- return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]
794
 
795
  # Use RAG to get relevant chunks from markdown (balanced for performance vs quota)
796
  if document_chunks and len(document_chunks) > 0:
@@ -821,7 +815,7 @@ Please provide a clear and helpful answer based on the context provided."""
821
 
822
  for attempt in range(max_retries):
823
  try:
824
- response = model.generate_content(prompt)
825
  response_text = response.text if hasattr(response, 'text') else str(response)
826
  return history + [[message, response_text]]
827
  except Exception as api_error:
 
17
  import numpy as np
18
  from sklearn.metrics.pairwise import cosine_similarity
19
  import google.generativeai as genai
20
+ from google.generativeai import types
21
  RAG_DEPENDENCIES_AVAILABLE = True
22
  except ImportError as e:
23
  print(f"RAG dependencies not available: {e}")
 
339
  gemini_api_key = os.getenv('GEMINI_API_KEY')
340
  if gemini_api_key:
341
  genai.configure(api_key=gemini_api_key)
342
+ gemini_client = True # Just mark as configured
343
  print("βœ… Gemini API configured successfully")
344
  else:
345
  print("❌ GEMINI_API_KEY not found in environment")
346
+ gemini_client = None
347
  except Exception as e:
348
  print(f"❌ Error loading models: {e}")
349
  import traceback
350
  traceback.print_exc()
351
  embedding_model = None
352
+ gemini_client = None
353
  else:
354
  print("❌ RAG dependencies not available")
355
  embedding_model = None
356
+ gemini_client = None
357
 
358
  # Model management functions
359
  def load_dolphin_model():
 
389
  torch.cuda.empty_cache()
390
  print("βœ… DOLPHIN model unloaded")
391
 
392
+ def initialize_gemini_client():
393
+ """Initialize Gemini API client"""
394
+ global gemini_client
395
 
396
+ if gemini_client is not None:
397
+ return gemini_client
398
 
399
  try:
400
  gemini_api_key = os.getenv('GEMINI_API_KEY')
 
402
  print("❌ GEMINI_API_KEY not found in environment")
403
  return None
404
 
405
+ print("Initializing Gemini API client...")
406
+ gemini_client = genai.configure(api_key=gemini_api_key)
407
+ print("βœ… Gemini API client ready for gemma-3n-e4b-it")
408
+ return gemini_client
 
409
  except Exception as e:
410
+ print(f"❌ Error initializing Gemini client: {e}")
411
  import traceback
412
  traceback.print_exc()
413
  return None
414
 
415
 
416
  def generate_alt_text_for_image(pil_image):
417
+ """Generate alt text for an image using Gemma 3n model via Google AI API"""
418
  try:
419
+ # Initialize Gemini client
420
+ client = initialize_gemini_client()
421
+ if client is None:
422
+ print("❌ Gemini client not initialized for alt text generation")
423
  return "Image description unavailable"
424
 
425
  # Debug: Check image format and properties
426
  print(f"πŸ” Image format: {pil_image.format}, mode: {pil_image.mode}, size: {pil_image.size}")
427
 
428
+ # Ensure image is in RGB mode
429
  if pil_image.mode != 'RGB':
430
  print(f"Converting image from {pil_image.mode} to RGB")
431
  pil_image = pil_image.convert('RGB')
432
 
433
+ # Convert PIL image to bytes
434
+ buffered = io.BytesIO()
435
+ pil_image.save(buffered, format="JPEG")
436
+ image_bytes = buffered.getvalue()
437
+
438
+ print(f"πŸ” Generating alt text for image with Gemma 3n...")
439
+
440
  # Create a detailed prompt for alt text generation
441
  prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
442
 
 
448
 
449
  Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
450
 
451
+ # Use the Google AI API client with proper format
452
+ response = genai.GenerativeModel('gemma-3n-e4b-it').generate_content([
453
+ types.Part.from_bytes(
454
+ data=image_bytes,
455
+ mime_type='image/jpeg',
456
+ ),
457
+ prompt
458
+ ])
459
 
460
  print(f"πŸ“‘ API response received: {type(response)}")
 
461
 
462
  if hasattr(response, 'text') and response.text:
463
  alt_text = response.text.strip()
464
  print(f"βœ… Alt text generated: {alt_text[:100]}...")
465
  else:
466
  print(f"❌ No text in response. Response: {response}")
467
+ return "Image description unavailable"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
 
469
  # Clean up the alt text
470
  alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')
 
492
 
493
  # Global model state
494
  dolphin_model = None
495
+ gemini_client = None
496
  current_model = None # Track which model is currently loaded
497
 
498
 
 
662
  # Home Tab
663
  with gr.TabItem("🏠 Home", id="home"):
664
  embedding_status = "βœ… RAG ready" if embedding_model else "❌ RAG not loaded"
665
+ gemini_status = "βœ… Gemini API ready" if gemini_client else "❌ Gemini API not configured"
666
  current_status = f"Currently loaded: {current_model or 'None'}"
667
  gr.Markdown(
668
  "# Scholar Express - Alt Text Enhanced\n"
 
780
  return history + [[message, "❌ Please process a PDF document first before asking questions."]]
781
 
782
  try:
783
+ # Initialize Gemini client
784
+ client = initialize_gemini_client()
785
 
786
+ if client is None:
787
+ return history + [[message, "❌ Failed to initialize Gemini client. Please check your GEMINI_API_KEY."]]
788
 
789
  # Use RAG to get relevant chunks from markdown (balanced for performance vs quota)
790
  if document_chunks and len(document_chunks) > 0:
 
815
 
816
  for attempt in range(max_retries):
817
  try:
818
+ response = genai.GenerativeModel('gemma-3n-e4b-it').generate_content(prompt)
819
  response_text = response.text if hasattr(response, 'text') else str(response)
820
  return history + [[message, response_text]]
821
  except Exception as api_error: