Spaces:

raksama19
/

Test-Dolphin-PDF

Runtime error

App Files Community

raksama19 committed on Jul 17

Commit

2278c9f

verified ·

1 Parent(s): 95c40e3

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -3

app.py CHANGED Viewed

@@ -422,6 +422,14 @@ def generate_alt_text_for_image(pil_image):
             print("❌ Gemini model not initialized for alt text generation")
             return "Image description unavailable"
         # Create a detailed prompt for alt text generation
         prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
@@ -433,17 +441,36 @@ Focus on:
 Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
-        print(f"🔍 Generating alt text for image...")
         # Generate alt text using Gemini API with proper multimodal input
         response = model.generate_content([prompt, pil_image])
         if hasattr(response, 'text') and response.text:
             alt_text = response.text.strip()
             print(f"✅ Alt text generated: {alt_text[:100]}...")
         else:
-            print(f"❌ No text in response: {response}")
-            return "Image description unavailable"
         # Clean up the alt text
         alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')

             print("❌ Gemini model not initialized for alt text generation")
             return "Image description unavailable"
+        # Debug: Check image format and properties
+        print(f"🔍 Image format: {pil_image.format}, mode: {pil_image.mode}, size: {pil_image.size}")
+        # Ensure image is in RGB mode (required for Gemini API)
+        if pil_image.mode != 'RGB':
+            print(f"Converting image from {pil_image.mode} to RGB")
+            pil_image = pil_image.convert('RGB')
         # Create a detailed prompt for alt text generation
         prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
 Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
+        print(f"🔍 Generating alt text for image with Gemma 3n...")
         # Generate alt text using Gemini API with proper multimodal input
+        # Pass the PIL image directly - Gemini API handles PIL Image objects
         response = model.generate_content([prompt, pil_image])
+        print(f"📡 API response received: {type(response)}")
+        print(f"📡 Response attributes: {dir(response)}")
         if hasattr(response, 'text') and response.text:
             alt_text = response.text.strip()
             print(f"✅ Alt text generated: {alt_text[:100]}...")
         else:
+            print(f"❌ No text in response. Response: {response}")
+            # Try to access response differently
+            if hasattr(response, 'candidates') and response.candidates:
+                candidate = response.candidates[0]
+                if hasattr(candidate, 'content') and candidate.content:
+                    if hasattr(candidate.content, 'parts') and candidate.content.parts:
+                        alt_text = candidate.content.parts[0].text.strip()
+                        print(f"✅ Alt text from candidates: {alt_text[:100]}...")
+                    else:
+                        print(f"❌ No parts in content")
+                        return "Image description unavailable"
+                else:
+                    print(f"❌ No content in candidate")
+                    return "Image description unavailable"
+            else:
+                print(f"❌ No candidates in response")
+                return "Image description unavailable"
         # Clean up the alt text
         alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')