raksama19 committed on
Commit
2278c9f
·
verified ·
1 Parent(s): 95c40e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -3
app.py CHANGED
@@ -422,6 +422,14 @@ def generate_alt_text_for_image(pil_image):
422
  print("❌ Gemini model not initialized for alt text generation")
423
  return "Image description unavailable"
424
 
 
 
 
 
 
 
 
 
425
  # Create a detailed prompt for alt text generation
426
  prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
427
 
@@ -433,17 +441,36 @@ Focus on:
433
 
434
  Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
435
 
436
- print(f"🔍 Generating alt text for image...")
437
 
438
  # Generate alt text using Gemini API with proper multimodal input
 
439
  response = model.generate_content([prompt, pil_image])
440
 
 
 
 
441
  if hasattr(response, 'text') and response.text:
442
  alt_text = response.text.strip()
443
  print(f"✅ Alt text generated: {alt_text[:100]}...")
444
  else:
445
- print(f"❌ No text in response: {response}")
446
- return "Image description unavailable"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
  # Clean up the alt text
449
  alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')
 
422
  print("❌ Gemini model not initialized for alt text generation")
423
  return "Image description unavailable"
424
 
425
+ # Debug: Check image format and properties
426
+ print(f"🔍 Image format: {pil_image.format}, mode: {pil_image.mode}, size: {pil_image.size}")
427
+
428
+ # Ensure image is in RGB mode (required for Gemini API)
429
+ if pil_image.mode != 'RGB':
430
+ print(f"Converting image from {pil_image.mode} to RGB")
431
+ pil_image = pil_image.convert('RGB')
432
+
433
  # Create a detailed prompt for alt text generation
434
  prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
435
 
 
441
 
442
  Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
443
 
444
+ print(f"🔍 Generating alt text for image with Gemma 3n...")
445
 
446
  # Generate alt text using Gemini API with proper multimodal input
447
+ # Pass the PIL image directly - Gemini API handles PIL Image objects
448
  response = model.generate_content([prompt, pil_image])
449
 
450
+ print(f"📡 API response received: {type(response)}")
451
+ print(f"📡 Response attributes: {dir(response)}")
452
+
453
  if hasattr(response, 'text') and response.text:
454
  alt_text = response.text.strip()
455
  print(f"✅ Alt text generated: {alt_text[:100]}...")
456
  else:
457
+ print(f"❌ No text in response. Response: {response}")
458
+ # Try to access response differently
459
+ if hasattr(response, 'candidates') and response.candidates:
460
+ candidate = response.candidates[0]
461
+ if hasattr(candidate, 'content') and candidate.content:
462
+ if hasattr(candidate.content, 'parts') and candidate.content.parts:
463
+ alt_text = candidate.content.parts[0].text.strip()
464
+ print(f"✅ Alt text from candidates: {alt_text[:100]}...")
465
+ else:
466
+ print(f"❌ No parts in content")
467
+ return "Image description unavailable"
468
+ else:
469
+ print(f"❌ No content in candidate")
470
+ return "Image description unavailable"
471
+ else:
472
+ print(f"❌ No candidates in response")
473
+ return "Image description unavailable"
474
 
475
  # Clean up the alt text
476
  alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')