Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -422,6 +422,14 @@ def generate_alt_text_for_image(pil_image):
|
|
422 |
print("❌ Gemini model not initialized for alt text generation")
|
423 |
return "Image description unavailable"
|
424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
# Create a detailed prompt for alt text generation
|
426 |
prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
|
427 |
|
@@ -433,17 +441,36 @@ Focus on:
|
|
433 |
|
434 |
Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
|
435 |
|
436 |
-
print(f"π Generating alt text for image...")
|
437 |
|
438 |
# Generate alt text using Gemini API with proper multimodal input
|
|
|
439 |
response = model.generate_content([prompt, pil_image])
|
440 |
|
|
|
|
|
|
|
441 |
if hasattr(response, 'text') and response.text:
|
442 |
alt_text = response.text.strip()
|
443 |
print(f"✅ Alt text generated: {alt_text[:100]}...")
|
444 |
else:
|
445 |
-
print(f"❌ No text in response: {response}")
|
446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
|
448 |
# Clean up the alt text
|
449 |
alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')
|
|
|
422 |
print("❌ Gemini model not initialized for alt text generation")
|
423 |
return "Image description unavailable"
|
424 |
|
425 |
+
# Debug: Check image format and properties
|
426 |
+
print(f"π Image format: {pil_image.format}, mode: {pil_image.mode}, size: {pil_image.size}")
|
427 |
+
|
428 |
+
# Ensure image is in RGB mode (required for Gemini API)
|
429 |
+
if pil_image.mode != 'RGB':
|
430 |
+
print(f"Converting image from {pil_image.mode} to RGB")
|
431 |
+
pil_image = pil_image.convert('RGB')
|
432 |
+
|
433 |
# Create a detailed prompt for alt text generation
|
434 |
prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
|
435 |
|
|
|
441 |
|
442 |
Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
|
443 |
|
444 |
+
print(f"π Generating alt text for image with Gemma 3n...")
|
445 |
|
446 |
# Generate alt text using Gemini API with proper multimodal input
|
447 |
+
# Pass the PIL image directly - Gemini API handles PIL Image objects
|
448 |
response = model.generate_content([prompt, pil_image])
|
449 |
|
450 |
+
print(f"📡 API response received: {type(response)}")
|
451 |
+
print(f"📡 Response attributes: {dir(response)}")
|
452 |
+
|
453 |
if hasattr(response, 'text') and response.text:
|
454 |
alt_text = response.text.strip()
|
455 |
print(f"✅ Alt text generated: {alt_text[:100]}...")
|
456 |
else:
|
457 |
+
print(f"❌ No text in response. Response: {response}")
|
458 |
+
# Try to access response differently
|
459 |
+
if hasattr(response, 'candidates') and response.candidates:
|
460 |
+
candidate = response.candidates[0]
|
461 |
+
if hasattr(candidate, 'content') and candidate.content:
|
462 |
+
if hasattr(candidate.content, 'parts') and candidate.content.parts:
|
463 |
+
alt_text = candidate.content.parts[0].text.strip()
|
464 |
+
print(f"✅ Alt text from candidates: {alt_text[:100]}...")
|
465 |
+
else:
|
466 |
+
print(f"❌ No parts in content")
|
467 |
+
return "Image description unavailable"
|
468 |
+
else:
|
469 |
+
print(f"❌ No content in candidate")
|
470 |
+
return "Image description unavailable"
|
471 |
+
else:
|
472 |
+
print(f"❌ No candidates in response")
|
473 |
+
return "Image description unavailable"
|
474 |
|
475 |
# Clean up the alt text
|
476 |
alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')
|