Noo88ear committed on
Commit
8833692
·
verified ·
1 Parent(s): 5643ef9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -10
app.py CHANGED
@@ -127,7 +127,7 @@ def enhance_prompt_with_gemini(prompt: str, style: str) -> str:
127
  Return only the enhanced prompt without explanation.
128
  """
129
 
130
- model = genai.GenerativeModel('gemini-2.0-flash-exp')
131
  response = model.generate_content(enhancement_prompt)
132
  enhanced = response.text.strip()
133
 
@@ -172,9 +172,9 @@ def generate_marketing_image(prompt: str, style: str = "realistic") -> str:
172
  # Initialize the genai SDK client
173
  client = genai_sdk.Client(api_key=GOOGLE_API_KEY)
174
 
175
- # Generate image using Imagen 3 via SDK
176
  result = client.models.generate_images(
177
- model="imagen-3.0-generate-002",
178
  prompt=enhanced_prompt,
179
  config={
180
  "number_of_images": 1,
@@ -201,7 +201,8 @@ def generate_marketing_image(prompt: str, style: str = "realistic") -> str:
201
  "prompt": prompt,
202
  "enhanced_prompt": enhanced_prompt,
203
  "style": style,
204
- "generation_method": "google-genai-sdk",
 
205
  "real_ai_generation": True
206
  }
207
 
@@ -320,8 +321,8 @@ def analyze_marketing_image_with_gemini(image_url: str, prompt: str, review_guid
320
  logger.error("Could not load image for analysis")
321
  return _fallback_image_analysis(prompt, review_guidelines)
322
 
323
- # Generate analysis using Gemini Vision
324
- model = genai.GenerativeModel('gemini-2.0-flash-exp')
325
  response = model.generate_content([analysis_prompt, image])
326
  analysis_text = response.text
327
 
@@ -396,7 +397,8 @@ def _parse_gemini_analysis(analysis_text: str, original_prompt: str) -> Dict[str
396
  "language_issues": language_issues,
397
  "effectiveness_issues": effectiveness_issues,
398
  "recommendations": recommendations[:5], # Limit to top 5
399
- "analysis_method": "gemini_vision",
 
400
  "original_prompt": original_prompt
401
  }
402
 
@@ -647,14 +649,17 @@ def process_generated_image_and_results(api_response_str: str) -> Tuple[Image.Im
647
  review_method = metadata.get('review_method', 'unknown')
648
 
649
  generation_info = ""
650
- if generation_method == "google-genai-sdk":
651
- generation_info = "🎨 **Generated with**: Google Imagen3 SDK (Real AI)\n"
 
 
 
652
  elif generation_method == "placeholder":
653
  generation_info = "🎨 **Generated with**: Placeholder (Fallback)\n"
654
 
655
  review_method_info = ""
656
  if review_method == "gemini_vision":
657
- review_method_info = "πŸ” **Reviewed with**: Gemini Vision (AI Analysis)\n"
658
  elif review_method == "fallback_text":
659
  review_method_info = "πŸ” **Reviewed with**: Text Analysis (Fallback)\n"
660
 
@@ -852,9 +857,70 @@ with gr.Blocks(title="Marketing Image Generator MCP", theme=gr.themes.Soft()) as
852
  </div>
853
  """)
854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
855
  if __name__ == "__main__":
856
  logger.info("πŸš€ Starting Marketing Image Generator with MCP Server")
857
  logger.info(f"πŸ”‘ Google AI: {'βœ… Configured' if GOOGLE_API_KEY else '❌ No API Key'}")
858
  logger.info("πŸ”— MCP Server will be available at /gradio_api/mcp/sse")
859
 
 
 
 
 
 
 
860
  demo.launch(mcp_server=True)
 
127
  Return only the enhanced prompt without explanation.
128
  """
129
 
130
+ model = genai.GenerativeModel('gemini-2.5-pro')
131
  response = model.generate_content(enhancement_prompt)
132
  enhanced = response.text.strip()
133
 
 
172
  # Initialize the genai SDK client
173
  client = genai_sdk.Client(api_key=GOOGLE_API_KEY)
174
 
175
+ # Generate image using Imagen 4.0 (best balance of quality and speed)
176
  result = client.models.generate_images(
177
+ model="imagen-4.0-generate-preview-06-06",
178
  prompt=enhanced_prompt,
179
  config={
180
  "number_of_images": 1,
 
201
  "prompt": prompt,
202
  "enhanced_prompt": enhanced_prompt,
203
  "style": style,
204
+ "generation_method": "imagen-4.0",
205
+ "model_name": "imagen-4.0-generate-preview-06-06",
206
  "real_ai_generation": True
207
  }
208
 
 
321
  logger.error("Could not load image for analysis")
322
  return _fallback_image_analysis(prompt, review_guidelines)
323
 
324
+ # Generate analysis using Gemini 2.5 Pro with enhanced multimodal understanding
325
+ model = genai.GenerativeModel('gemini-2.5-pro')
326
  response = model.generate_content([analysis_prompt, image])
327
  analysis_text = response.text
328
 
 
397
  "language_issues": language_issues,
398
  "effectiveness_issues": effectiveness_issues,
399
  "recommendations": recommendations[:5], # Limit to top 5
400
+ "analysis_method": "gemini-2.5-pro-vision",
401
+ "model_name": "gemini-2.5-pro",
402
  "original_prompt": original_prompt
403
  }
404
 
 
649
  review_method = metadata.get('review_method', 'unknown')
650
 
651
  generation_info = ""
652
+ if generation_method == "imagen-4.0":
653
+ model_name = metadata.get('model_name', 'imagen-4.0-generate-preview-06-06')
654
+ generation_info = f"🎨 **Generated with**: {model_name} (Real AI)\n"
655
+ elif generation_method == "google-genai-sdk":
656
+ generation_info = "🎨 **Generated with**: Google Imagen 4.0 (Real AI)\n"
657
  elif generation_method == "placeholder":
658
  generation_info = "🎨 **Generated with**: Placeholder (Fallback)\n"
659
 
660
  review_method_info = ""
661
  if review_method == "gemini_vision":
662
+ review_method_info = "πŸ” **Reviewed with**: Gemini 2.5 Pro Vision (AI Analysis)\n"
663
  elif review_method == "fallback_text":
664
  review_method_info = "πŸ” **Reviewed with**: Text Analysis (Fallback)\n"
665
 
 
857
  </div>
858
  """)
859
 
860
def test_imagen4_models():
    """Probe which Imagen 4.0 preview models the configured API key can use.

    Sends one ``generate_images`` request (prompt "A simple red circle", one
    PNG) to each candidate model. A model counts as accessible when the
    response carries at least one generated image. Per-model failures are
    logged rather than raised, so one unavailable model does not stop the
    remaining probes.

    NOTE: every probe is a real image-generation request, so a single call
    to this function issues up to three API requests.

    Returns:
        list: Names of the models that returned an image, in probe order.
        Empty when Google AI is not configured or no model returned an image.
    """
    if not GEMINI_AVAILABLE or not GOOGLE_API_KEY:
        logger.warning("❌ Cannot test Imagen 4.0 - Google AI not configured")
        return []

    imagen4_models = [
        "imagen-4.0-generate-preview-06-06",
        "imagen-4.0-fast-generate-preview-06-06",
        "imagen-4.0-ultra-generate-preview-06-06",
    ]

    logger.info("🧪 Testing Imagen 4.0 model access...")
    working_models = []

    for model_name in imagen4_models:
        try:
            logger.info(f"Testing {model_name}...")

            client = genai_sdk.Client(api_key=GOOGLE_API_KEY)
            result = client.models.generate_images(
                model=model_name,
                prompt="A simple red circle",
                config={
                    "number_of_images": 1,
                    "output_mime_type": "image/png",
                },
            )

            # getattr + truthiness also covers a response whose
            # `generated_images` is None or empty, which is reported as
            # "no image" instead of tripping a TypeError on len(None).
            if getattr(result, "generated_images", None):
                working_models.append(model_name)
                logger.info(f"✅ {model_name}: ACCESSIBLE")
            else:
                logger.warning(f"⚠️ {model_name}: No image returned")

        except Exception as e:
            # Best-effort probe: classify the failure from the error text
            # and keep going with the next model.
            error_msg = str(e)
            error_lower = error_msg.lower()
            if "404" in error_msg or "not found" in error_lower:
                logger.warning(f"⚠️ {model_name}: Model not found/available")
            elif "403" in error_msg or "permission" in error_lower:
                logger.warning(f"⚠️ {model_name}: Permission denied")
            else:
                logger.error(f"❌ {model_name}: {error_msg}")

    logger.info("=" * 50)
    logger.info(f"🎉 ACCESSIBLE IMAGEN 4.0 MODELS: {len(working_models)}")
    for model in working_models:
        logger.info(f" ✅ {model}")

    if not working_models:
        # The earlier wording promised a fallback to Imagen 3.0, but this
        # function performs no fallback itself, so the message now only
        # reports what was actually observed.
        logger.info(" ⚠️ No Imagen 4.0 models accessible")

    logger.info("=" * 50)
    return working_models
914
+
915
  if __name__ == "__main__":
916
  logger.info("πŸš€ Starting Marketing Image Generator with MCP Server")
917
  logger.info(f"πŸ”‘ Google AI: {'βœ… Configured' if GOOGLE_API_KEY else '❌ No API Key'}")
918
  logger.info("πŸ”— MCP Server will be available at /gradio_api/mcp/sse")
919
 
920
+ # Test Imagen 4.0 model access on startup
921
+ if GOOGLE_API_KEY:
922
+ accessible_imagen4_models = test_imagen4_models()
923
+ if accessible_imagen4_models:
924
+ logger.info(f"πŸ’‘ UPGRADE OPPORTUNITY: You can use {accessible_imagen4_models[0]} for better quality!")
925
+
926
  demo.launch(mcp_server=True)