Noo88ear committed on
Commit
324a80f
·
verified ·
1 Parent(s): 4eef2fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +227 -119
app.py CHANGED
@@ -1,4 +1,4 @@
1
- """
2
  Marketing Image Generator with Gradio MCP Server
3
  Professional AI image generation using Google Imagen3 with marketing review
4
  Deployed on HuggingFace Spaces with built-in MCP server support
@@ -10,7 +10,7 @@ import logging
10
  import json
11
  import base64
12
  import asyncio
13
- from typing import Dict, Any, Tuple
14
  from PIL import Image
15
  import io
16
 
@@ -23,8 +23,13 @@ def setup_google_credentials():
23
  import tempfile
24
  from google.oauth2 import service_account
25
 
26
- # Parse the JSON credentials
27
- credentials_dict = json.loads(service_account_json)
 
 
 
 
 
28
 
29
  # Create credentials from service account info
30
  credentials = service_account.Credentials.from_service_account_info(credentials_dict)
@@ -236,140 +241,223 @@ def generate_marketing_image(prompt: str, style: str = "realistic") -> str:
236
  "style": style
237
  })
238
 
239
- def analyze_marketing_prompt(prompt: str, review_guidelines: str = "") -> str:
240
  """
241
- Analyze a marketing prompt for quality, relevance, and compliance.
242
 
243
  Args:
244
- prompt (str): The marketing prompt to analyze
 
245
  review_guidelines (str): Specific guidelines to check against
246
 
247
  Returns:
248
  str: JSON string containing detailed analysis and recommendations
249
  """
250
- logger.info(f"πŸ” Analyzing marketing prompt: {prompt[:50]}...")
 
 
 
 
251
 
252
  try:
253
- word_count = len(prompt.split())
254
-
255
- # Check for marketing-specific elements
256
- marketing_keywords = [
257
- "professional", "corporate", "business", "marketing", "brand", "commercial",
258
- "office", "team", "collaboration", "presentation", "meeting", "workplace",
259
- "customer", "service", "product", "showcase", "display", "advertising"
260
- ]
261
-
262
- style_keywords = [
263
- "realistic", "photographic", "artistic", "creative", "modern", "clean",
264
- "minimalist", "professional", "high-quality", "detailed", "sharp"
265
- ]
266
-
267
- composition_keywords = [
268
- "lighting", "composition", "background", "foreground", "perspective",
269
- "angle", "framing", "focus", "depth", "contrast", "colors"
270
- ]
271
-
272
- # Count keyword categories
273
- marketing_score = sum(1 for word in marketing_keywords if word.lower() in prompt.lower()) / len(marketing_keywords)
274
- style_score = sum(1 for word in style_keywords if word.lower() in prompt.lower()) / len(style_keywords)
275
- composition_score = sum(1 for word in composition_keywords if word.lower() in prompt.lower()) / len(composition_keywords)
276
-
277
- # Base quality assessment
278
- if word_count < 5:
279
- base_quality = 0.3
280
- quality_issues = ["Prompt is too short and lacks detail"]
281
- elif word_count < 10:
282
- base_quality = 0.5
283
- quality_issues = ["Prompt could benefit from more descriptive details"]
284
- elif word_count < 20:
285
- base_quality = 0.7
286
- quality_issues = []
287
- elif word_count < 40:
288
- base_quality = 0.8
289
- quality_issues = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  else:
291
- base_quality = 0.6
292
- quality_issues = ["Prompt might be too complex - consider simplifying"]
293
-
294
- # Adjust based on keyword presence
295
- quality_adjustment = (marketing_score * 0.2 + style_score * 0.15 + composition_score * 0.15)
296
- final_quality = min(1.0, base_quality + quality_adjustment)
297
-
298
- # Generate specific feedback
299
- missing_elements = []
300
- if marketing_score < 0.1:
301
- missing_elements.append("marketing context or business relevance")
302
- if style_score < 0.1:
303
- missing_elements.append("artistic style or visual quality descriptors")
304
- if "english" in review_guidelines.lower() and "english" not in prompt.lower():
305
- missing_elements.append("English language specification for text/signage")
306
-
307
- present_elements = []
308
- if marketing_score > 0.1:
309
- present_elements.append("marketing/business context")
310
- if style_score > 0.1:
311
- present_elements.append("style descriptors")
312
- if composition_score > 0.1:
313
- present_elements.append("composition guidance")
314
-
315
- # Calculate overall scores
316
- relevance_score = min(1.0, final_quality + (marketing_score * 0.2))
317
- safety_score = 0.95 # Generally high for marketing prompts
318
-
319
- # Check for potentially problematic content
320
- problematic_terms = ["violence", "inappropriate", "offensive", "controversial"]
321
- for term in problematic_terms:
322
- if term in prompt.lower():
323
- safety_score = 0.7
324
- break
325
-
326
- overall_score = (final_quality * 0.4 + relevance_score * 0.4 + safety_score * 0.2)
327
-
328
- # Generate recommendations
329
- recommendations = []
330
-
331
- if final_quality < 0.6:
332
- recommendations.append("Consider adding more descriptive details about the desired image")
333
-
334
- if marketing_score < 0.1:
335
- recommendations.append("Add marketing context (e.g., professional, business, corporate)")
336
-
337
- if "english" in review_guidelines.lower() and "english" not in prompt.lower():
338
- recommendations.append("Add 'English signage' or 'English text' to meet language requirements")
339
-
340
- if word_count < 10:
341
- recommendations.append("Expand prompt with lighting, composition, or environmental details")
342
- elif word_count > 50:
343
- recommendations.append("Consider simplifying prompt while keeping key elements")
344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  if not recommendations:
346
  if overall_score > 0.8:
347
- recommendations.append("Excellent prompt! Should generate high-quality marketing image")
 
 
348
  else:
349
- recommendations.append("Good prompt foundation - image should meet basic requirements")
350
 
351
- analysis_result = {
352
  "success": True,
353
- "quality_score": round(final_quality, 2),
354
- "relevance_score": round(relevance_score, 2),
355
- "safety_score": round(safety_score, 2),
356
  "overall_score": round(overall_score, 2),
357
- "word_count": word_count,
 
 
 
358
  "missing_elements": missing_elements,
359
  "present_elements": present_elements,
360
- "recommendations": recommendations[:5],
361
- "analysis_method": "prompt_analysis"
 
 
 
 
362
  }
363
 
364
- return json.dumps(analysis_result)
365
-
366
  except Exception as e:
367
- logger.error(f"Prompt analysis failed: {e}")
368
- return json.dumps({
369
- "success": False,
370
- "error": f"Analysis failed: {str(e)}",
371
- "prompt": prompt
372
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
 
374
  def generate_and_review_marketing_image(prompt: str, style: str = "realistic", review_guidelines: str = "") -> str:
375
  """
@@ -393,8 +481,9 @@ def generate_and_review_marketing_image(prompt: str, style: str = "realistic", r
393
  if not generation_data.get("success", False):
394
  return generation_response # Return error
395
 
396
- # Step 2: Analyze the prompt (marketing review)
397
- analysis_response = analyze_marketing_prompt(prompt, review_guidelines)
 
398
  analysis_data = json.loads(analysis_response)
399
 
400
  # Combine results
@@ -417,6 +506,7 @@ def generate_and_review_marketing_image(prompt: str, style: str = "realistic", r
417
  "metadata": {
418
  "generation_method": generation_data.get("generation_method", "unknown"),
419
  "real_ai_generation": generation_data.get("real_ai_generation", False),
 
420
  "workflow_type": "gradio_mcp_server"
421
  }
422
  }
@@ -467,6 +557,7 @@ def process_generated_image_and_results(api_response_str: str) -> Tuple[Image.Im
467
 
468
  # Extract review data
469
  review_data = response_data.get('review', {})
 
470
 
471
  if review_data:
472
  quality_score = review_data.get('quality_score', 0)
@@ -476,9 +567,10 @@ def process_generated_image_and_results(api_response_str: str) -> Tuple[Image.Im
476
 
477
  status_emoji = "🟒" if passed else "πŸ”΄"
478
 
479
- # Extract metadata about generation method
480
  metadata = response_data.get('metadata', {})
481
  generation_method = metadata.get('generation_method', 'unknown')
 
482
 
483
  generation_info = ""
484
  if generation_method == "google-genai-sdk":
@@ -486,13 +578,29 @@ def process_generated_image_and_results(api_response_str: str) -> Tuple[Image.Im
486
  elif generation_method == "placeholder":
487
  generation_info = "🎨 **Generated with**: Placeholder (Fallback)\n"
488
 
 
 
 
 
 
 
 
 
 
 
 
489
  review_text = f"""**πŸ” Marketing Review Results**
490
 
491
- {generation_info}
492
  **Quality Score:** {quality_score:.2f}/1.0
493
  **Status:** {status_emoji} {final_status.upper()}
494
  **Architecture:** Gradio MCP Server
495
 
 
 
 
 
 
496
  **πŸ’‘ Recommendations:**
497
  """
498
 
 
1
+ """
2
  Marketing Image Generator with Gradio MCP Server
3
  Professional AI image generation using Google Imagen3 with marketing review
4
  Deployed on HuggingFace Spaces with built-in MCP server support
 
10
  import json
11
  import base64
12
  import asyncio
13
+ from typing import Dict, Any, Tuple, List
14
  from PIL import Image
15
  import io
16
 
 
23
  import tempfile
24
  from google.oauth2 import service_account
25
 
26
+ # Clean and parse the JSON credentials
27
+ # Remove common problematic characters
28
+ cleaned_json = service_account_json.strip()
29
+ # Replace common escape sequences
30
+ cleaned_json = cleaned_json.replace('\\n', '\n').replace('\\t', '\t').replace('\\r', '\r')
31
+
32
+ credentials_dict = json.loads(cleaned_json)
33
 
34
  # Create credentials from service account info
35
  credentials = service_account.Credentials.from_service_account_info(credentials_dict)
 
241
  "style": style
242
  })
243
 
244
def _build_marketing_review_prompt(prompt: str, review_guidelines: str) -> str:
    """Build the instruction text sent to Gemini Vision together with the image."""
    guidelines_text = (
        review_guidelines
        if review_guidelines.strip()
        else 'No specific guidelines provided - use standard marketing criteria'
    )
    # The "RESPONSE FORMAT" labels below are the ones _parse_gemini_analysis
    # searches for; keep the two in sync when editing either side.
    lines = [
        "",
        f'You are a Marketing Image Reviewer analyzing this image generated from: "{prompt}"',
        "",
        "CUSTOM REVIEW GUIDELINES (HIGHEST PRIORITY):",
        guidelines_text,
        "",
        "CRITICAL MARKETING CHECKS:",
        '1. **Language/Text Requirements**: If guidelines mention "English" or specific language requirements, verify ALL visible text matches',
        "2. **Brand Compliance**: Check professional appearance, color consistency, readability",
        "3. **Marketing Effectiveness**: Assess visual appeal and message clarity",
        "4. **Target Audience**: Evaluate cultural appropriateness and accessibility",
        "",
        "Evaluate on these marketing criteria:",
        "1. **Marketing Quality**: Visual appeal, composition, professional appearance (0.0 to 1.0)",
        "2. **Brand/Prompt Compliance**: How well it matches requirements and guidelines (0.0 to 1.0)",
        "3. **Marketing Effectiveness**: Message clarity, target audience appeal (0.0 to 1.0)",
        "",
        "RESPONSE FORMAT:",
        "Marketing Quality Score: [0.0-1.0]",
        "Brand Compliance Score: [0.0-1.0]",
        "Marketing Effectiveness Score: [0.0-1.0]",
        "",
        "Guideline Violations: [List specific violations of user guidelines, especially language/text requirements]",
        "Missing Elements: [List prompt elements missing from image]",
        "Present Elements: [List prompt elements correctly represented]",
        "",
        "Marketing Issues: [Brand compliance, readability, professional appearance problems]",
        "Language/Text Issues: [Specific text/signage language violations if any]",
        "Effectiveness Issues: [Marketing message clarity and appeal problems]",
        "",
        "Marketing Recommendations: [Specific marketing-focused improvement suggestions]",
        "",
        "CRITICAL: If guidelines specify English text/signage, explicitly check and report on ALL visible text language compliance.",
        "",
    ]
    return "\n".join(lines)


def _load_image_for_review(image_url: str):
    """Load the image to review from a base64 data URL or an http(s) URL.

    Returns the opened PIL image, or None (after logging the reason) when the
    reference has an unsupported format or the download does not return 200.
    """
    if image_url and image_url.startswith('data:image'):
        # Data URLs look like "data:image/png;base64,<payload>".
        _, separator, base64_data = image_url.partition(',')
        if separator:
            return Image.open(io.BytesIO(base64.b64decode(base64_data)))
    elif image_url and image_url.startswith('http'):
        import requests
        response = requests.get(image_url, timeout=10)
        if response.status_code == 200:
            return Image.open(io.BytesIO(response.content))
        logger.error(f"Failed to fetch image from URL: {response.status_code}")
        return None

    logger.error("Invalid image URL format")
    return None


def analyze_marketing_image_with_gemini(image_url: str, prompt: str, review_guidelines: str = "") -> str:
    """
    Analyze a generated marketing image using Gemini Vision for quality, relevance, and compliance.

    Falls back to the text-only analysis (_fallback_image_analysis) when Gemini
    is not configured, when the image cannot be loaded, or when any step of the
    vision analysis raises.

    Args:
        image_url (str): URL or base64 data URL of the generated image
        prompt (str): The original marketing prompt used to generate the image
        review_guidelines (str): Specific guidelines to check against

    Returns:
        str: JSON string containing detailed analysis and recommendations
    """
    # Normalise optional inputs so slicing/strip below cannot raise on None.
    prompt = prompt or ""
    review_guidelines = review_guidelines or ""

    logger.info(f"🔍 Analyzing marketing image with Gemini Vision: {prompt[:50]}...")

    if not GEMINI_AVAILABLE or not GOOGLE_API_KEY:
        logger.warning("Gemini Vision not available, using fallback analysis")
        return _fallback_image_analysis(prompt, review_guidelines)

    try:
        image = _load_image_for_review(image_url)
        if image is None:
            logger.error("Could not load image for analysis")
            return _fallback_image_analysis(prompt, review_guidelines)

        # Generate analysis using Gemini Vision
        analysis_prompt = _build_marketing_review_prompt(prompt, review_guidelines)
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content([analysis_prompt, image])

        parsed_result = _parse_gemini_analysis(response.text, prompt)
        if isinstance(parsed_result, str):
            # The parser handed back an already-serialised fallback result;
            # return it as-is instead of failing on .get() and recomputing it.
            return parsed_result

        logger.info(f"✅ Gemini Vision analysis completed with score: {parsed_result.get('overall_score', 0)}")
        return json.dumps(parsed_result)

    except Exception as e:
        # Best-effort boundary: any SDK/network/decoding failure degrades to
        # the text-only review rather than failing the whole request.
        logger.error(f"Error in Gemini Vision analysis: {str(e)}")
        return _fallback_image_analysis(prompt, review_guidelines)
337
+
338
+ def _parse_gemini_analysis(analysis_text: str, original_prompt: str) -> Dict[str, Any]:
339
+ """Parse Gemini Vision analysis response"""
340
+ try:
341
+ # Extract scores using regex patterns
342
+ import re
343
+
344
+ def extract_score(text: str, score_type: str) -> float:
345
+ pattern = rf"{score_type}.*?Score:\s*([0-9]*\.?[0-9]+)"
346
+ match = re.search(pattern, text, re.IGNORECASE)
347
+ if match:
348
+ return float(match.group(1))
349
+ return 0.7 # Default score
350
+
351
+ def extract_list_items(text: str, section: str) -> List[str]:
352
+ pattern = rf"{section}:\s*\[(.*?)\]"
353
+ match = re.search(pattern, text, re.IGNORECASE | re.DOTALL)
354
+ if match:
355
+ items_text = match.group(1).strip()
356
+ if items_text:
357
+ return [item.strip() for item in items_text.split(',') if item.strip()]
358
+ return []
359
+
360
+ # Extract scores
361
+ marketing_quality = extract_score(analysis_text, "Marketing Quality")
362
+ brand_compliance = extract_score(analysis_text, "Brand Compliance")
363
+ marketing_effectiveness = extract_score(analysis_text, "Marketing Effectiveness")
364
+
365
+ # Calculate overall score
366
+ overall_score = (marketing_quality * 0.4 + brand_compliance * 0.4 + marketing_effectiveness * 0.2)
367
+
368
+ # Extract lists
369
+ violations = extract_list_items(analysis_text, "Guideline Violations")
370
+ missing_elements = extract_list_items(analysis_text, "Missing Elements")
371
+ present_elements = extract_list_items(analysis_text, "Present Elements")
372
+ marketing_issues = extract_list_items(analysis_text, "Marketing Issues")
373
+ language_issues = extract_list_items(analysis_text, "Language/Text Issues")
374
+ effectiveness_issues = extract_list_items(analysis_text, "Effectiveness Issues")
375
+ recommendations = extract_list_items(analysis_text, "Marketing Recommendations")
376
+
377
+ # Generate recommendations if none found
378
  if not recommendations:
379
  if overall_score > 0.8:
380
+ recommendations = ["Excellent marketing image! Meets all quality standards"]
381
+ elif overall_score > 0.6:
382
+ recommendations = ["Good marketing image with minor improvements needed"]
383
  else:
384
+ recommendations = ["Image needs significant improvements for marketing use"]
385
 
386
+ return {
387
  "success": True,
 
 
 
388
  "overall_score": round(overall_score, 2),
389
+ "marketing_quality": round(marketing_quality, 2),
390
+ "brand_compliance": round(brand_compliance, 2),
391
+ "marketing_effectiveness": round(marketing_effectiveness, 2),
392
+ "violations": violations,
393
  "missing_elements": missing_elements,
394
  "present_elements": present_elements,
395
+ "marketing_issues": marketing_issues,
396
+ "language_issues": language_issues,
397
+ "effectiveness_issues": effectiveness_issues,
398
+ "recommendations": recommendations[:5], # Limit to top 5
399
+ "analysis_method": "gemini_vision",
400
+ "original_prompt": original_prompt
401
  }
402
 
 
 
403
  except Exception as e:
404
+ logger.error(f"Error parsing Gemini analysis: {str(e)}")
405
+ return _fallback_image_analysis(original_prompt, "")
406
+
407
def _fallback_image_analysis(prompt: str, review_guidelines: str) -> str:
    """Fallback analysis when Gemini Vision is not available.

    Scores the *prompt text* only (the image is never inspected): a length-based
    quality score, the share of marketing keywords present, and a check that an
    English-language guideline is reflected in the prompt.

    Args:
        prompt: The marketing prompt used to generate the image.
        review_guidelines: Reviewer guidelines; only a mention of "english" is
            inspected here.

    Returns:
        str: JSON string with the same keys as the Gemini-based analysis and
        "analysis_method" set to "fallback_text".
    """
    logger.info("Using fallback text-based analysis")

    # This is the last-resort path (it is called from exception handlers), so
    # it must not raise on missing inputs.
    prompt = prompt or ""
    prompt_lower = prompt.lower()
    guidelines_lower = (review_guidelines or "").lower()

    word_count = len(prompt.split())

    # Simple scoring based on prompt length
    if word_count < 10:
        quality_score = 0.5
    elif word_count < 20:
        quality_score = 0.7
    else:
        quality_score = 0.8

    # Share of marketing keywords that occur (as substrings) in the prompt
    marketing_keywords = ["professional", "corporate", "business", "marketing", "brand"]
    keyword_hits = sum(1 for word in marketing_keywords if word in prompt_lower)
    marketing_score = keyword_hits / len(marketing_keywords)

    # Check for language requirements
    language_issues = []
    if "english" in guidelines_lower and "english" not in prompt_lower:
        language_issues.append("English language requirement not specified in prompt")

    # Generate recommendations
    recommendations = []
    if marketing_score < 0.2:
        recommendations.append("Add marketing context (e.g., professional, business, corporate)")
    recommendations.extend(language_issues)
    if word_count < 10:
        recommendations.append("Expand prompt with more descriptive details")

    if not recommendations:
        recommendations = ["Image should meet basic marketing requirements"]

    overall_score = (quality_score * 0.6 + marketing_score * 0.4)

    return json.dumps({
        "success": True,
        "overall_score": round(overall_score, 2),
        "marketing_quality": round(quality_score, 2),
        "brand_compliance": round(marketing_score, 2),
        # No separate effectiveness signal exists without the image; reuse overall.
        "marketing_effectiveness": round(overall_score, 2),
        "violations": list(language_issues),
        "missing_elements": [],
        "present_elements": [],
        "marketing_issues": [],
        "language_issues": language_issues,
        "effectiveness_issues": [],
        "recommendations": recommendations,
        "analysis_method": "fallback_text",
        "original_prompt": prompt,
    })
461
 
462
  def generate_and_review_marketing_image(prompt: str, style: str = "realistic", review_guidelines: str = "") -> str:
463
  """
 
481
  if not generation_data.get("success", False):
482
  return generation_response # Return error
483
 
484
+ # Step 2: Analyze the generated image with Gemini Vision
485
+ image_url = generation_data.get("image_url", "")
486
+ analysis_response = analyze_marketing_image_with_gemini(image_url, prompt, review_guidelines)
487
  analysis_data = json.loads(analysis_response)
488
 
489
  # Combine results
 
506
  "metadata": {
507
  "generation_method": generation_data.get("generation_method", "unknown"),
508
  "real_ai_generation": generation_data.get("real_ai_generation", False),
509
+ "review_method": analysis_data.get("analysis_method", "unknown"),
510
  "workflow_type": "gradio_mcp_server"
511
  }
512
  }
 
557
 
558
  # Extract review data
559
  review_data = response_data.get('review', {})
560
+ analysis_details = review_data.get('analysis_details', {})
561
 
562
  if review_data:
563
  quality_score = review_data.get('quality_score', 0)
 
567
 
568
  status_emoji = "🟒" if passed else "πŸ”΄"
569
 
570
+ # Extract metadata about generation and review methods
571
  metadata = response_data.get('metadata', {})
572
  generation_method = metadata.get('generation_method', 'unknown')
573
+ review_method = metadata.get('review_method', 'unknown')
574
 
575
  generation_info = ""
576
  if generation_method == "google-genai-sdk":
 
578
  elif generation_method == "placeholder":
579
  generation_info = "🎨 **Generated with**: Placeholder (Fallback)\n"
580
 
581
+ review_method_info = ""
582
+ if review_method == "gemini_vision":
583
+ review_method_info = "πŸ” **Reviewed with**: Gemini Vision (AI Analysis)\n"
584
+ elif review_method == "fallback_text":
585
+ review_method_info = "πŸ” **Reviewed with**: Text Analysis (Fallback)\n"
586
+
587
+ # Get detailed scores from analysis
588
+ marketing_quality = analysis_details.get('marketing_quality', quality_score)
589
+ brand_compliance = analysis_details.get('brand_compliance', quality_score)
590
+ marketing_effectiveness = analysis_details.get('marketing_effectiveness', quality_score)
591
+
592
  review_text = f"""**πŸ” Marketing Review Results**
593
 
594
+ {generation_info}{review_method_info}
595
  **Quality Score:** {quality_score:.2f}/1.0
596
  **Status:** {status_emoji} {final_status.upper()}
597
  **Architecture:** Gradio MCP Server
598
 
599
+ **πŸ“Š Detailed Scores:**
600
+ β€’ Marketing Quality: {marketing_quality:.2f}/1.0
601
+ β€’ Brand Compliance: {brand_compliance:.2f}/1.0
602
+ β€’ Marketing Effectiveness: {marketing_effectiveness:.2f}/1.0
603
+
604
  **πŸ’‘ Recommendations:**
605
  """
606