Spaces:

aiqtech
/

Nano-Banana-API

Running

App Files Files Community

aiqtech committed 2 days ago

Commit

bb8a67b

verified ·

1 Parent(s): 6966316

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -10

app.py CHANGED Viewed

@@ -42,34 +42,113 @@ def verify_pro_status(token: Optional[gr.OAuthToken]) -> bool:
 def _extract_image_data_from_response(response) -> Optional[bytes]:
     """Helper to extract image data from the model's response."""
     if hasattr(response, 'candidates') and response.candidates:
-        for candidate in response.candidates:
-            if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
-                for part in candidate.content.parts:
-                    if hasattr(part, 'inline_data') and hasattr(part.inline_data, 'data'):
-                        return part.inline_data.data
     return None
 def run_single_image_logic(prompt: str, image_path: Optional[str] = None, progress=gr.Progress()) -> str:
     """Handles text-to-image or single image-to-image using Google Gemini."""
     try:
         progress(0.2, desc="🎨 준비 중...")
-        contents = [prompt]
         if image_path:
             input_image = Image.open(image_path)
             contents.append(input_image)
         progress(0.5, desc="✨ 생성 중...")
         response = client.models.generate_content(
             model=GEMINI_MODEL_NAME,
             contents=contents,
         )
         progress(0.8, desc="🖼️ 마무리 중...")
         image_data = _extract_image_data_from_response(response)
         if not image_data:
-            raise ValueError("No image data found in the model response.")
         # Save the generated image to a temporary file to return its path
         pil_image = Image.open(BytesIO(image_data))
@@ -79,6 +158,8 @@ def run_single_image_logic(prompt: str, image_path: Optional[str] = None, progre
             return tmpfile.name
     except Exception as e:
         raise gr.Error(f"이미지 생성 실패: {e}")
@@ -91,20 +172,34 @@ def run_multi_image_logic(prompt: str, images: List[str], progress=gr.Progress()
     try:
         progress(0.2, desc="🎨 이미지 준비 중...")
-        contents = [Image.open(image_path[0]) for image_path in images]
-        contents.append(prompt)
         progress(0.5, desc="✨ 생성 중...")
         response = client.models.generate_content(
             model=GEMINI_MODEL_NAME,
             contents=contents,
         )
         progress(0.8, desc="🖼️ 마무리 중...")
         image_data = _extract_image_data_from_response(response)
         if not image_data:
-            raise ValueError("No image data found in the model response.")
         pil_image = Image.open(BytesIO(image_data))
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
@@ -113,6 +208,7 @@ def run_multi_image_logic(prompt: str, images: List[str], progress=gr.Progress()
             return tmpfile.name
     except Exception as e:
         raise gr.Error(f"이미지 생성 실패: {e}")

 def _extract_image_data_from_response(response) -> Optional[bytes]:
     """Helper to extract image data from the model's response."""
+    # Debug: Print response structure
+    print(f"Response type: {type(response)}")
+    # Try multiple ways to extract image data
+    # Method 1: Direct image attribute
+    if hasattr(response, 'image'):
+        print("Found response.image")
+        return response.image
+    # Method 2: Images array
+    if hasattr(response, 'images') and response.images:
+        print(f"Found response.images with {len(response.images)} images")
+        return response.images[0]
+    # Method 3: Candidates with parts
     if hasattr(response, 'candidates') and response.candidates:
+        print(f"Found {len(response.candidates)} candidates")
+        for i, candidate in enumerate(response.candidates):
+            print(f"Candidate {i}: {type(candidate)}")
+            # Check for content.parts
+            if hasattr(candidate, 'content'):
+                print(f"  Has content: {type(candidate.content)}")
+                if hasattr(candidate.content, 'parts') and candidate.content.parts:
+                    print(f"  Has {len(candidate.content.parts)} parts")
+                    for j, part in enumerate(candidate.content.parts):
+                        print(f"    Part {j}: {type(part)}")
+                        # Check for inline_data
+                        if hasattr(part, 'inline_data'):
+                            print(f"      Has inline_data")
+                            if hasattr(part.inline_data, 'data'):
+                                print(f"      Found image data!")
+                                return part.inline_data.data
+                            if hasattr(part.inline_data, 'blob'):
+                                print(f"      Found blob data!")
+                                return part.inline_data.blob
+                        # Check for blob directly
+                        if hasattr(part, 'blob'):
+                            print(f"      Has blob")
+                            return part.blob
+                        # Check for data directly
+                        if hasattr(part, 'data'):
+                            print(f"      Has data")
+                            return part.data
+    # Method 4: Text response (might need different API configuration)
+    if hasattr(response, 'text'):
+        print(f"Response has text but no image: {response.text[:200] if response.text else 'Empty'}")
+    print("No image data found in response")
     return None
 def run_single_image_logic(prompt: str, image_path: Optional[str] = None, progress=gr.Progress()) -> str:
     """Handles text-to-image or single image-to-image using Google Gemini."""
     try:
         progress(0.2, desc="🎨 준비 중...")
+        # Prepare the prompt with image generation instruction
+        generation_prompt = f"Generate an image: {prompt}"
+        contents = []
         if image_path:
+            # Image-to-image generation
             input_image = Image.open(image_path)
             contents.append(input_image)
+            contents.append(f"Edit this image: {prompt}")
+        else:
+            # Text-to-image generation
+            contents.append(generation_prompt)
         progress(0.5, desc="✨ 생성 중...")
+        # Try with generation config for images
+        generation_config = types.GenerationConfig(
+            temperature=1.0,
+            max_output_tokens=8192,
+        )
         response = client.models.generate_content(
             model=GEMINI_MODEL_NAME,
             contents=contents,
+            generation_config=generation_config,
         )
+        # Debug: Print full response
+        print(f"Full response: {response}")
         progress(0.8, desc="🖼️ 마무리 중...")
         image_data = _extract_image_data_from_response(response)
         if not image_data:
+            # Try alternative approach - generate_images if available
+            if hasattr(client.models, 'generate_images'):
+                print("Trying generate_images method...")
+                response = client.models.generate_images(
+                    model=GEMINI_MODEL_NAME,
+                    prompt=prompt,
+                    n=1,
+                )
+                if hasattr(response, 'images') and response.images:
+                    image_data = response.images[0]
+            if not image_data:
+                raise ValueError("No image data found in the model response. The API might not support image generation or the model name might be incorrect.")
         # Save the generated image to a temporary file to return its path
         pil_image = Image.open(BytesIO(image_data))
             return tmpfile.name
     except Exception as e:
+        print(f"Error details: {e}")
+        print(f"Error type: {type(e)}")
         raise gr.Error(f"이미지 생성 실패: {e}")
     try:
         progress(0.2, desc="🎨 이미지 준비 중...")
+        contents = []
+        for image_path in images:
+            if isinstance(image_path, (list, tuple)):
+                image_path = image_path[0]
+            contents.append(Image.open(image_path))
+        contents.append(f"Combine/edit these images: {prompt}")
         progress(0.5, desc="✨ 생성 중...")
+        generation_config = types.GenerationConfig(
+            temperature=1.0,
+            max_output_tokens=8192,
+        )
         response = client.models.generate_content(
             model=GEMINI_MODEL_NAME,
             contents=contents,
+            generation_config=generation_config,
         )
+        # Debug: Print full response
+        print(f"Multi-image response: {response}")
         progress(0.8, desc="🖼️ 마무리 중...")
         image_data = _extract_image_data_from_response(response)
         if not image_data:
+            raise ValueError("No image data found in the model response. The API might not support multi-image generation.")
         pil_image = Image.open(BytesIO(image_data))
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
             return tmpfile.name
     except Exception as e:
+        print(f"Multi-image error details: {e}")
         raise gr.Error(f"이미지 생성 실패: {e}")