Update utils/planner.py

utils/planner.py CHANGED (+35 -4)
@@ -5,6 +5,7 @@ from openai import OpenAI
 from PIL import Image
 import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration
+from transformers import CLIPTokenizer
 
 # ----------------------------
 # 🔑 Load Environment & GPT Client
@@ -18,6 +19,14 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
+tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
+
+# ----------------------------
+# 📝 Log File
+# ----------------------------
+
+LOG_PATH = "logs/prompt_log.jsonl"
+os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
 
 # ----------------------------
 # 📸 Generate Caption from Image
@@ -76,7 +85,7 @@ def extract_scene_plan(prompt: str, image: Image.Image) -> dict:
         }
 
 # ----------------------------
-# 🧠 Generate Positive Prompt Variations
+# 🧠 Generate Positive Prompt Variations (CLIP-safe)
 # ----------------------------
 def generate_prompt_variations_from_scene(scene_plan: dict, base_prompt: str, n: int = 3) -> list:
     try:
@@ -88,8 +97,10 @@ Each prompt should:
 - Include stylistic or contextual variation
 - Reference the same product and environment
 - Stay faithful to the base prompt and extracted plan
+- Be under 77 tokens when tokenized using a CLIP tokenizer
 Respond ONLY with a JSON array of strings. No explanations.
 """
+
         response = client.chat.completions.create(
             model="gpt-4o-mini-2024-07-18",
             messages=[
@@ -104,8 +115,17 @@ Respond ONLY with a JSON array of strings. No explanations.
         )
 
         content = response.choices[0].message.content
-
-
+        all_prompts = json.loads(content)
+
+        # Enforce token limit using CLIP tokenizer
+        filtered = []
+        for p in all_prompts:
+            tokens = tokenizer(p)["input_ids"]
+            if len(tokens) <= 77:
+                filtered.append(p)
+        print("🧠 Filtered Prompts (<=77 tokens):", filtered)
+
+        return filtered or [base_prompt]
 
     except Exception as e:
         print("❌ generate_prompt_variations_from_scene() Error:", e)
@@ -143,4 +163,15 @@ No explanations.
         return "deformed hands, extra limbs, text, watermark, signature"
 
 
-
+# ----------------------------
+# 📊 Save Logs for Analysis
+# ----------------------------
+def save_generation_log(caption, scene_plan, prompts, negative_prompt):
+    log = {
+        "blip_caption": caption,
+        "scene_plan": scene_plan,
+        "enriched_prompts": prompts,
+        "negative_prompt": negative_prompt
+    }
+    with open(LOG_PATH, "a") as f:
+        f.write(json.dumps(log) + "\n")