Manireddy1508 committed
Commit 045059f · verified · 1 Parent(s): 8cafce9

Update utils/planner.py

Files changed (1)
  1. utils/planner.py +13 -15
utils/planner.py CHANGED
@@ -1,11 +1,12 @@
+# utils/planner.py
+
 import os
 import json
 from dotenv import load_dotenv
 from openai import OpenAI
 from PIL import Image
 import torch
-from transformers import BlipProcessor, BlipForConditionalGeneration
-from transformers import CLIPTokenizer
+from transformers import BlipProcessor, BlipForConditionalGeneration, CLIPTokenizer
 
 # ----------------------------
 # 🔐 Load Environment & GPT Client
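The consolidated import above pulls the BLIP captioning classes and the CLIP tokenizer in through one line. For orientation, the BLIP processor and model that the next hunk loads are what produce the caption that save_generation_log later records; the captioning call itself is outside this diff, so the sketch below only illustrates standard transformers usage, and describe_image is a hypothetical helper name, not part of this commit.

# Hedged sketch of BLIP captioning with the processor/blip_model objects
# defined in planner.py; describe_image() is hypothetical.
from PIL import Image

def describe_image(image_path: str) -> str:
    image = Image.open(image_path).convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)
    output_ids = blip_model.generate(**inputs, max_new_tokens=30)
    return processor.decode(output_ids[0], skip_special_tokens=True)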
@@ -14,17 +15,16 @@ load_dotenv()
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 # ----------------------------
-# 🧠 Load BLIP Captioning Model
+# 🧠 Load BLIP & CLIP Tokenizer
 # ----------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
-tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
+clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
 
 # ----------------------------
-# 📝 Log File
+# 📝 Log Path
 # ----------------------------
-
 LOG_PATH = "logs/prompt_log.jsonl"
 os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
 
@@ -92,7 +92,7 @@ def generate_prompt_variations_from_scene(scene_plan: dict, base_prompt: str, n:
     system_msg = f"""
 You are a creative prompt variation generator for an AI image generation system.
 Given a base user prompt and its structured scene plan, generate {n} diverse image generation prompts.
-Each prompt should:
+Each prompt must:
 - Be visually rich and descriptive
 - Include stylistic or contextual variation
 - Reference the same product and environment
@@ -117,14 +117,13 @@ Respond ONLY with a JSON array of strings. No explanations.
         content = response.choices[0].message.content
         all_prompts = json.loads(content)
 
-        # Enforce token limit using CLIP tokenizer
         filtered = []
         for p in all_prompts:
-            tokens = clip_tokenizer(p)["input_ids"]
-            if len(tokens) <= 77:
+            token_count = len(clip_tokenizer(p)["input_ids"])
+            if token_count <= 77:
                 filtered.append(p)
-        print("🧠 Filtered Prompts (<=77 tokens):", filtered)
 
+        print("🧠 Filtered Prompts (<=77 tokens):", filtered)
         return filtered or [base_prompt]
 
     except Exception as e:
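The rewritten filter above keeps only prompts that fit the 77-token context window of the CLIP text encoder used by Stable Diffusion-style models; the count from clip_tokenizer includes the start/end special tokens it adds. A self-contained sketch of the same check, with an illustrative prompt:

# Standalone sketch of the 77-token check; the example prompt is illustrative only.
from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

def fits_clip_window(prompt: str, limit: int = 77) -> bool:
    # input_ids includes <|startoftext|> and <|endoftext|>, so the usable
    # budget for prompt words is slightly below the limit.
    return len(tokenizer(prompt)["input_ids"]) <= limit

candidates = ["a sleek perfume bottle on white marble, soft golden-hour light"]
kept = [p for p in candidates if fits_clip_window(p)] or candidates[:1]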
@@ -140,8 +139,7 @@ def generate_negative_prompt_from_scene(scene_plan: dict) -> str:
 You are an assistant that generates negative prompts for an image generation model.
 Based on the structured scene plan, return a list of things that should NOT appear in the image,
 such as incorrect objects, extra limbs, distorted hands, text, watermark, etc.
-Return a single negative prompt string (comma-separated values).
-No explanations.
+Return a single negative prompt string (comma-separated values). No explanations.
 """
 
         response = client.chat.completions.create(
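The hunk ends on the opening line of the chat-completions call that consumes this system message. The body of that call sits outside the changed lines, so the sketch below is an assumption about its shape rather than code from the commit: the model name, temperature, and the user message (the serialized scene plan) are all guesses.

# Hedged sketch of the call opened above; model, temperature, and the user
# message payload are assumptions, not taken from this diff.
response = client.chat.completions.create(
    model="gpt-4o-mini",  # assumed model name
    messages=[
        {"role": "system", "content": system_msg},
        {"role": "user", "content": json.dumps(scene_plan)},
    ],
    temperature=0.7,
)
negative_prompt = response.choices[0].message.content.strip()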
@@ -162,9 +160,8 @@ No explanations.
         print("❌ generate_negative_prompt_from_scene() Error:", e)
         return "deformed hands, extra limbs, text, watermark, signature"
 
-
 # ----------------------------
-# 📝 Save Logs for Analysis
+# 📝 Save Logs
 # ----------------------------
 def save_generation_log(caption, scene_plan, prompts, negative_prompt):
     log = {
@@ -175,3 +172,4 @@ def save_generation_log(caption, scene_plan, prompts, negative_prompt):
     }
     with open(LOG_PATH, "a") as f:
         f.write(json.dumps(log, indent=2) + "\n")
+
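A side note on the logging helper, where this commit only adds a trailing blank line: json.dumps(log, indent=2) spreads each record over many lines, so the resulting .jsonl file cannot be read with the usual one-json.loads-per-line pattern. If strictly line-delimited logs are wanted, a compact writer along the following lines would do it; append_jsonl is a hypothetical helper, not something this commit introduces.

# Hypothetical strictly line-delimited writer: one compact JSON object per line,
# which is what .jsonl consumers usually expect.
import json

def append_jsonl(path: str, record: dict) -> None:
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")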
 
 