prthm11 committed
Commit a175429 · verified · 1 Parent(s): ed4b853

Update app_main.py

Files changed (1)
  1. app_main.py +168 -1
app_main.py CHANGED
@@ -235,6 +235,171 @@ def extract_images_from_pdf(pdf_path, output_json_path):
     except Exception as e:
         raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
 
+def similarity_matching(input_json_path: str) -> str:
+    import uuid, shutil, tempfile
+    from langchain_experimental.open_clip.open_clip import OpenCLIPEmbeddings
+    from matplotlib.offsetbox import OffsetImage, AnnotationBbox
+    from io import BytesIO
+
+    logger.info("🔍 Running similarity matching...")
+
+    # ============================== #
+    #         DEFINE PATHS           #
+    # ============================== #
+    backdrop_images_path = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\Backdrops"
+    sprite_images_path = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\sprites"
+    image_dirs = [backdrop_images_path, sprite_images_path]
+
+
+    # ================================================= #
+    #   Generate Random UUID for project folder name    #
+    # ================================================= #
+    random_id = str(uuid.uuid4()).replace('-', '')
+    project_folder = os.path.join("outputs", f"project_{random_id}")
+
+    # =========================================================================== #
+    #               Create empty json in project_{random_id} folder               #
+    # =========================================================================== #
+    os.makedirs(project_folder, exist_ok=True)
+    project_json_path = os.path.join(project_folder, "project.json")
+
+    # ============================== #
+    #      READ SPRITE METADATA      #
+    # ============================== #
+    with open(input_json_path, 'r') as f:
+        sprites_data = json.load(f)
+
+    sprite_ids, texts, sprite_base64 = [], [], []
+    for sid, sprite in sprites_data.items():
+        sprite_ids.append(sid)
+        texts.append("This is " + sprite.get("description", sprite.get("name", "")))
+        sprite_base64.append(sprite["base64"])
+
+    # ========================================= #
+    #  Walk folders to collect all image paths  #
+    # ========================================= #
+    folder_image_paths = []
+    for image_dir in image_dirs:
+        for root, dirs, files in os.walk(image_dir):
+            for fname in files:
+                if fname.lower().endswith((".png", ".jpg", ".jpeg")):
+                    folder_image_paths.append(os.path.join(root, fname))
+
+    # ============================== #
+    #    INITIALIZE CLIP EMBEDDER    #
+    # ============================== #
+    clip_embd = OpenCLIPEmbeddings()
+
+    # ============================== #
+    #   EMBED FOLDER IMAGES (REF)    #
+    # ============================== #
+    img_features = clip_embd.embed_image(folder_image_paths)
+
+    # ============================== #
+    #      DECODE SPRITE IMAGES      #
+    # ============================== #
+    temp_dir = tempfile.mkdtemp()
+    sprite_image_paths = []
+    for idx, b64 in enumerate(sprite_base64):
+        image_data = base64.b64decode(b64.split(",")[-1])
+        img = Image.open(BytesIO(image_data)).convert("RGB")
+        temp_path = os.path.join(temp_dir, f"sprite_{idx}.png")
+        img.save(temp_path)
+        sprite_image_paths.append(temp_path)
+
+    # ============================== #
+    #      EMBED SPRITE IMAGES       #
+    # ============================== #
+    sprite_features = clip_embd.embed_image(sprite_image_paths)
+
+    # ============================== #
+    #      COMPUTE SIMILARITIES      #
+    # ============================== #
+    similarity = np.matmul(np.array(sprite_features), np.array(img_features).T)
+    most_similar_indices = np.argmax(similarity, axis=1)
+
+    # ============= Match and copy ================
+    project_data, backdrop_data = [], []
+    copied_folders = set()
+    for sprite_idx, matched_idx in enumerate(most_similar_indices):
+        matched_image_path = os.path.normpath(folder_image_paths[matched_idx])
+        matched_folder = os.path.dirname(matched_image_path)
+        if matched_folder in copied_folders:
+            continue
+        copied_folders.add(matched_folder)
+
+        # Sprite
+        sprite_json_path = os.path.join(matched_folder, 'sprite.json')
+        if os.path.exists(sprite_json_path):
+            with open(sprite_json_path, 'r') as f:
+                sprite_data = json.load(f)
+            project_data.append(sprite_data)
+
+            for fname in os.listdir(matched_folder):
+                if fname not in {os.path.basename(matched_image_path), 'sprite.json'}:
+                    shutil.copy2(os.path.join(matched_folder, fname), project_folder)
+
+        # Backdrop
+        if matched_image_path.startswith(os.path.normpath(backdrop_images_path)):
+            backdrop_json_path = os.path.join(matched_folder, 'project.json')
+            if os.path.exists(backdrop_json_path):
+                with open(backdrop_json_path, 'r') as f:
+                    backdrop_json_data = json.load(f)
+                for target in backdrop_json_data.get("targets", []):
+                    if target.get("isStage"):
+                        backdrop_data.append(target)
+                for fname in os.listdir(matched_folder):
+                    if fname not in {os.path.basename(matched_image_path), 'project.json'}:
+                        shutil.copy2(os.path.join(matched_folder, fname), project_folder)
+
+    # Merge JSON structure
+    final_project = {
+        "targets": [],
+        "monitors": [],
+        "extensions": [],
+        "meta": {
+            "semver": "3.0.0",
+            "vm": "11.3.0",
+            "agent": "OpenAI ScratchVision Agent"
+        }
+    }
+
+    for sprite in project_data:
+        if not sprite.get("isStage", False):
+            final_project["targets"].append(sprite)
+
+    if backdrop_data:
+        all_costumes, sounds = [], []
+        for idx, bd in enumerate(backdrop_data):
+            all_costumes.extend(bd.get("costumes", []))
+            if idx == 0 and "sounds" in bd:
+                sounds = bd["sounds"]
+        final_project["targets"].append({
+            "isStage": True,
+            "name": "Stage",
+            "variables": {},
+            "lists": {},
+            "broadcasts": {},
+            "blocks": {},
+            "comments": {},
+            "currentCostume": 1 if len(all_costumes) > 1 else 0,
+            "costumes": all_costumes,
+            "sounds": sounds,
+            "volume": 100,
+            "layerOrder": 0,
+            "tempo": 60,
+            "videoTransparency": 50,
+            "videoState": "on",
+            "textToSpeechLanguage": None
+        })
+
+    with open(project_json_path, 'w') as f:
+        json.dump(final_project, f, indent=2)
+
+    logger.info(f"🎉 Final project saved: {project_json_path}")
+    return project_json_path
+
+
 @app.route('/')
 def index():
     return render_template('app_index.html')
@@ -264,12 +429,14 @@ def process_pdf():
             json_path = None
             output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
 
+            project_output = similarity_matching(output_path)
             logger.info("Received request to process PDF.")
 
             return jsonify({
                 "message": "✅ PDF processed successfully",
                 "output_json": output_path,
-                "sprites": result
+                "sprites": result,
+                "project_output_json": project_output
             })
     except Exception as e:
         logger.exception("❌ Failed to process PDF")
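
Note on the new function: similarity_matching() is essentially a CLIP nearest-neighbour lookup. It embeds the reference backdrop/sprite images and the sprites extracted from the PDF with OpenCLIPEmbeddings, scores every pair with a dot product, and keeps the best-scoring reference per sprite before assembling project.json. A minimal standalone sketch of that matching step, with hypothetical image paths (not files from this repo):

# Sketch only: mirrors the scoring inside similarity_matching(); paths are placeholders.
import numpy as np
from langchain_experimental.open_clip.open_clip import OpenCLIPEmbeddings

clip_embd = OpenCLIPEmbeddings()

reference_paths = ["images/Backdrops/bedroom.png", "images/sprites/cat.png"]  # hypothetical
query_paths = ["sprite_0.png", "sprite_1.png"]                                # hypothetical

ref_vecs = np.array(clip_embd.embed_image(reference_paths))   # shape (n_refs, dim)
query_vecs = np.array(clip_embd.embed_image(query_paths))     # shape (n_queries, dim)

# Same scoring as the diff: dot-product similarity, then argmax over references.
similarity = np.matmul(query_vecs, ref_vecs.T)                # shape (n_queries, n_refs)
best = np.argmax(similarity, axis=1)
for query, ref_idx in zip(query_paths, best):
    print(query, "->", reference_paths[ref_idx])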