Spaces:
Sleeping
Sleeping
Update app_main.py
Browse files- app_main.py +168 -1
app_main.py
CHANGED
@@ -235,6 +235,171 @@ def extract_images_from_pdf(pdf_path, output_json_path):
|
|
235 |
except Exception as e:
|
236 |
raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
|
237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
@app.route('/')
def index():
    """Serve the application's main HTML page."""
    return render_template('app_index.html')
|
@@ -264,12 +429,14 @@ def process_pdf():
|
|
264 |
json_path = None
|
265 |
output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
|
266 |
|
|
|
267 |
logger.info("Received request to process PDF.")
|
268 |
|
269 |
return jsonify({
|
270 |
"message": "✅ PDF processed successfully",
|
271 |
"output_json": output_path,
|
272 |
-
"sprites": result
|
|
|
273 |
})
|
274 |
except Exception as e:
|
275 |
logger.exception("❌ Failed to process PDF")
|
|
|
235 |
except Exception as e:
|
236 |
raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
|
237 |
|
238 |
+
def similarity_matching(input_json_path: str) -> str:
    """Match extracted sprites to reference images and build a Scratch project.

    Embeds every reference image (backdrops + sprites libraries) and every
    extracted sprite with OpenCLIP, pairs each sprite with its most similar
    reference image, then assembles a Scratch 3.0 ``project.json`` from the
    matched folders' metadata and copies their asset files into a fresh
    ``outputs/project_<uuid>`` folder.

    Args:
        input_json_path: Path to the JSON produced by ``extract_images_from_pdf``,
            mapping sprite ids to dicts that hold at least a ``base64`` image
            (optionally a data-URI).

    Returns:
        Path to the generated ``project.json``.

    Raises:
        RuntimeError: If no reference images are found under the library dirs.
    """
    import uuid
    import shutil
    import tempfile
    from io import BytesIO

    from langchain_experimental.open_clip.open_clip import OpenCLIPEmbeddings

    logger.info("🔍 Running similarity matching...")

    # ============================== #
    #         DEFINE PATHS           #
    # ============================== #
    # Overridable via env vars so the code is not tied to one machine;
    # the hard-coded defaults preserve the previous behavior exactly.
    backdrop_images_path = os.environ.get(
        "SCRATCH_BACKDROPS_DIR",
        r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\Backdrops",
    )
    sprite_images_path = os.environ.get(
        "SCRATCH_SPRITES_DIR",
        r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\sprites",
    )
    image_dirs = [backdrop_images_path, sprite_images_path]

    # ================================================= #
    #   Unique project folder: outputs/project_<uuid>   #
    # ================================================= #
    random_id = str(uuid.uuid4()).replace('-', '')
    project_folder = os.path.join("outputs", f"project_{random_id}")
    os.makedirs(project_folder, exist_ok=True)
    project_json_path = os.path.join(project_folder, "project.json")

    # ============================== #
    #     READ SPRITE METADATA       #
    # ============================== #
    with open(input_json_path, 'r') as f:
        sprites_data = json.load(f)
    # Only the base64 payloads are used downstream (the ids/description
    # text built here previously were dead code and have been dropped).
    sprite_base64 = [sprite["base64"] for sprite in sprites_data.values()]

    # ========================================= #
    #  Walk folders to collect all image paths  #
    # ========================================= #
    folder_image_paths = [
        os.path.join(root, fname)
        for image_dir in image_dirs
        for root, _dirs, files in os.walk(image_dir)
        for fname in files
        if fname.lower().endswith((".png", ".jpg", ".jpeg"))
    ]
    if not folder_image_paths:
        # Fail fast with a clear message instead of a cryptic CLIP error.
        raise RuntimeError(
            f"❌ No reference images found under {image_dirs}; "
            "check SCRATCH_BACKDROPS_DIR / SCRATCH_SPRITES_DIR."
        )

    # ============================== #
    #    INITIALIZE CLIP EMBEDDER    #
    # ============================== #
    clip_embd = OpenCLIPEmbeddings()

    # Embed the reference (library) images.
    img_features = clip_embd.embed_image(folder_image_paths)

    # ============================== #
    #     DECODE SPRITE IMAGES       #
    # ============================== #
    # The embedder wants file paths, so decode each base64 sprite to a temp
    # PNG. The temp dir is always removed, even on failure (was leaked before).
    temp_dir = tempfile.mkdtemp()
    try:
        sprite_image_paths = []
        for idx, b64 in enumerate(sprite_base64):
            image_data = base64.b64decode(b64.split(",")[-1])  # strip any data-URI prefix
            img = Image.open(BytesIO(image_data)).convert("RGB")
            temp_path = os.path.join(temp_dir, f"sprite_{idx}.png")
            img.save(temp_path)
            sprite_image_paths.append(temp_path)

        # Embed the extracted sprites.
        sprite_features = clip_embd.embed_image(sprite_image_paths)
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

    # ============================== #
    #     COMPUTE SIMILARITIES       #
    # ============================== #
    # Dot-product similarity; best-matching reference image per sprite.
    similarity = np.matmul(np.array(sprite_features), np.array(img_features).T)
    most_similar_indices = np.argmax(similarity, axis=1)

    # ============= Match and copy ================
    project_data, backdrop_data = [], []
    copied_folders = set()
    for matched_idx in most_similar_indices:
        matched_image_path = os.path.normpath(folder_image_paths[matched_idx])
        matched_folder = os.path.dirname(matched_image_path)
        if matched_folder in copied_folders:
            continue  # each reference folder contributes at most once
        copied_folders.add(matched_folder)

        # Sprite match: pull its sprite.json and copy sibling asset files.
        sprite_json_path = os.path.join(matched_folder, 'sprite.json')
        if os.path.exists(sprite_json_path):
            with open(sprite_json_path, 'r') as f:
                project_data.append(json.load(f))
            for fname in os.listdir(matched_folder):
                if fname not in {os.path.basename(matched_image_path), 'sprite.json'}:
                    shutil.copy2(os.path.join(matched_folder, fname), project_folder)

        # Backdrop match: harvest Stage targets from the folder's project.json.
        if matched_image_path.startswith(os.path.normpath(backdrop_images_path)):
            backdrop_json_path = os.path.join(matched_folder, 'project.json')
            if os.path.exists(backdrop_json_path):
                with open(backdrop_json_path, 'r') as f:
                    backdrop_json_data = json.load(f)
                for target in backdrop_json_data.get("targets", []):
                    if target.get("isStage"):
                        backdrop_data.append(target)
                for fname in os.listdir(matched_folder):
                    if fname not in {os.path.basename(matched_image_path), 'project.json'}:
                        shutil.copy2(os.path.join(matched_folder, fname), project_folder)

    # Merge JSON structure into a Scratch 3.0 project skeleton.
    final_project = {
        "targets": [],
        "monitors": [],
        "extensions": [],
        "meta": {
            "semver": "3.0.0",
            "vm": "11.3.0",
            "agent": "OpenAI ScratchVision Agent"
        }
    }

    # Non-stage sprites go in as-is.
    for sprite in project_data:
        if not sprite.get("isStage", False):
            final_project["targets"].append(sprite)

    if backdrop_data:
        # Merge all matched stages' costumes into a single Stage target;
        # sounds are taken from the first matched stage only.
        all_costumes, sounds = [], []
        for idx, bd in enumerate(backdrop_data):
            all_costumes.extend(bd.get("costumes", []))
            if idx == 0 and "sounds" in bd:
                sounds = bd["sounds"]
        final_project["targets"].append({
            "isStage": True,
            "name": "Stage",
            "variables": {},
            "lists": {},
            "broadcasts": {},
            "blocks": {},
            "comments": {},
            "currentCostume": 1 if len(all_costumes) > 1 else 0,
            "costumes": all_costumes,
            "sounds": sounds,
            "volume": 100,
            "layerOrder": 0,
            "tempo": 60,
            "videoTransparency": 50,
            "videoState": "on",
            "textToSpeechLanguage": None
        })

    with open(project_json_path, 'w') as f:
        json.dump(final_project, f, indent=2)

    logger.info(f"🎉 Final project saved: {project_json_path}")
    return project_json_path
|
401 |
+
|
402 |
+
|
403 |
@app.route('/')
def index():
    """Serve the application's main HTML page."""
    return render_template('app_index.html')
|
|
|
429 |
json_path = None
|
430 |
output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
|
431 |
|
432 |
+
project_output = similarity_matching(output_path)
|
433 |
logger.info("Received request to process PDF.")
|
434 |
|
435 |
return jsonify({
|
436 |
"message": "✅ PDF processed successfully",
|
437 |
"output_json": output_path,
|
438 |
+
"sprites": result,
|
439 |
+
"project_output_json": project_output
|
440 |
})
|
441 |
except Exception as e:
|
442 |
logger.exception("❌ Failed to process PDF")
|