prthm11 committed on
Commit
7d1aceb
·
verified ·
1 Parent(s): bee310f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +605 -348
app.py CHANGED
@@ -9,19 +9,30 @@ import pytesseract
9
  from werkzeug.utils import secure_filename
10
  from langchain_groq import ChatGroq
11
  from langgraph.prebuilt import create_react_agent
12
- from pdf2image import convert_from_path
13
  from concurrent.futures import ThreadPoolExecutor
14
  from pdf2image.exceptions import PDFInfoNotInstalledError
15
  from typing import Dict, TypedDict, Optional, Any
16
  from langgraph.graph import StateGraph, END
17
  import uuid
18
- import shutil, time
19
  from langchain_experimental.open_clip.open_clip import OpenCLIPEmbeddings
20
  # from matplotlib.offsetbox import OffsetImage, AnnotationBbox
21
  from io import BytesIO
22
  from pathlib import Path
23
  import os
24
  from utils.block_relation_builder import block_builder, variable_adder_main
 
 
 
 
 
 
 
 
 
 
 
25
  global pdf_doc
26
  # ============================== #
27
  # INITIALIZE CLIP EMBEDDER #
@@ -64,7 +75,8 @@ pytesseract.pytesseract.tesseract_cmd = (r'/usr/bin/tesseract')
64
 
65
  # poppler_path = r"C:\poppler\Library\bin"
66
  backdrop_images_path = r"app\blocks\Backdrops"
67
- sprite_images_path = r"blocks\sprites"
 
68
 
69
  count = 0
70
 
@@ -74,12 +86,12 @@ STATIC_DIR = BASE_DIR / "static"
74
  GEN_PROJECT_DIR = BASE_DIR / "generated_projects"
75
  BACKDROP_DIR = BLOCKS_DIR / "Backdrops"
76
  SPRITE_DIR = BLOCKS_DIR / "sprites"
77
-
78
  # === new: outputs rooted under BASE_DIR ===
79
  OUTPUT_DIR = BASE_DIR / "outputs"
80
- DETECTED_IMAGE_DIR = OUTPUT_DIR / "DETECTED_IMAGE"
81
- SCANNED_IMAGE_DIR = OUTPUT_DIR / "SCANNED_IMAGE"
82
- JSON_DIR = OUTPUT_DIR / "EXTRACTED_JSON"
83
 
84
  # make all of them in one go
85
  for d in (
@@ -88,10 +100,11 @@ for d in (
88
  GEN_PROJECT_DIR,
89
  BACKDROP_DIR,
90
  SPRITE_DIR,
 
91
  OUTPUT_DIR,
92
- DETECTED_IMAGE_DIR,
93
- SCANNED_IMAGE_DIR,
94
- JSON_DIR,
95
  ):
96
  d.mkdir(parents=True, exist_ok=True)
97
  # def classify_image_type(description_or_name: str) -> str:
@@ -124,6 +137,9 @@ class GameState(TypedDict):
124
  action_plan: Optional[Dict]
125
  temporary_node: Optional[Dict]
126
 
 
 
 
127
 
128
  # Refined SYSTEM_PROMPT with more explicit Scratch JSON rules, especially for variables
129
  SYSTEM_PROMPT = """
@@ -223,7 +239,7 @@ agent_json_resolver = create_react_agent(
223
  prompt=SYSTEM_PROMPT_JSON_CORRECTOR
224
  )
225
 
226
- # Helper function to load the block catalog from a JSON file
227
  # def _load_block_catalog(file_path: str) -> Dict:
228
  # """Loads the Scratch block catalog from a specified JSON file."""
229
  # try:
@@ -287,26 +303,8 @@ def find_block_in_all(opcode: str, all_catalogs: list[dict]) -> dict | None:
287
  return None
288
 
289
 
290
-
291
- # --- Global variable for the block catalog ---
292
  # --- Global variable for the block catalog ---
293
  ALL_SCRATCH_BLOCKS_CATALOG = {}
294
- # BLOCK_CATALOG_PATH = r"blocks\blocks.json" # Define the path to your JSON file
295
- # HAT_BLOCKS_PATH = r"blocks\hat_blocks.json" # Path to the hat blocks JSON file
296
- # STACK_BLOCKS_PATH = r"blocks\stack_blocks.json" # Path to the stack blocks JSON file
297
- # REPORTER_BLOCKS_PATH = r"blocks\reporter_blocks.json" # Path to the reporter blocks JSON file
298
- # BOOLEAN_BLOCKS_PATH = r"blocks\boolean_blocks.json" # Path to the boolean blocks JSON file
299
- # C_BLOCKS_PATH = r"blocks\c_blocks.json" # Path to the C blocks JSON file
300
- # CAP_BLOCKS_PATH = r"blocks\cap_blocks.json" # Path to the cap blocks JSON file
301
-
302
- # BLOCK_CATALOG_PATH = r"blocks/blocks.json"
303
- # HAT_BLOCKS_PATH = r"blocks/hat_blocks.json"
304
- # STACK_BLOCKS_PATH = r"blocks/stack_blocks.json"
305
- # REPORTER_BLOCKS_PATH = r"blocks/reporter_blocks.json"
306
- # BOOLEAN_BLOCKS_PATH = r"blocks/boolean_blocks.json"
307
- # C_BLOCKS_PATH = r"blocks/c_blocks.json"
308
- # CAP_BLOCKS_PATH = r"blocks/cap_blocks.json"
309
-
310
  BLOCK_CATALOG_PATH = "blocks" # Define the path to your JSON file
311
  HAT_BLOCKS_PATH = "hat_blocks" # Path to the hat blocks JSON file
312
  STACK_BLOCKS_PATH = "stack_blocks" # Path to the stack blocks JSON file
@@ -315,15 +313,24 @@ BOOLEAN_BLOCKS_PATH = "boolean_blocks" # Path to the boolean blocks JSON file
315
  C_BLOCKS_PATH = "c_blocks" # Path to the C blocks JSON file
316
  CAP_BLOCKS_PATH = "cap_blocks" # Path to the cap blocks JSON file
317
 
 
 
 
 
 
 
 
 
318
  # Load the block catalogs from their respective JSON files
319
  hat_block_data = _load_block_catalog(HAT_BLOCKS_PATH)
320
- # hat_description = hat_block_data["description"]
321
- hat_description = hat_block_data.get("description", "No description available")
322
  # hat_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in hat_block_data["blocks"]])
323
  hat_opcodes_functionalities = "\n".join([
324
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
325
  for block in hat_block_data.get("blocks", [])
326
  ]) if isinstance(hat_block_data.get("blocks"), list) else " No blocks information available."
 
327
  print("Hat blocks loaded successfully.", hat_description)
328
 
329
  boolean_block_data = _load_block_catalog(BOOLEAN_BLOCKS_PATH)
@@ -333,6 +340,7 @@ boolean_opcodes_functionalities = "\n".join([
333
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
334
  for block in boolean_block_data.get("blocks", [])
335
  ]) if isinstance(boolean_block_data.get("blocks"), list) else " No blocks information available."
 
336
 
337
  c_block_data = _load_block_catalog(C_BLOCKS_PATH)
338
  c_description = c_block_data["description"]
@@ -341,6 +349,7 @@ c_opcodes_functionalities = "\n".join([
341
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
342
  for block in c_block_data.get("blocks", [])
343
  ]) if isinstance(c_block_data.get("blocks"), list) else " No blocks information available."
 
344
 
345
  cap_block_data = _load_block_catalog(CAP_BLOCKS_PATH)
346
  cap_description = cap_block_data["description"]
@@ -349,6 +358,7 @@ cap_opcodes_functionalities = "\n".join([
349
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
350
  for block in cap_block_data.get("blocks", [])
351
  ]) if isinstance(cap_block_data.get("blocks"), list) else " No blocks information available."
 
352
 
353
  reporter_block_data = _load_block_catalog(REPORTER_BLOCKS_PATH)
354
  reporter_description = reporter_block_data["description"]
@@ -357,6 +367,7 @@ reporter_opcodes_functionalities = "\n".join([
357
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
358
  for block in reporter_block_data.get("blocks", [])
359
  ]) if isinstance(reporter_block_data.get("blocks"), list) else " No blocks information available."
 
360
 
361
  stack_block_data = _load_block_catalog(STACK_BLOCKS_PATH)
362
  stack_description = stack_block_data["description"]
@@ -365,6 +376,7 @@ stack_opcodes_functionalities = "\n".join([
365
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
366
  for block in stack_block_data.get("blocks", [])
367
  ]) if isinstance(stack_block_data.get("blocks"), list) else " No blocks information available."
 
368
 
369
  # This makes ALL_SCRATCH_BLOCKS_CATALOG available globally
370
  ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
@@ -435,6 +447,49 @@ def extract_json_from_llm_response(raw_response: str) -> dict:
435
  logger.error("Sanitized JSON still invalid:\n%s", json_string)
436
  raise
437
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  # Node 1: Logic updating if any issue here
439
  def pseudo_generator_node(state: GameState):
440
  logger.info("--- Running plan_logic_aligner_node ---")
@@ -598,7 +653,8 @@ If you find any "Code-Blocks" then,
598
  image_input = {
599
  "type": "image_url",
600
  "image_url": {
601
- "url": f"data:image/png;base64,{image}"
 
602
  }
603
  }
604
 
@@ -1458,86 +1514,86 @@ Example output:
1458
  return state
1459
 
1460
  # Node 10:Function based block builder node
1461
- # def overall_block_builder_node_2(state: dict):
1462
- # logger.info("--- Running OverallBlockBuilderNode ---")
1463
-
1464
- # project_json = state["project_json"]
1465
- # targets = project_json["targets"]
1466
- # # --- Sprite and Stage Target Mapping ---
1467
- # sprite_map = {target["name"]: target for target in targets if not target["isStage"]}
1468
- # stage_target = next((target for target in targets if target["isStage"]), None)
1469
- # if stage_target:
1470
- # sprite_map[stage_target["name"]] = stage_target
1471
-
1472
- # action_plan = state.get("action_plan", {})
1473
- # print("[Overall Action Plan received at the block generator]:", json.dumps(action_plan, indent=2))
1474
- # if not action_plan:
1475
- # logger.warning("No action plan found in state. Skipping OverallBlockBuilderNode.")
1476
- # return state
1477
-
1478
- # # Initialize offsets for script placement on the Scratch canvas
1479
- # script_y_offset = {}
1480
- # script_x_offset_per_sprite = {name: 0 for name in sprite_map.keys()}
1481
-
1482
- # # This handles potential variations in the action_plan structure.
1483
- # if action_plan.get("action_overall_flow", {}) == {}:
1484
- # plan_data = action_plan.items()
1485
- # else:
1486
- # plan_data = action_plan.get("action_overall_flow", {}).items()
1487
-
1488
- # # --- Extract global project context for LLM ---
1489
- # all_sprite_names = list(sprite_map.keys())
1490
- # all_variable_names = {}
1491
- # all_list_names = {}
1492
- # all_broadcast_messages = {}
1493
-
1494
- # for target in targets:
1495
- # for var_id, var_info in target.get("variables", {}).items():
1496
- # all_variable_names[var_info[0]] = var_id # Store name -> ID mapping (e.g., "myVariable": "myVarId123")
1497
- # for list_id, list_info in target.get("lists", {}).items():
1498
- # all_list_names[list_info[0]] = list_id # Store name -> ID mapping
1499
- # for broadcast_id, broadcast_name in target.get("broadcasts", {}).items():
1500
- # all_broadcast_messages[broadcast_name] = broadcast_id # Store name -> ID mapping
1501
-
1502
- # # --- Process each sprite's action plan ---
1503
- # for sprite_name, sprite_actions_data in plan_data:
1504
- # if sprite_name in sprite_map:
1505
- # current_sprite_target = sprite_map[sprite_name]
1506
- # if "blocks" not in current_sprite_target:
1507
- # current_sprite_target["blocks"] = {}
1508
-
1509
- # if sprite_name not in script_y_offset:
1510
- # script_y_offset[sprite_name] = 0
1511
-
1512
- # for plan_entry in sprite_actions_data.get("plans", []):
1513
- # logic_sequence = str(plan_entry["logic"])
1514
- # opcode_counts = plan_entry.get("opcode_counts", {})
1515
-
1516
- # try:
1517
- # generated_blocks=block_builder(opcode_counts,logic_sequence)
1518
- # if "blocks" in generated_blocks and isinstance(generated_blocks["blocks"], dict):
1519
- # logger.warning(f"LLM returned nested 'blocks' key for {sprite_name}. Unwrapping.")
1520
- # generated_blocks = generated_blocks["blocks"]
1521
-
1522
- # # Update block positions for top-level script
1523
- # for block_id, block_data in generated_blocks.items():
1524
- # if block_data.get("topLevel"):
1525
- # block_data["x"] = script_x_offset_per_sprite.get(sprite_name, 0)
1526
- # block_data["y"] = script_y_offset[sprite_name]
1527
- # script_y_offset[sprite_name] += 150 # Increment for next script
1528
-
1529
- # current_sprite_target["blocks"].update(generated_blocks)
1530
- # print(f"[current_sprite_target block updated]: {current_sprite_target['blocks']}")
1531
- # state["iteration_count"] = 0
1532
- # logger.info(f"Action blocks added for sprite '{sprite_name}' by OverallBlockBuilderNode.")
1533
- # except Exception as e:
1534
- # logger.error(f"Error generating blocks for sprite '{sprite_name}': {e}")
1535
 
1536
- # state["project_json"] = project_json
1537
- # # with open("debug_state.json", "w", encoding="utf-8") as f:
1538
- # # json.dump(state, f, indent=2, ensure_ascii=False)
1539
 
1540
- # return state
1541
  # Node 10:Function based block builder node
1542
  def overall_block_builder_node_2(state: dict):
1543
  logger.info("--- Running OverallBlockBuilderNode ---")
@@ -1641,46 +1697,273 @@ def variable_adder_node(state: GameState):
1641
  raise
1642
 
1643
 
1644
- scratch_keywords = [
1645
- "move", "turn", "wait", "repeat", "if", "else", "broadcast",
1646
- "glide", "change", "forever", "when", "switch",
1647
- "next costume", "set", "show", "hide", "play sound",
1648
- "go to", "x position", "y position", "think", "say",
1649
- "variable", "stop", "clone",
1650
- "touching", "sensing", "pen", "clear","Scratch","Code","scratch blocks"
1651
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1652
 
1653
  #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
1654
  #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path):
1655
- def extract_images_from_pdf(pdf_path: Path):
 
 
 
 
 
1656
  ''' Extract images from PDF and generate structured sprite JSON '''
1657
  try:
1658
- pdf_path = Path(pdf_path)
1659
- pdf_filename = pdf_path.stem # e.g., "scratch_crab"
1660
- pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
1661
- print("-------------------------------pdf_filename-------------------------------",pdf_filename)
1662
- print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
1663
- # Create subfolders under the provided base directories
1664
- # This will create paths like:
1665
- # /app/detected_images/pdf_filename/
1666
- # /app/json_data/pdf_filename/
1667
- extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
1668
- json_subdir = JSON_DIR / pdf_filename
1669
- extracted_image_subdir.mkdir(parents=True, exist_ok=True)
1670
- json_subdir.mkdir(parents=True, exist_ok=True)
1671
- print("-------------------------------extracted_image_subdir-------------------------------",extracted_image_subdir)
1672
- print("-------------------------------json_subdir-------------------------------",json_subdir)
1673
- # Output paths (now using Path objects directly)
1674
- output_json_path = json_subdir / "extracted.json"
1675
- final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
1676
- final_json_path_2 = json_subdir / "extracted_sprites_2.json"
1677
- print("-------------------------------output_json_path-------------------------------",output_json_path)
1678
- print("-------------------------------final_json_path-------------------------------",final_json_path)
1679
- print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
 
 
 
 
1680
 
 
1681
  try:
1682
  elements = partition_pdf(
1683
- filename=str(pdf_path), # partition_pdf might expect a string
 
1684
  strategy="hi_res",
1685
  extract_image_block_types=["Image"],
1686
  hi_res_model_name="yolox",
@@ -1691,155 +1974,44 @@ def extract_images_from_pdf(pdf_path: Path):
1691
  raise RuntimeError(
1692
  f"❌ Failed to extract images from PDF: {str(e)}")
1693
 
1694
- try:
1695
- with open(output_json_path, "w") as f:
1696
- json.dump([element.to_dict()
1697
- for element in elements], f, indent=4)
1698
- except Exception as e:
1699
- raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
1700
-
1701
- try:
1702
- # Display extracted images
1703
- with open(output_json_path, 'r') as file:
1704
- file_elements = json.load(file)
1705
- except Exception as e:
1706
- raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
1707
-
1708
- # Prepare manipulated sprite JSON structure
1709
- manipulated_json = {}
 
1710
 
1711
- # SET A SYSTEM PROMPT
1712
- system_prompt = """
1713
- You are an expert in visual scene understanding.
1714
- Your Job is to analyze an image and respond acoording if asked for name give simple name by analyzing it and if ask for descrption generate a short description covering its elements.
 
1715
 
1716
- Guidelines:
1717
- - Focus only the images given in Square Shape.
1718
- - Don't Consider Blank areas in Image as.
1719
- - Don't include generic summary or explanation outside the fields.
1720
- Return only string.
1721
- """
1722
- agent = create_react_agent(
1723
- model=llm,
1724
- tools=[],
1725
- prompt=system_prompt
1726
- )
1727
-
1728
- # If JSON already exists, load it and find the next available Sprite number
1729
- if final_json_path.exists(): # Use Path.exists()
1730
- with open(final_json_path, "r") as existing_file:
1731
- manipulated = json.load(existing_file)
1732
- # Determine the next available index (e.g., Sprite 4 if 1–3 already exist)
1733
- existing_keys = [int(k.replace("Sprite ", ""))
1734
- for k in manipulated.keys()]
1735
- start_count = max(existing_keys, default=0) + 1
1736
- else:
1737
- start_count = 1
1738
-
1739
- sprite_count = start_count
1740
- for i, element in enumerate(file_elements):
1741
- if "image_base64" in element["metadata"]:
1742
- try:
1743
- image_data = base64.b64decode(
1744
- element["metadata"]["image_base64"])
1745
- print(f"\n ------------------------------image_data: {image_data}")
1746
- image = Image.open(BytesIO(image_data)).convert("RGB") # Use BytesIO here
1747
-
1748
- image = upscale_image(image, scale=2)
1749
- # image.show(title=f"Extracted Image {i+1}")
1750
-
1751
- # MODIFIED: Store image directly to BytesIO to avoid saving to disk if not needed
1752
- # and then converting back to base64.
1753
- img_buffer = BytesIO()
1754
- image.save(img_buffer, format="PNG")
1755
- img_bytes = img_buffer.getvalue()
1756
- img_base64 = base64.b64encode(img_bytes).decode("utf-8")
1757
- print(f"\n------------------------------------------------Image_Base64: {img_base64}")
1758
- # Optionally save image to disk if desired for debugging/permanent storage
1759
- image_path = extracted_image_subdir / f"Sprite_{i+1}.png"
1760
- image.save(image_path)
1761
-
1762
- prompt_combined = """
1763
- Analyze this image and return JSON with keys:# modify prompt for "name", if it detects "code-blocks only then give name as 'scratch-block'"
1764
- {
1765
- "name": "<short name or 'scratch blocks'>" ,
1766
- "description": "<short description>"
1767
- }
1768
- Guidelines:
1769
- - If image contains logical/code blocks from Scratch (e.g., move, turn, repeat, when clicked, etc.), use 'scratch-block' as the name.
1770
- - If image is a character, object, or backdrop, give an appropriate descriptive name instead.
1771
- - Avoid generic names like 'image1' or 'picture'.
1772
- - Keep the response strictly in JSON format.
1773
- """
1774
-
1775
- content = [
1776
- {"type": "text", "text": prompt_combined},
1777
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}}
1778
- ]
1779
-
1780
- response = agent.invoke({"messages": [{"role": "user", "content": content}]})
1781
-
1782
- # Ensure response is handled correctly, it might be a string that needs json.loads
1783
- try:
1784
- # Assuming the agent returns a dictionary with 'messages' key,
1785
- # and the last message's content is the JSON string.
1786
- response_content_str = response.get("messages", [])[-1].content
1787
- result_json = json.loads(response_content_str)
1788
- except (json.JSONDecodeError, IndexError, AttributeError) as e:
1789
- logger.error(f"⚠️ Failed to parse agent response as JSON: {e}. Response was: {response}", exc_info=True)
1790
- result_json = {} # Default to empty dict if parsing fails
1791
-
1792
- try:
1793
- name = result_json.get("name", "").strip()
1794
- description = result_json.get("description", "").strip()
1795
- except Exception as e:
1796
- logger.error(f"⚠️ Failed to extract name/description from result_json: {str(e)}", exc_info=True)
1797
- name = "unknown"
1798
- description = "unknown"
1799
-
1800
- manipulated_json[f"Sprite {sprite_count}"] = {
1801
- "name": name,
1802
- "base64": element["metadata"]["image_base64"],
1803
- "file-path": pdf_dir_path,
1804
- "description": description
1805
- }
1806
- print(f"\n ------------------elemente: {element['metadata']['image_base64']}")
1807
- print(f"\n ------------------pdf_dir_path: {pdf_dir_path}")
1808
- sprite_count += 1
1809
- print(f"\n===================manipulated JSON: {manipulated_json}")
1810
- except Exception as e:
1811
- logger.error(f"⚠️ Error processing Sprite {i+1}: {str(e)}", exc_info=True)
1812
-
1813
- # Save manipulated JSON
1814
- with open(final_json_path, "w") as sprite_file:
1815
- json.dump(manipulated_json, sprite_file, indent=4)
1816
-
1817
- def is_code_block(name: str) -> bool:
1818
- for kw in scratch_keywords:
1819
- if kw.lower() in name.lower():
1820
- return True
1821
- return False
1822
-
1823
- # Filter out code block images
1824
- filtered_sprites = {}
1825
- for key, value in manipulated_json.items():
1826
- sprite_name = value.get("name", "")
1827
- if not is_code_block(sprite_name):
1828
- filtered_sprites[key] = value
1829
- else:
1830
- logger.info(f"🛑 Excluded code block-like image: {key}")
1831
-
1832
- # Overwrite with filtered content
1833
- with open(final_json_path_2, "w") as sprite_file:
1834
- json.dump(filtered_sprites, sprite_file, indent=4)
1835
-
1836
- # MODIFIED RETURN VALUE: Return the Path to the primary extracted_sprites.json file
1837
- # and the directory where it's located.
1838
- return final_json_path, json_subdir # Return the file path and its parent directory
1839
  except Exception as e:
1840
  raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
1841
 
1842
- def similarity_matching(input_json_path: str, project_folder: str) -> str:
 
1843
  logger.info("🔍 Running similarity matching…")
1844
  os.makedirs(project_folder, exist_ok=True)
1845
 
@@ -1847,6 +2019,7 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
1847
  # CHANGED: define normalized base-paths so startswith() checks work
1848
  backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
1849
  sprite_base_path = os.path.normpath(str(SPRITE_DIR))
 
1850
  # ----------------------------------------
1851
 
1852
  project_json_path = os.path.join(project_folder, "project.json")
@@ -1854,15 +2027,23 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
1854
  # ==============================
1855
  # READ SPRITE METADATA
1856
  # ==============================
1857
- with open(input_json_path, 'r') as f:
1858
- sprites_data = json.load(f)
1859
 
1860
- sprite_ids, texts, sprite_base64 = [], [], []
1861
  for sid, sprite in sprites_data.items():
1862
  sprite_ids.append(sid)
1863
- texts.append("This is " + sprite.get("description", sprite.get("name", "")))
1864
  sprite_base64.append(sprite["base64"])
1865
 
 
 
 
 
 
 
 
 
1866
  # =========================================
1867
  # Build the list of all candidate images
1868
  # =========================================
@@ -1882,6 +2063,13 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
1882
  SPRITE_DIR / "Centaur.sprite3" / "2373556e776cad3ba4d6ee04fc34550b.png",
1883
  SPRITE_DIR / "Crab.sprite3" / "bear_element.png",
1884
  SPRITE_DIR / "Soccer Ball.sprite3" / "cat_football.png",
 
 
 
 
 
 
 
1885
  ]
1886
  folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
1887
  # =========================================
@@ -1891,24 +2079,31 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
1891
  # -----------------------------------------
1892
  with open(f"{BLOCKS_DIR}/embeddings.json", "r") as f:
1893
  embedding_json = json.load(f)
1894
- img_matrix = np.array([img["embeddings"] for img in embedding_json])
1895
 
1896
  # =========================================
1897
  # Decode & embed each sprite image
1898
  # =========================================
1899
- sprite_features = []
1900
- for b64 in sprite_base64:
1901
- if "," in b64:
1902
- b64 = b64.split(",", 1)[1]
1903
- img_bytes = base64.b64decode(b64)
1904
- pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
1905
- buf = BytesIO()
1906
- pil_img.save(buf, format="PNG")
1907
- buf.seek(0)
1908
- feats = clip_embd.embed_image([buf])[0]
1909
- sprite_features.append(feats)
 
 
 
 
 
 
 
1910
  sprite_matrix = np.vstack(sprite_features)
1911
-
 
1912
  # =========================================
1913
  # Compute similarities & pick best match
1914
  # =========================================
@@ -2002,6 +2197,7 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
2002
  else:
2003
  logger.warning(f"No project.json in {matched_folder}")
2004
 
 
2005
  # =========================================
2006
  # Merge into final Scratch project.json
2007
  # =========================================
@@ -2179,6 +2375,29 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
2179
  # # logger.info(f"🎉 Final project saved: {project_json_path}")
2180
  # return project_json_path
2181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2182
  def delay_for_tpm_node(state: GameState):
2183
  logger.info("--- Running DelayForTPMNode ---")
2184
  time.sleep(60) # Adjust the delay as needed
@@ -2229,7 +2448,8 @@ def upscale_image(image: Image.Image, scale: int = 2) -> Image.Image:
2229
  except Exception as e:
2230
  logger.error(f"❌ Error during image upscaling: {str(e)}")
2231
  return image
2232
-
 
2233
  def create_sb3_archive(project_folder, project_id):
2234
  """
2235
  Zips the project folder and renames it to an .sb3 file.
@@ -2269,19 +2489,22 @@ def create_sb3_archive(project_folder, project_id):
2269
  os.remove(sb3_path)
2270
  return sb3_path
2271
 
2272
- def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
 
 
2273
  """
2274
- Copies the PDF at `pdf_path` into GEN_PROJECT_DIR/project_id/,
2275
  renaming it to <project_id>.pdf.
2276
 
2277
  Args:
2278
- pdf_path (str): Any existing path to a PDF file.
2279
  project_id (str): Your unique project identifier.
2280
 
2281
  Returns:
2282
  str: Path to the copied PDF in the generated directory,
2283
  or None if something went wrong.
2284
- """
 
2285
  try:
2286
  # 1) Build the destination directory and base filename
2287
  output_dir = GEN_PROJECT_DIR / project_id
@@ -2292,9 +2515,16 @@ def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
2292
  target_pdf = output_dir / f"{project_id}.pdf"
2293
  print(f"\n--------------------------------target_pdf {target_pdf}")
2294
  # 3) Copy the PDF
2295
- shutil.copy2(pdf_path, target_pdf)
2296
- print(f"Copied PDF from {pdf_path} → {target_pdf}")
2297
- logger.info(f"Copied PDF from {pdf_path} → {target_pdf}")
 
 
 
 
 
 
 
2298
 
2299
 
2300
  return str(target_pdf)
@@ -2355,22 +2585,40 @@ def process_pdf():
2355
  # Create empty json in project_{random_id} folder #
2356
  # =========================================================================== #
2357
  #os.makedirs(project_folder, exist_ok=True)
2358
-
 
2359
  # Save the uploaded PDF temporarily
2360
- filename = secure_filename(pdf_file.filename)
2361
- temp_dir = tempfile.mkdtemp()
2362
- saved_pdf_path = os.path.join(temp_dir, filename)
2363
- pdf_file.save(saved_pdf_path)
2364
- pdf_doc = saved_pdf_path
2365
- pdf= save_pdf_to_generated_dir(saved_pdf_path, project_id)
 
 
 
 
 
 
 
2366
  # logger.info(f"Created project folder: {project_folder}")
2367
- logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
2368
  logger.info(f"Saved uploaded PDF to: {pdf_file}: {pdf}")
2369
- print("--------------------------------pdf_file_path---------------------",pdf_file,saved_pdf_path)
 
 
 
 
 
2370
  # Extract & process
2371
- # output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
2372
- output_path, result = extract_images_from_pdf(saved_pdf_path)
2373
- print(" --------------------------------------- zip_path_str ---------------------------------------", output_path, result)
 
 
 
 
 
2374
  # Check extracted_sprites.json for "scratch block" in any 'name'
2375
  # extracted_dir = os.path.join(JSON_DIR, os.path.splitext(filename)[0])
2376
  # extracted_sprites_json = os.path.join(extracted_dir, "extracted_sprites.json")
@@ -2380,14 +2628,16 @@ def process_pdf():
2380
 
2381
  # with open(extracted_sprites_json, 'r') as f:
2382
  # sprite_data = json.load(f)
2383
-
2384
- # project_output = similarity_matching(output_path, project_folder)
2385
- # logger.info("Received request to process PDF.")
 
 
2386
 
2387
- # with open(project_output, 'r') as f:
2388
- # project_skeleton = json.load(f)
2389
 
2390
- # images = convert_from_path(saved_pdf_path, dpi=300)
2391
  # print(type)
2392
  # page = images[0]
2393
  # # img_base64 = base64.b64encode(images).decode("utf-8")
@@ -2396,31 +2646,39 @@ def process_pdf():
2396
  # img_bytes = buf.getvalue()
2397
  # img_b64 = base64.b64encode(img_bytes).decode("utf-8")
2398
  #image_paths = await convert_pdf_to_images_async(saved_pdf_path)
2399
-
 
 
 
 
 
 
 
 
2400
  #updating logic here [Dev Patel]
2401
- # initial_state_dict = {
2402
- # "project_json": project_skeleton,
2403
- # "description": "The pseudo code for the script",
2404
- # "project_id": project_id,
2405
- # "project_image": img_b64,
2406
- # "action_plan": {},
2407
- # "pseudo_code": {},
2408
- # "temporary_node": {},
2409
- # }
 
2410
 
2411
- # final_state_dict = app_graph.invoke(initial_state_dict) # Pass dictionary
2412
 
2413
- # final_project_json = final_state_dict['project_json'] # Access as dict
2414
  # final_project_json = project_skeleton
2415
 
2416
  # Save the *final* filled project JSON, overwriting the skeleton
2417
- # with open(project_output, "w") as f:
2418
- # json.dump(final_project_json, f, indent=2)
2419
- # logger.info(f"Final project JSON saved to {project_output}")
2420
 
2421
  # --- Call the new function to create the .sb3 file ---
2422
- # sb3_file_path = create_sb3_archive(project_folder, project_id)
2423
- sb3_file_path = BACKDROP_DIR #create_sb3_archive(project_folder, project_id)
2424
 
2425
  if sb3_file_path:
2426
  logger.info(f"Successfully created SB3 file: {sb3_file_path}")
@@ -2435,8 +2693,7 @@ def process_pdf():
2435
  "output_json": "output_path",
2436
  "sprites": "result",
2437
  "project_output_json": "project_output",
2438
- # "test_url": download_url
2439
- "test_url":r"https://prthm11-scratch-vision-game.hf.space/download_sb3/Event_test"
2440
  })
2441
  else:
2442
  return jsonify(error="Failed to create SB3 archive"), 500
 
9
  from werkzeug.utils import secure_filename
10
  from langchain_groq import ChatGroq
11
  from langgraph.prebuilt import create_react_agent
12
+ from pdf2image import convert_from_path, convert_from_bytes
13
  from concurrent.futures import ThreadPoolExecutor
14
  from pdf2image.exceptions import PDFInfoNotInstalledError
15
  from typing import Dict, TypedDict, Optional, Any
16
  from langgraph.graph import StateGraph, END
17
  import uuid
18
+ import shutil, time, functools
19
  from langchain_experimental.open_clip.open_clip import OpenCLIPEmbeddings
20
  # from matplotlib.offsetbox import OffsetImage, AnnotationBbox
21
  from io import BytesIO
22
  from pathlib import Path
23
  import os
24
  from utils.block_relation_builder import block_builder, variable_adder_main
25
+
26
def log_execution_time(func):
    """Decorator that logs how long each call to ``func`` takes.

    The elapsed time is written to the module-level ``logger`` at INFO level
    after every call, including calls that raise, so slow failures show up in
    the log as well.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper carrying ``func``'s metadata that returns whatever ``func``
        returns and re-raises whatever ``func`` raises.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic, so the measurement cannot be skewed
        # (or go negative) when the system clock is adjusted mid-call.
        start_time = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            # Runs on both the success and the exception path.
            elapsed = time.perf_counter() - start_time
            logger.info(f"⏱ {func.__name__} executed in {elapsed:.2f} seconds")
    return wrapper
35
+
36
  global pdf_doc
37
  # ============================== #
38
  # INITIALIZE CLIP EMBEDDER #
 
75
 
76
  # poppler_path = r"C:\poppler\Library\bin"
77
  backdrop_images_path = r"app\blocks\Backdrops"
78
+ sprite_images_path = r"app\blocks\sprites"
79
+ code_blocks_image_path = r"app\blocks\code_blocks"
80
 
81
  count = 0
82
 
 
86
  GEN_PROJECT_DIR = BASE_DIR / "generated_projects"
87
  BACKDROP_DIR = BLOCKS_DIR / "Backdrops"
88
  SPRITE_DIR = BLOCKS_DIR / "sprites"
89
+ CODE_BLOCKS_DIR = BLOCKS_DIR / "code_blocks"
90
  # === new: outputs rooted under BASE_DIR ===
91
  OUTPUT_DIR = BASE_DIR / "outputs"
92
+ # DETECTED_IMAGE_DIR = OUTPUT_DIR / "DETECTED_IMAGE"
93
+ # SCANNED_IMAGE_DIR = OUTPUT_DIR / "SCANNED_IMAGE"
94
+ # JSON_DIR = OUTPUT_DIR / "EXTRACTED_JSON"
95
 
96
  # make all of them in one go
97
  for d in (
 
100
  GEN_PROJECT_DIR,
101
  BACKDROP_DIR,
102
  SPRITE_DIR,
103
+ CODE_BLOCKS_DIR,
104
  OUTPUT_DIR,
105
+ # DETECTED_IMAGE_DIR,
106
+ # SCANNED_IMAGE_DIR,
107
+ # JSON_DIR,
108
  ):
109
  d.mkdir(parents=True, exist_ok=True)
110
  # def classify_image_type(description_or_name: str) -> str:
 
137
  action_plan: Optional[Dict]
138
  temporary_node: Optional[Dict]
139
 
140
+ # class GameState(TypedDict):
141
+ # image: str
142
+ # pseudo_node: Optional[Dict]
143
 
144
  # Refined SYSTEM_PROMPT with more explicit Scratch JSON rules, especially for variables
145
  SYSTEM_PROMPT = """
 
239
  prompt=SYSTEM_PROMPT_JSON_CORRECTOR
240
  )
241
 
242
+ # # Helper function to load the block catalog from a JSON file
243
  # def _load_block_catalog(file_path: str) -> Dict:
244
  # """Loads the Scratch block catalog from a specified JSON file."""
245
  # try:
 
303
  return None
304
 
305
 
 
 
306
  # --- Global variable for the block catalog ---
307
  ALL_SCRATCH_BLOCKS_CATALOG = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  BLOCK_CATALOG_PATH = "blocks" # Define the path to your JSON file
309
  HAT_BLOCKS_PATH = "hat_blocks" # Path to the hat blocks JSON file
310
  STACK_BLOCKS_PATH = "stack_blocks" # Path to the stack blocks JSON file
 
313
  C_BLOCKS_PATH = "c_blocks" # Path to the C blocks JSON file
314
  CAP_BLOCKS_PATH = "cap_blocks" # Path to the cap blocks JSON file
315
 
316
+ # BLOCK_CATALOG_PATH = BLOCKS_DIR / "blocks.json"
317
+ # HAT_BLOCKS_PATH = BLOCKS_DIR / "hat_blocks.json"
318
+ # STACK_BLOCKS_PATH = BLOCKS_DIR / "stack_blocks.json"
319
+ # REPORTER_BLOCKS_PATH = BLOCKS_DIR / "reporter_blocks.json"
320
+ # BOOLEAN_BLOCKS_PATH = BLOCKS_DIR / "boolean_blocks.json"
321
+ # C_BLOCKS_PATH = BLOCKS_DIR / "c_blocks.json"
322
+ # CAP_BLOCKS_PATH = BLOCKS_DIR / "cap_blocks.json"
323
+
324
  # Load the block catalogs from their respective JSON files
325
  hat_block_data = _load_block_catalog(HAT_BLOCKS_PATH)
326
+ hat_description = hat_block_data["description"]
327
+ #hat_description = hat_block_data.get("description", "No description available")
328
  # hat_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in hat_block_data["blocks"]])
329
  hat_opcodes_functionalities = "\n".join([
330
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
331
  for block in hat_block_data.get("blocks", [])
332
  ]) if isinstance(hat_block_data.get("blocks"), list) else " No blocks information available."
333
+ hat_opcodes_functionalities = os.path.join(BLOCKS_DIR, "hat_blocks.txt")
334
  print("Hat blocks loaded successfully.", hat_description)
335
 
336
  boolean_block_data = _load_block_catalog(BOOLEAN_BLOCKS_PATH)
 
340
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
341
  for block in boolean_block_data.get("blocks", [])
342
  ]) if isinstance(boolean_block_data.get("blocks"), list) else " No blocks information available."
343
+ boolean_opcodes_functionalities = os.path.join(BLOCKS_DIR, "boolean_blocks.txt")
344
 
345
  c_block_data = _load_block_catalog(C_BLOCKS_PATH)
346
  c_description = c_block_data["description"]
 
349
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
350
  for block in c_block_data.get("blocks", [])
351
  ]) if isinstance(c_block_data.get("blocks"), list) else " No blocks information available."
352
+ c_opcodes_functionalities = os.path.join(BLOCKS_DIR, "c_blocks.txt")
353
 
354
  cap_block_data = _load_block_catalog(CAP_BLOCKS_PATH)
355
  cap_description = cap_block_data["description"]
 
358
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
359
  for block in cap_block_data.get("blocks", [])
360
  ]) if isinstance(cap_block_data.get("blocks"), list) else " No blocks information available."
361
+ cap_opcodes_functionalities = os.path.join(BLOCKS_DIR, "cap_blocks.txt")
362
 
363
  reporter_block_data = _load_block_catalog(REPORTER_BLOCKS_PATH)
364
  reporter_description = reporter_block_data["description"]
 
367
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
368
  for block in reporter_block_data.get("blocks", [])
369
  ]) if isinstance(reporter_block_data.get("blocks"), list) else " No blocks information available."
370
+ reporter_opcodes_functionalities = os.path.join(BLOCKS_DIR, "reporter_blocks.txt")
371
 
372
  stack_block_data = _load_block_catalog(STACK_BLOCKS_PATH)
373
  stack_description = stack_block_data["description"]
 
376
  f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
377
  for block in stack_block_data.get("blocks", [])
378
  ]) if isinstance(stack_block_data.get("blocks"), list) else " No blocks information available."
379
+ stack_opcodes_functionalities = os.path.join(BLOCKS_DIR, "stack_blocks.txt")
380
 
381
  # This makes ALL_SCRATCH_BLOCKS_CATALOG available globally
382
  ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
 
447
  logger.error("Sanitized JSON still invalid:\n%s", json_string)
448
  raise
449
 
450
def clean_base64_for_model(raw_b64):
    """Normalize an image payload into a ``data:image/png;base64,<payload>`` string.

    Accepts:
        - a list: only its first element is used
        - a PIL Image instance: encoded to PNG, then to base64
        - a base64 string, with or without a ``data:image/...;base64,`` prefix
          and with or without embedded whitespace / line breaks

    Args:
        raw_b64: The payload in one of the forms listed above.

    Returns:
        str: The payload prefixed with ``data:image/png;base64,``, or ``""``
        when the input (or the payload left after cleaning) is empty.

    Raises:
        TypeError: If the value is neither a string nor a PIL Image.
        ValueError: If the cleaned payload is not valid base64
            (``binascii.Error`` is a ``ValueError`` subclass).
    """
    # 1. If it's a list, take its first element (an empty list means "no image").
    if isinstance(raw_b64, list):
        raw_b64 = raw_b64[0] if raw_b64 else ""

    if not raw_b64:
        return ""

    # 2. Anything that is not already a string must be a PIL Image to encode.
    if not isinstance(raw_b64, str):
        if not isinstance(raw_b64, Image.Image):
            raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
        buf = io.BytesIO()
        raw_b64.save(buf, format="PNG")
        raw_b64 = base64.b64encode(buf.getvalue()).decode()

    # 3. Remove all whitespace BEFORE looking for the prefix, so a data URI
    #    preceded by spaces/newlines is still recognised and line-wrapped
    #    payloads are rejoined.
    clean_b64 = re.sub(r"\s+", "", raw_b64)

    # 4. Strip any existing image data-URI prefix (any subtype, e.g. svg+xml).
    clean_b64 = re.sub(r"^data:image/[^,]*;base64,", "", clean_b64, flags=re.IGNORECASE)
    if not clean_b64:
        return ""

    # 5. Validate strictly: without validate=True, b64decode silently discards
    #    characters outside the alphabet and would accept a corrupt payload.
    try:
        base64.b64decode(clean_b64, validate=True)
    except ValueError as e:
        logger.error(f"Invalid Base64 passed to model: {e}")
        raise

    # 6. Return with the data URI prefix expected by the model.
    return f"data:image/png;base64,{clean_b64}"
491
+
492
+
493
  # Node 1: Logic updating if any issue here
494
  def pseudo_generator_node(state: GameState):
495
  logger.info("--- Running plan_logic_aligner_node ---")
 
653
  image_input = {
654
  "type": "image_url",
655
  "image_url": {
656
+ # "url": f"data:image/png;base64,{image}"
657
+ "url": clean_base64_for_model(image)
658
  }
659
  }
660
 
 
1514
  return state
1515
 
1516
  # Node 10:Function based block builder node
1517
def overall_block_builder_node_2(state: dict):
    """Build Scratch blocks for each target in the action plan and attach them.

    For every sprite (or the stage) named in ``state["action_plan"]``, each
    plan entry's ``logic`` text and ``opcode_counts`` are handed to
    ``block_builder``; the blocks it returns are merged into that target's
    ``blocks`` mapping inside ``state["project_json"]``. Top-level blocks are
    placed at x = 0 and stacked 150 units apart on the y axis per target.

    A plan entry that fails (missing ``logic``, builder error, ...) is logged
    and skipped so that one bad entry does not discard the remaining plans.

    NOTE(review): another function with this same name is defined later in
    the module; being bound last, that later definition is the one callers
    get. Confirm which copy is meant to stay.

    Args:
        state: Graph state. Must contain ``project_json`` with a ``targets``
            list; may contain ``action_plan``, keyed by target name either
            directly or under an ``action_overall_flow`` key.

    Returns:
        dict: The same ``state`` object, updated in place.
    """
    logger.info("--- Running OverallBlockBuilderNode ---")

    project_json = state["project_json"]
    targets = project_json["targets"]

    # --- Sprite and Stage Target Mapping ---
    sprite_map = {target["name"]: target for target in targets if not target["isStage"]}
    stage_target = next((target for target in targets if target["isStage"]), None)
    if stage_target:
        sprite_map[stage_target["name"]] = stage_target

    action_plan = state.get("action_plan", {})
    print("[Overall Action Plan received at the block generator]:", json.dumps(action_plan, indent=2))
    if not action_plan:
        logger.warning("No action plan found in state. Skipping OverallBlockBuilderNode.")
        return state

    # The plan may be wrapped under "action_overall_flow" or keyed by target
    # name directly; fall back to the flat form when the wrapper is absent,
    # empty, or not a mapping.
    overall_flow = action_plan.get("action_overall_flow")
    plan_source = overall_flow if isinstance(overall_flow, dict) and overall_flow else action_plan

    # Next free y position for a top-level script, per target.
    script_y_offset = {}

    # --- Process each sprite's action plan ---
    for sprite_name, sprite_actions_data in plan_source.items():
        current_sprite_target = sprite_map.get(sprite_name)
        if current_sprite_target is None:
            # The plan refers to a target that is not in the project.
            continue

        current_sprite_target.setdefault("blocks", {})
        script_y_offset.setdefault(sprite_name, 0)

        if not isinstance(sprite_actions_data, dict):
            logger.warning(f"Action plan for sprite '{sprite_name}' is not a mapping. Skipping.")
            continue

        for plan_entry in sprite_actions_data.get("plans") or []:
            try:
                # Read the entry inside the try so a malformed entry is
                # logged and skipped like any other build failure.
                logic_sequence = str(plan_entry["logic"])
                opcode_counts = plan_entry.get("opcode_counts", {})

                generated_blocks = block_builder(opcode_counts, logic_sequence)
                if "blocks" in generated_blocks and isinstance(generated_blocks["blocks"], dict):
                    logger.warning(f"LLM returned nested 'blocks' key for {sprite_name}. Unwrapping.")
                    generated_blocks = generated_blocks["blocks"]

                # Update block positions for top-level scripts.
                for block_data in generated_blocks.values():
                    if block_data.get("topLevel"):
                        block_data["x"] = 0
                        block_data["y"] = script_y_offset[sprite_name]
                        script_y_offset[sprite_name] += 150  # Increment for next script

                current_sprite_target["blocks"].update(generated_blocks)
                print(f"[current_sprite_target block updated]: {current_sprite_target['blocks']}")
                state["iteration_count"] = 0
                logger.info(f"Action blocks added for sprite '{sprite_name}' by OverallBlockBuilderNode.")
            except Exception as e:
                logger.error(f"Error generating blocks for sprite '{sprite_name}': {e}")

    state["project_json"] = project_json
    return state
1597
  # Node 10:Function based block builder node
1598
  def overall_block_builder_node_2(state: dict):
1599
  logger.info("--- Running OverallBlockBuilderNode ---")
 
1697
  raise
1698
 
1699
 
1700
+ # scratch_keywords = [
1701
+ # "move", "turn", "wait", "repeat", "if", "else", "broadcast",
1702
+ # "glide", "change", "forever", "when", "switch",
1703
+ # "next costume", "set", "show", "hide", "play sound",
1704
+ # "go to", "x position", "y position", "think", "say",
1705
+ # "variable", "stop", "clone",
1706
+ # "touching", "sensing", "pen", "clear","Scratch","Code","scratch blocks"
1707
+ # ]
1708
+
1709
+ # Node 6: Logic updating if any issue here
1710
+ # def plan_logic_aligner_node(state: GameState):
1711
+ # logger.info("--- Running plan_logic_aligner_node ---")
1712
+
1713
+ # image = state.get("image", "")
1714
+
1715
+ # refinement_prompt = f"""
1716
+ # You are an expert in Scratch 3.0 game development, specializing in understanding block relationships (stacked, nested).
1717
+ # "Analyze the Scratch code-block image and generate Pseudo-Code for what this logic appears to be doing."
1718
+ # From Image, you also have to detect a value of Key given in Text form "Script for: ". Below is the example
1719
+ # Example: "Script for: Bear", "Script for:" is a key and "Bear" is value.
1720
+ # --- Scratch 3.0 Block Reference ---
1721
+ # ### Hat Blocks
1722
+ # Description: {hat_description}
1723
+ # Blocks:
1724
+ # {hat_opcodes_functionalities}
1725
+
1726
+ # ### Boolean Blocks
1727
+ # Description: {boolean_description}
1728
+ # Blocks:
1729
+ # {boolean_opcodes_functionalities}
1730
+
1731
+ # ### C Blocks
1732
+ # Description: {c_description}
1733
+ # Blocks:
1734
+ # {c_opcodes_functionalities}
1735
+
1736
+ # ### Cap Blocks
1737
+ # Description: {cap_description}
1738
+ # Blocks:
1739
+ # {cap_opcodes_functionalities}
1740
+
1741
+ # ### Reporter Blocks
1742
+ # Description: {reporter_description}
1743
+ # Blocks:
1744
+ # {reporter_opcodes_functionalities}
1745
+
1746
+ # ### Stack Blocks
1747
+ # Description: {stack_description}
1748
+ # Blocks:
1749
+ # {stack_opcodes_functionalities}
1750
+ # -----------------------------------
1751
+
1752
+ # Your task is to:
1753
+ # If you don't find any "Code-Blocks" then,
1754
+ # **Don't generate Pseudo Code, and pass the message "No Code-blocks" find...
1755
+ # If you find any "Code-Blocks" then,
1756
+ # 1. **Refine the 'logic'**: Make it precise, accurate, and fully aligned with the Game Description. Use Scratch‑consistent verbs and phrasing. **Do NOT** use raw double‑quotes inside the logic string.
1757
+
1758
+ # 2. **Structural requirements**:
1759
+ # - **Numeric values** `(e.g., 0, 5, 0.2, -130)` **must** be in parentheses: `(0)`, `(5)`, `(0.2)`, `(-130)`.
1760
+ # - **AlphaNumeric values** `(e.g., hello, say 5, 4, hi!)` **must** be in parentheses: `(hello)`, `(say 5)`, `(4)`, `(hi!)`.
1761
+ # - **Variables** must be in the form `[variable v]` (e.g., `[score v]`), even when used inside expressions two example use `set [score v] to (1)` or `show variable ([speed v])`.
1762
+ # - **Dropdown options** must be in the form `[option v]` (e.g., `[Game Start v]`, `[blue sky v]`). example use `when [space v] key pressed`.
1763
+ # - **Reporter blocks** used as inputs must be double‑wrapped: `((x position))`, `((y position))`. example use `if <((y position)) = (-130)> then` or `(((x position)) * (1))`.
1764
+ # - **Boolean blocks** in conditions must be inside `< >`, including nested ones: `<not <condition>>`, `<<cond1> and <cond2>>`,`<<cond1> or <cond2>>`.
1765
+ # - **Other Boolean blocks** in conditions must be inside `< >`, including nested ones or values or variables: `<(block/value/variable) * (block/value/variable)>`,`<(block/value/variable) < (block/value/variable)>`, and example of another variable`<[apple v] contains [a v]?>`.
1766
+ # - **Operator expressions** must use explicit Scratch operator blocks, e.g.:
1767
+ # ```
1768
+ # (([ballSpeed v]) * (1.1))
1769
+ # ```
1770
+ # - **Every hat block script must end** with a final `end` on its own line.
1771
+
1772
+ # 3. **Pseudo‑code formatting**:
1773
+ # - Represent each block or nested block on its own line.
1774
+ # - Indent nested blocks by 4 spaces under their parent (`forever`, `if`, etc.).
1775
+ # - No comments or explanatory text—just the block sequence.
1776
+ # - a natural language breakdown of each step taken after the event, formatted as a multi-line string representing pseudo-code. Ensure clarity and granularity—each described action should map closely to a Scratch block or tight sequence.
1777
+
1778
+ # 4. **Logic content**:
1779
+ # - Build clear flow for mechanics (movement, jumping, flying, scoring, collisions).
1780
+ # - Match each action closely to a Scratch block or tight sequence.
1781
+ # - Do **NOT** include any justification or comments—only the raw logic.
1782
+
1783
+ # 5. **Examples for reference**:
1784
+ # **Correct** pattern for a simple start script:
1785
+ # ```
1786
+ # when green flag clicked
1787
+ # switch backdrop to [blue sky v]
1788
+ # set [score v] to (0)
1789
+ # show variable [score v]
1790
+ # broadcast [Game Start v]
1791
+ # end
1792
+ # ```
1793
+ # **Correct** pattern for updating the high score variable handling:
1794
+ # ```
1795
+ # when I receive [Game Over v]
1796
+ # if <((score)) > (([High Score v]))> then
1797
+ # set [High Score v] to ([score v])
1798
+ # end
1799
+ # switch backdrop to [Game Over v]
1800
+ # end
1801
+ # ```
1802
+ # **Correct** pattern for level up and increase difficulty use:
1803
+ # ```
1804
+ # when I receive [Level Up v]
1805
+ # change [level v] by (1)
1806
+ # set [ballSpeed v] to ((([ballSpeed v]) * (1.1)))
1807
+ # end
1808
+ # ```
1809
+ # **Correct** pattern for jumping mechanics use:
1810
+ # ```
1811
+ # when [space v] key pressed
1812
+ # if <((y position)) = (-100)> then
1813
+ # repeat (5)
1814
+ # change y by (100)
1815
+ # wait (0.1) seconds
1816
+ # change y by (-100)
1817
+ # wait (0.1) seconds
1818
+ # end
1819
+ # end
1820
+ # end
1821
+ # ```
1822
+ # **Correct** pattern for continuos moving objects use:
1823
+ # ```
1824
+ # when green flag clicked
1825
+ # go to x: (240) y: (-100)
1826
+ # set [speed v] to (-5)
1827
+ # show variable [speed v]
1828
+ # forever
1829
+ # change x by ([speed v])
1830
+ # if <((x position)) < (-240)> then
1831
+ # go to x: (240) y: (-100)
1832
+ # end
1833
+ # end
1834
+ # end
1835
+ # ```
1836
+ # **Correct** pattern for continuos moving objects use:
1837
+ # ```
1838
+ # when green flag clicked
1839
+ # go to x: (240) y: (-100)
1840
+ # set [speed v] to (-5)
1841
+ # show variable [speed v]
1842
+ # forever
1843
+ # change x by ([speed v])
1844
+ # if <((x position)) < (-240)> then
1845
+ # go to x: (240) y: (-100)
1846
+ # end
1847
+ # end
1848
+ # end
1849
+ # ```
1850
+ # 6. **Donot** add any explaination of logic or comments to justify or explain just put the logic content in the json.
1851
+ # 7. **Output**:
1852
+ # Return **only** a JSON object, using double quotes everywhere:
1853
+ # ```json
1854
+ # {{
1855
+ # "refined_logic":{{
1856
+ # "name_variable": 'Value of "Sript for: "',
1857
+ # "pseudocode":"…your fully‑formatted pseudo‑code here…",
1858
+ # }}
1859
+ # }}
1860
+ # ```
1861
+ # """
1862
+ # image_input = {
1863
+ # "type": "image_url",
1864
+ # "image_url": {
1865
+ # "url": f"data:image/png;base64,{image}"
1866
+ # }
1867
+ # }
1868
+
1869
+ # content = [
1870
+ # {"type": "text", "text": refinement_prompt},
1871
+ # image_input
1872
+ # ]
1873
+
1874
+ # try:
1875
+ # # Invoke the main agent for logic refinement and relationship identification
1876
+ # response = agent.invoke({"messages": [{"role": "user", "content": content}]})
1877
+ # llm_output_raw = response["messages"][-1].content.strip()
1878
+
1879
+ # parsed_llm_output = extract_json_from_llm_response(llm_output_raw)
1880
+
1881
+ # # result = parsed_llm_output
1882
+ # # Extract needed values directly
1883
+ # logic_data = parsed_llm_output.get("refined_logic", {})
1884
+ # name_variable = logic_data.get("name_variable", "Unknown")
1885
+ # pseudocode = logic_data.get("pseudocode", "No logic extracted")
1886
+
1887
+ # result = {"pseudo_node": {
1888
+ # "name_variable": name_variable,
1889
+ # "pseudocode": pseudocode
1890
+ # }}
1891
+
1892
+ # print(f"result:\n\n {result}")
1893
+ # return result
1894
+ # except Exception as e:
1895
+ # logger.error(f"❌ plan_logic_aligner_node failed: {str(e)}")
1896
+ # return {"error": str(e)}
1897
+ # except json.JSONDecodeError as error_json:
1898
+ # # If JSON parsing fails, use the json resolver agent
1899
+ # correction_prompt = (
1900
+ # "Your task is to correct the provided JSON string to ensure it is **syntactically perfect and adheres strictly to JSON rules**.\n"
1901
+ # "It must be a JSON object with `refined_logic` (string) and `block_relationships` (array of objects).\n"
1902
+ # f"- **Error Details**: {error_json}\n\n"
1903
+ # "**Strict Instructions for your response:**\n"
1904
+ # "1. **ONLY** output the corrected JSON. Do not include any other text or explanations.\n"
1905
+ # "2. Ensure all keys and string values are enclosed in **double quotes**. Escape internal quotes (`\\`).\n"
1906
+ # "3. No trailing commas. Correct nesting.\n\n"
1907
+ # "Here is the problematic JSON string to correct:\n"
1908
+ # f"```json\n{llm_output_raw}\n```\n"
1909
+ # "Corrected JSON:\n"
1910
+ # )
1911
+ # try:
1912
+ # correction_response = agent_json_resolver.invoke({"messages": [{"role": "user", "content": correction_prompt}]})
1913
+ # corrected_output = extract_json_from_llm_response(correction_response["messages"][-1].content)
1914
+
1915
+ # result = {
1916
+ # #"image_path": image_path,
1917
+ # "pseudo_code": corrected_output
1918
+ # }
1919
+
1920
+ # return result
1921
+
1922
+ # except Exception as e_corr:
1923
+ # logger.error(f"Failed to correct JSON output for even after retry: {e_corr}")
1924
 
1925
  #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
1926
  #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path):
1927
+
1928
+ # Prepare manipulated sprite JSON structure
1929
+ manipulated_json = {}
1930
+ img_elements = []
1931
+ # { changes: "pdf_stream" in place of "pdf_path"
1932
+ def extract_images_from_pdf(pdf_stream: io.BytesIO):
1933
  ''' Extract images from PDF and generate structured sprite JSON '''
1934
  try:
1935
+ # {
1936
+ # pdf_path = Path(pdf_path)
1937
+ # pdf_filename = pdf_path.stem # e.g., "scratch_crab"
1938
+ # pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
1939
+ # print("-------------------------------pdf_filename-------------------------------",pdf_filename)
1940
+ # print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
1941
+
1942
+ if isinstance(pdf_stream, io.BytesIO):
1943
+ # use a random ID since there's no filename
1944
+ pdf_id = uuid.uuid4().hex
1945
+ else:
1946
+ pdf_id = os.path.splitext(os.path.basename(pdf_stream))[0]
1947
+
1948
+ # extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
1949
+ # json_subdir = JSON_DIR / pdf_filename
1950
+ # extracted_image_subdir.mkdir(parents=True, exist_ok=True)
1951
+ # json_subdir.mkdir(parents=True, exist_ok=True)
1952
+ # print("-------------------------------extracted_image_subdir-------------------------------",extracted_image_subdir)
1953
+ # print("-------------------------------json_subdir-------------------------------",json_subdir)
1954
+ # # Output paths (now using Path objects directly)
1955
+ # output_json_path = json_subdir / "extracted.json"
1956
+ # final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
1957
+ # final_json_path_2 = json_subdir / "extracted_sprites_2.json"
1958
+ # print("-------------------------------output_json_path-------------------------------",output_json_path)
1959
+ # print("-------------------------------final_json_path-------------------------------",final_json_path)
1960
+ # print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
1961
 
1962
+ # }
1963
  try:
1964
  elements = partition_pdf(
1965
+ # filename=str(pdf_path), # partition_pdf might expect a string
1966
+ file=pdf_stream, # 'file=', inplace of 'filename'
1967
  strategy="hi_res",
1968
  extract_image_block_types=["Image"],
1969
  hi_res_model_name="yolox",
 
1974
  raise RuntimeError(
1975
  f"❌ Failed to extract images from PDF: {str(e)}")
1976
 
1977
+ file_elements = [element.to_dict() for element in elements]
1978
+
1979
+ #{
1980
+ # try:
1981
+ # with open(output_json_path, "w") as f:
1982
+ # json.dump([element.to_dict()
1983
+ # for element in elements], f, indent=4)
1984
+ # except Exception as e:
1985
+ # raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
1986
+
1987
+ # try:
1988
+ # # Display extracted images
1989
+ # with open(output_json_path, 'r') as file:
1990
+ # file_elements = json.load(file)
1991
+ # except Exception as e:
1992
+ # raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
1993
+ # }
1994
 
1995
+ sprite_count = 1
1996
+ for el in file_elements:
1997
+ img_b64 = el["metadata"].get("image_base64")
1998
+ if not img_b64:
1999
+ continue
2000
 
2001
+ manipulated_json[f"Sprite {sprite_count}"] = {
2002
+ # "id":auto_id,
2003
+ # "name": name,
2004
+ "base64": el["metadata"]["image_base64"],
2005
+ "file-path": pdf_id,
2006
+ # "description": description
2007
+ }
2008
+ sprite_count += 1
2009
+ return manipulated_json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2010
  except Exception as e:
2011
  raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
2012
 
2013
+ # def similarity_matching(input_json_path: str, project_folder: str) -> str:
2014
+ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2015
  logger.info("🔍 Running similarity matching…")
2016
  os.makedirs(project_folder, exist_ok=True)
2017
 
 
2019
  # CHANGED: define normalized base-paths so startswith() checks work
2020
  backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
2021
  sprite_base_path = os.path.normpath(str(SPRITE_DIR))
2022
+ code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
2023
  # ----------------------------------------
2024
 
2025
  project_json_path = os.path.join(project_folder, "project.json")
 
2027
  # ==============================
2028
  # READ SPRITE METADATA
2029
  # ==============================
2030
+ # with open(input_json_path, 'r') as f:
2031
+ # sprites_data = json.load(f)
2032
 
2033
+ sprite_ids, sprite_base64 = [], []
2034
  for sid, sprite in sprites_data.items():
2035
  sprite_ids.append(sid)
2036
+ # texts.append("This is " + sprite.get("description", sprite.get("name", "")))
2037
  sprite_base64.append(sprite["base64"])
2038
 
2039
+ sprite_images_bytes = []
2040
+ for b64 in sprite_base64:
2041
+ img = Image.open(BytesIO(base64.b64decode(b64.split(",")[-1]))).convert("RGB")
2042
+ buffer = BytesIO()
2043
+ img.save(buffer, format="PNG")
2044
+ buffer.seek(0)
2045
+ sprite_images_bytes.append(buffer)
2046
+
2047
  # =========================================
2048
  # Build the list of all candidate images
2049
  # =========================================
 
2063
  SPRITE_DIR / "Centaur.sprite3" / "2373556e776cad3ba4d6ee04fc34550b.png",
2064
  SPRITE_DIR / "Crab.sprite3" / "bear_element.png",
2065
  SPRITE_DIR / "Soccer Ball.sprite3" / "cat_football.png",
2066
+
2067
+ CODE_BLOCKS_DIR / "script1.jpg",
2068
+ CODE_BLOCKS_DIR / "script2.jpg",
2069
+ CODE_BLOCKS_DIR / "script3.jpg",
2070
+ CODE_BLOCKS_DIR / "script4.jpg",
2071
+ CODE_BLOCKS_DIR / "script5.jpg",
2072
+ CODE_BLOCKS_DIR / "script6.jpg"
2073
  ]
2074
  folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
2075
  # =========================================
 
2079
  # -----------------------------------------
2080
  with open(f"{BLOCKS_DIR}/embeddings.json", "r") as f:
2081
  embedding_json = json.load(f)
 
2082
 
2083
  # =========================================
2084
  # Decode & embed each sprite image
2085
  # =========================================
2086
+ # sprite_features = []
2087
+ # for b64 in sprite_base64:
2088
+ # if "," in b64:
2089
+ # b64 = b64.split(",", 1)[1]
2090
+
2091
+ # img_bytes = base64.b64decode(b64)
2092
+ # pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
2093
+ # buf = BytesIO()
2094
+ # pil_img.save(buf, format="PNG")
2095
+ # buf.seek(0)
2096
+ # feats = clip_embd.embed_image([buf])[0]
2097
+ # sprite_features.append(feats)
2098
+
2099
+ # ============================== #
2100
+ # EMBED SPRITE IMAGES #
2101
+ # ============================== #
2102
+ sprite_features = clip_embd.embed_image(sprite_images_bytes)
2103
+
2104
  sprite_matrix = np.vstack(sprite_features)
2105
+ img_matrix = np.array([img["embeddings"] for img in embedding_json])
2106
+
2107
  # =========================================
2108
  # Compute similarities & pick best match
2109
  # =========================================
 
2197
  else:
2198
  logger.warning(f"No project.json in {matched_folder}")
2199
 
2200
+
2201
  # =========================================
2202
  # Merge into final Scratch project.json
2203
  # =========================================
 
2375
  # # logger.info(f"🎉 Final project saved: {project_json_path}")
2376
  # return project_json_path
2377
 
2378
+ # def convert_bytes_to_image(pdf_bytes: bytes, dpi: int):
2379
+ # images = convert_from_bytes(pdf_bytes, dpi=dpi, poppler_path=poppler_path)
2380
+ # # Save each page to an in-memory BytesIO and return a list of BytesIOs
2381
+ # buffers = []
2382
+ # for img in images:
2383
+ # buf = BytesIO()
2384
+ # img.save(buf, format="PNG")
2385
+ # buf.seek(0)
2386
+ # buffers.append(buf)
2387
+ # return buffers
2388
+
2389
def convert_pdf_stream_to_images(pdf_stream: io.BytesIO, dpi=300):
    """Render the pages of an in-memory PDF via ``convert_from_path``.

    The stream is spooled to a temporary ``.pdf`` file because
    ``convert_from_path`` takes a filesystem path. The temporary file is
    removed once conversion finishes, whether it succeeds or raises.

    Args:
        pdf_stream: Seekable binary stream holding the PDF; it is rewound to
            the start before being read.
        dpi: Rendering resolution passed through to ``convert_from_path``.

    Returns:
        Whatever ``convert_from_path`` returns for the PDF (presumably a list
        of PIL images, one per page — confirm against pdf2image docs).
    """
    # Ensure we are at the start of the stream
    pdf_stream.seek(0)

    # delete=False: the file must outlive this ``with`` so it can be closed
    # first and then reopened by path during conversion.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
        tmp_pdf.write(pdf_stream.read())
        tmp_pdf_path = tmp_pdf.name

    try:
        # Now use convert_from_path on the temp file
        return convert_from_path(tmp_pdf_path, dpi=dpi)
    finally:
        # Without this cleanup every call leaves a PDF behind in the temp dir.
        try:
            os.remove(tmp_pdf_path)
        except OSError as e:
            logger.warning(f"Could not remove temporary PDF {tmp_pdf_path}: {e}")
2400
+
2401
  def delay_for_tpm_node(state: GameState):
2402
  logger.info("--- Running DelayForTPMNode ---")
2403
  time.sleep(60) # Adjust the delay as needed
 
2448
  except Exception as e:
2449
  logger.error(f"❌ Error during image upscaling: {str(e)}")
2450
  return image
2451
+
2452
+ @log_execution_time
2453
  def create_sb3_archive(project_folder, project_id):
2454
  """
2455
  Zips the project folder and renames it to an .sb3 file.
 
2489
  os.remove(sb3_path)
2490
  return sb3_path
2491
 
2492
+ #{ changes -> pdf_stream replacement of pdf_path
2493
+ # def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
2494
+ def save_pdf_to_generated_dir(pdf_stream: io.BytesIO, project_id: str) -> str:
2495
  """
2496
+ Copies the PDF at `pdf_stream` into GEN_PROJECT_DIR/project_id/,
2497
  renaming it to <project_id>.pdf.
2498
 
2499
  Args:
2500
+ pdf_stream (io.BytesIO): Any existing stream to a PDF file.
2501
  project_id (str): Your unique project identifier.
2502
 
2503
  Returns:
2504
  str: Path to the copied PDF in the generated directory,
2505
  or None if something went wrong.
2506
+ """
2507
+ # }
2508
  try:
2509
  # 1) Build the destination directory and base filename
2510
  output_dir = GEN_PROJECT_DIR / project_id
 
2515
  target_pdf = output_dir / f"{project_id}.pdf"
2516
  print(f"\n--------------------------------target_pdf {target_pdf}")
2517
  # 3) Copy the PDF
2518
+ # {
2519
+ # shutil.copy2(pdf_path, target_pdf)
2520
+ if isinstance(pdf_stream, io.BytesIO):
2521
+ with open(target_pdf, "wb") as f:
2522
+ f.write(pdf_stream.getbuffer())
2523
+ else:
2524
+ shutil.copy2(pdf_stream, target_pdf)
2525
+ print(f"Copied PDF from {pdf_stream} → {target_pdf}")
2526
+ logger.info(f"Copied PDF from {pdf_stream} → {target_pdf}")
2527
+ # }
2528
 
2529
 
2530
  return str(target_pdf)
 
2585
  # Create empty json in project_{random_id} folder #
2586
  # =========================================================================== #
2587
  #os.makedirs(project_folder, exist_ok=True)
2588
+
2589
+ # {
2590
  # Save the uploaded PDF temporarily
2591
+ # filename = secure_filename(pdf_file.filename)
2592
+ # temp_dir = tempfile.mkdtemp()
2593
+ # saved_pdf_path = os.path.join(temp_dir, filename)
2594
+ # pdf_file.save(saved_pdf_path)
2595
+ # pdf_doc = saved_pdf_path
2596
+
2597
+ pdf_bytes = pdf_file.read()
2598
+ pdf_stream = io.BytesIO(pdf_bytes)
2599
+ logger.info(f"Saved uploaded PDF to: {pdf_stream}")
2600
+
2601
+ # pdf= save_pdf_to_generated_dir(saved_pdf_path, project_id)
2602
+ start_time = time.time()
2603
+ pdf= save_pdf_to_generated_dir(pdf_stream, project_id)
2604
  # logger.info(f"Created project folder: {project_folder}")
2605
+ # logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
2606
  logger.info(f"Saved uploaded PDF to: {pdf_file}: {pdf}")
2607
+ print("--------------------------------pdf_file_path---------------------",pdf_file,pdf_stream)
2608
+ total_time = time.time() - start_time
2609
+ print(f"-----------------------------Execution Time save_pdf_to_generated_dir() : {total_time}-----------------------------\n")
2610
+ # }
2611
+
2612
+ # {
2613
  # Extract & process
2614
+ # output_path, result = extract_images_from_pdf(saved_pdf_path)
2615
+ start_time = time.time()
2616
+ output_path = extract_images_from_pdf(pdf_stream)
2617
+ print(" --------------------------------------- zip_path_str ---------------------------------------", output_path)
2618
+ total_time = time.time() - start_time
2619
+ print(f"-----------------------------Execution Time extract_images_from_pdf() : {total_time}-----------------------------\n")
2620
+ # }
2621
+
2622
  # Check extracted_sprites.json for "scratch block" in any 'name'
2623
  # extracted_dir = os.path.join(JSON_DIR, os.path.splitext(filename)[0])
2624
  # extracted_sprites_json = os.path.join(extracted_dir, "extracted_sprites.json")
 
2628
 
2629
  # with open(extracted_sprites_json, 'r') as f:
2630
  # sprite_data = json.load(f)
2631
+ start_time = time.time()
2632
+ project_output = similarity_matching(output_path, project_folder)
2633
+ logger.info("Received request to process PDF.")
2634
+ total_time = time.time() - start_time
2635
+ print(f"-----------------------------Execution Time similarity_matching() : {total_time}-----------------------------\n")
2636
 
2637
+ with open(project_output, 'r') as f:
2638
+ project_skeleton = json.load(f)
2639
 
2640
+ # images = convert_from_path(pdf_stream, dpi=300)
2641
  # print(type)
2642
  # page = images[0]
2643
  # # img_base64 = base64.b64encode(images).decode("utf-8")
 
2646
  # img_bytes = buf.getvalue()
2647
  # img_b64 = base64.b64encode(img_bytes).decode("utf-8")
2648
  #image_paths = await convert_pdf_to_images_async(saved_pdf_path)
2649
+
2650
+ # images = convert_bytes_to_image(pdf_stream, dpi=250)
2651
+ # print("PDF converted to images:", images)
2652
+
2653
+ if isinstance(pdf_stream, io.BytesIO):
2654
+ images = convert_pdf_stream_to_images(pdf_stream, dpi=300)
2655
+ else:
2656
+ images = convert_from_path(pdf_stream, dpi=300)
2657
+
2658
  #updating logic here [Dev Patel]
2659
+ initial_state_dict = {
2660
+ "project_json": project_skeleton,
2661
+ "description": "The pseudo code for the script",
2662
+ "project_id": project_id,
2663
+ # "project_image": img_b64,
2664
+ "project_image": images,
2665
+ "action_plan": {},
2666
+ "pseudo_code": {},
2667
+ "temporary_node": {},
2668
+ }
2669
 
2670
+ final_state_dict = app_graph.invoke(initial_state_dict) # Pass dictionary
2671
 
2672
+ final_project_json = final_state_dict['project_json'] # Access as dict
2673
  # final_project_json = project_skeleton
2674
 
2675
  # Save the *final* filled project JSON, overwriting the skeleton
2676
+ with open(project_output, "w") as f:
2677
+ json.dump(final_project_json, f, indent=2)
2678
+ logger.info(f"Final project JSON saved to {project_output}")
2679
 
2680
  # --- Call the new function to create the .sb3 file ---
2681
+ sb3_file_path = create_sb3_archive(project_folder, project_id)
 
2682
 
2683
  if sb3_file_path:
2684
  logger.info(f"Successfully created SB3 file: {sb3_file_path}")
 
2693
  "output_json": "output_path",
2694
  "sprites": "result",
2695
  "project_output_json": "project_output",
2696
+ "test_url": download_url
 
2697
  })
2698
  else:
2699
  return jsonify(error="Failed to create SB3 archive"), 500