Scratch_vlm_v1_2

Sleeping

App Files Community

prthm11 committed 19 days ago

Commit

4959a61

verified ·

1 Parent(s): ea87f78

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -55

app.py CHANGED Viewed

@@ -1651,31 +1651,31 @@ scratch_keywords = [
                     "touching", "sensing", "pen", "clear","Scratch","Code","scratch blocks"
                 ]
-# --- FUNCTION: Extract images from saved PDF ---
-def extract_images_from_pdf(pdf_path, final_json_path_2):
     ''' Extract images from PDF and generate structured sprite JSON '''
     try:
-        pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0]  # e.g., "scratch_crab"
-        pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")
-        # Create subfolders
-        extracted_image_subdir = os.path.join(
-            DETECTED_IMAGE_DIR, pdf_filename)
-        json_subdir = os.path.join(JSON_DIR, pdf_filename)
-        os.makedirs(extracted_image_subdir, exist_ok=True)
-        os.makedirs(json_subdir, exist_ok=True)
-        # Output paths
-        output_json_path = os.path.join(json_subdir, "extracted.json")
-        final_json_path = os.path.join(json_subdir, "extracted_sprites.json")
-        final_json_path_2 = os.path.join(json_subdir, "extracted_sprites_2.json")
         try:
             elements = partition_pdf(
-                filename=pdf_path,
                 strategy="hi_res",
                 extract_image_block_types=["Image"],
-                extract_image_block_to_payload=True,  # Set to True to get base64 in output
             )
         except Exception as e:
             raise RuntimeError(
@@ -1716,7 +1716,7 @@ def extract_images_from_pdf(pdf_path, final_json_path_2):
         )
         # If JSON already exists, load it and find the next available Sprite number
-        if os.path.exists(final_json_path):
             with open(final_json_path, "r") as existing_file:
                 manipulated = json.load(existing_file)
             # Determine the next available index (e.g., Sprite 4 if 1–3 already exist)
@@ -1732,17 +1732,22 @@ def extract_images_from_pdf(pdf_path, final_json_path_2):
                 try:
                     image_data = base64.b64decode(
                         element["metadata"]["image_base64"])
-                    image = Image.open(io.BytesIO(image_data)).convert("RGB")
                     image = upscale_image(image, scale=2)
                     # image.show(title=f"Extracted Image {i+1}")
-                    image_path = os.path.join(extracted_image_subdir, f"Sprite_{i+1}.png")
-                    image.save(image_path) # don't need to store image in local folder, process it from variable
-                    with open(image_path, "rb") as image_file:
-                        image_bytes = image_file.read()
-                    img_base64 = base64.b64encode(image_bytes).decode("utf-8")
                     prompt_combined = """
                     Analyze this image and return JSON with keys:# modify prompt for "name", if it detects "code-blocks only then give name as 'scratch-block'"
                     {
@@ -1762,24 +1767,34 @@ def extract_images_from_pdf(pdf_path, final_json_path_2):
                     ]
                     response = agent.invoke({"messages": [{"role": "user", "content": content}]})
-                    result_json = json.loads(response["messages"][-1].content)
                     try:
                         name = result_json.get("name", "").strip()
                         description = result_json.get("description", "").strip()
                     except Exception as e:
-                        logger.error(f"⚠️ Failed to extract name/description: {str(e)}")
                         name = "unknown"
                         description = "unknown"
                     manipulated_json[f"Sprite {sprite_count}"] = {
                         "name": name,
-                        "base64": element["metadata"]["image_base64"],
-                        "file-path": pdf_dir_path,
                         "description": description
                     }
                     sprite_count += 1
                 except Exception as e:
-                    print(f"⚠️ Error processing Sprite {i+1}: {str(e)}")
         # Save manipulated JSON
         with open(final_json_path, "w") as sprite_file:
@@ -1800,17 +1815,13 @@ def extract_images_from_pdf(pdf_path, final_json_path_2):
             else:
                 logger.info(f"🛑 Excluded code block-like image: {key}")
-        # if not any(is_code_block(value.get("name","")) for value in manipulated_json.values()):
-        #     return jsonify({"message":"Invalid Content"}), 400
-        # if not filtered_sprites:
-        #     return "Invalid Content", {}
         # Overwrite with filtered content
         with open(final_json_path_2, "w") as sprite_file:
             json.dump(filtered_sprites, sprite_file, indent=4)
-        # print(f"✅ Manipulated sprite JSON saved: {final_json_path}")
-        return final_json_path, manipulated_json
     except Exception as e:
         raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
@@ -2110,10 +2121,12 @@ def create_sb3_archive(project_folder: Path, project_id: str) -> Path | None:
     zip_path = None
     sb3_path = None
     try:
         zip_path_str = shutil.make_archive(str(output_base_name), 'zip', root_dir=str(project_folder))
-        zip_path = Path(zip_path_str)
         logger.info(f"Project folder zipped to: {zip_path}")
         sb3_path = GEN_PROJECT_DIR / f"{project_id}.sb3"
         os.rename(zip_path, sb3_path)
         logger.info(f"Renamed {zip_path} to {sb3_path}")
@@ -2121,13 +2134,13 @@ def create_sb3_archive(project_folder: Path, project_id: str) -> Path | None:
         return sb3_path
     except Exception as e:
         logger.error(f"Error creating SB3 archive for {project_id}: {e}", exc_info=True)
         if zip_path and zip_path.exists():
             os.remove(zip_path)
         if sb3_path and sb3_path.exists():
             os.remove(sb3_path)
         return None
 @app.route('/')
 def index():
     return render_template('app_index.html')
@@ -2160,7 +2173,8 @@ def download_sb3(project_id):
 def process_pdf():
     project_id = None
     project_folder = None
-    temp_dir = None # Initialize temp_dir for finally block
     try:
         logger.info("Received request to process PDF.")
         if 'pdf_file' not in request.files:
@@ -2184,24 +2198,23 @@ def process_pdf():
         logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
-        json_path = None
-        extracted_output_dir, result = extract_images_from_pdf(saved_pdf_path, json_path)
-        # Ensure extracted_output_dir is a Path object for the '/' operator
-        # This was the source of the TypeError
-        if not isinstance(extracted_output_dir, Path):
-            extracted_output_dir = Path(extracted_output_dir)
-        extracted_sprites_json_path = extracted_output_dir / "extracted_sprites.json"
-        if not extracted_sprites_json_path.exists():
-            logger.error(f"No extracted_sprites.json found at {extracted_sprites_json_path}")
             return jsonify({"error": "No extracted_sprites.json found"}), 500
-        with open(extracted_sprites_json_path, 'r') as f:
             sprite_data = json.load(f)
-        project_output = similarity_matching(extracted_output_dir, project_folder)
         logger.info("Similarity matching completed.")
         with open(project_output, 'r') as f:
@@ -2248,6 +2261,17 @@ def process_pdf():
         if temp_dir and temp_dir.exists():
             shutil.rmtree(temp_dir)
             logger.info(f"Cleaned up temporary directory: {temp_dir}")
 @app.route('/list_projects', methods=['GET'])
 def list_projects():
@@ -2268,4 +2292,4 @@ def list_projects():
         return jsonify({"error": "Failed to list generated projects"}), 500
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860, debug=True)

                     "touching", "sensing", "pen", "clear","Scratch","Code","scratch blocks"
                 ]
+def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
     ''' Extract images from PDF and generate structured sprite JSON '''
     try:
+        pdf_filename = pdf_path.stem  # e.g., "scratch_crab" from Path object
+        # Create subfolders under the provided base directories
+        # This will create paths like:
+        # /app/detected_images/pdf_filename/
+        # /app/json_data/pdf_filename/
+        extracted_image_subdir = image_base_dir / pdf_filename
+        json_subdir = json_base_dir / pdf_filename
+        extracted_image_subdir.mkdir(parents=True, exist_ok=True)
+        json_subdir.mkdir(parents=True, exist_ok=True)
+        # Output paths (now using Path objects directly)
+        output_json_path = json_subdir / "extracted.json"
+        final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
+        final_json_path_2 = json_subdir / "extracted_sprites_2.json"
         try:
             elements = partition_pdf(
+                filename=str(pdf_path), # partition_pdf might expect a string
                 strategy="hi_res",
                 extract_image_block_types=["Image"],
+                extract_image_block_to_payload=True,
             )
         except Exception as e:
             raise RuntimeError(
         )
         # If JSON already exists, load it and find the next available Sprite number
+        if final_json_path.exists(): # Use Path.exists()
             with open(final_json_path, "r") as existing_file:
                 manipulated = json.load(existing_file)
             # Determine the next available index (e.g., Sprite 4 if 1–3 already exist)
                 try:
                     image_data = base64.b64decode(
                         element["metadata"]["image_base64"])
+                    image = Image.open(BytesIO(image_data)).convert("RGB") # Use BytesIO here
                     image = upscale_image(image, scale=2)
                     # image.show(title=f"Extracted Image {i+1}")
+                    # MODIFIED: Store image directly to BytesIO to avoid saving to disk if not needed
+                    # and then converting back to base64.
+                    img_buffer = BytesIO()
+                    image.save(img_buffer, format="PNG")
+                    img_bytes = img_buffer.getvalue()
+                    img_base64 = base64.b64encode(img_bytes).decode("utf-8")
+                    # Optionally save image to disk if desired for debugging/permanent storage
+                    image_path = extracted_image_subdir / f"Sprite_{i+1}.png"
+                    image.save(image_path)
                     prompt_combined = """
                     Analyze this image and return JSON with keys:# modify prompt for "name", if it detects "code-blocks only then give name as 'scratch-block'"
                     {
                     ]
                     response = agent.invoke({"messages": [{"role": "user", "content": content}]})
+                    # Ensure response is handled correctly, it might be a string that needs json.loads
+                    try:
+                        # Assuming the agent returns a dictionary with 'messages' key,
+                        # and the last message's content is the JSON string.
+                        response_content_str = response.get("messages", [])[-1].content
+                        result_json = json.loads(response_content_str)
+                    except (json.JSONDecodeError, IndexError, AttributeError) as e:
+                        logger.error(f"⚠️ Failed to parse agent response as JSON: {e}. Response was: {response}", exc_info=True)
+                        result_json = {} # Default to empty dict if parsing fails
                     try:
                         name = result_json.get("name", "").strip()
                         description = result_json.get("description", "").strip()
                     except Exception as e:
+                        logger.error(f"⚠️ Failed to extract name/description from result_json: {str(e)}", exc_info=True)
                         name = "unknown"
                         description = "unknown"
                     manipulated_json[f"Sprite {sprite_count}"] = {
                         "name": name,
+                        "base64": element["metadata"]["image_base64"], # Keep original base64 if needed
+                        "file-path": str(extracted_image_subdir), # Store the directory path as string
                         "description": description
                     }
                     sprite_count += 1
                 except Exception as e:
+                    logger.error(f"⚠️ Error processing Sprite {i+1}: {str(e)}", exc_info=True)
         # Save manipulated JSON
         with open(final_json_path, "w") as sprite_file:
             else:
                 logger.info(f"🛑 Excluded code block-like image: {key}")
         # Overwrite with filtered content
         with open(final_json_path_2, "w") as sprite_file:
             json.dump(filtered_sprites, sprite_file, indent=4)
+        # MODIFIED RETURN VALUE: Return the Path to the primary extracted_sprites.json file
+        # and the directory where it's located.
+        return final_json_path, json_subdir # Return the file path and its parent directory
     except Exception as e:
         raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
     zip_path = None
     sb3_path = None
     try:
+        # shutil.make_archive automatically adds .zip extension
         zip_path_str = shutil.make_archive(str(output_base_name), 'zip', root_dir=str(project_folder))
+        zip_path = Path(zip_path_str) # Convert back to Path object
         logger.info(f"Project folder zipped to: {zip_path}")
+        # 2. Rename the .zip file to .sb3
         sb3_path = GEN_PROJECT_DIR / f"{project_id}.sb3"
         os.rename(zip_path, sb3_path)
         logger.info(f"Renamed {zip_path} to {sb3_path}")
         return sb3_path
     except Exception as e:
         logger.error(f"Error creating SB3 archive for {project_id}: {e}", exc_info=True)
+        # Clean up any partial files if an error occurs
         if zip_path and zip_path.exists():
             os.remove(zip_path)
         if sb3_path and sb3_path.exists():
             os.remove(sb3_path)
         return None
 @app.route('/')
 def index():
     return render_template('app_index.html')
 def process_pdf():
     project_id = None
     project_folder = None
+    temp_dir = None
+    extracted_json_parent_dir = None # Initialize for finally block cleanup or later use
     try:
         logger.info("Received request to process PDF.")
         if 'pdf_file' not in request.files:
         logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
+        # MODIFIED CALL: Pass JSON_DIR and DETECTED_IMAGE_DIR
+        # extract_images_from_pdf now returns the file path to extracted_sprites.json
+        # and the parent directory where it was created.
+        extracted_sprites_json_file_path, extracted_json_parent_dir = extract_images_from_pdf(
+            saved_pdf_path, JSON_DIR, DETECTED_IMAGE_DIR
+        )
+        # Now, directly use extracted_sprites_json_file_path to check for its existence
+        if not extracted_sprites_json_file_path.exists():
+            logger.error(f"No extracted_sprites.json found at {extracted_sprites_json_file_path}")
             return jsonify({"error": "No extracted_sprites.json found"}), 500
+        with open(extracted_sprites_json_file_path, 'r') as f:
             sprite_data = json.load(f)
+        # MODIFIED CALL: Pass the extracted_json_parent_dir (the directory) to similarity_matching
+        project_output = similarity_matching(extracted_json_parent_dir, project_folder)
         logger.info("Similarity matching completed.")
         with open(project_output, 'r') as f:
         if temp_dir and temp_dir.exists():
             shutil.rmtree(temp_dir)
             logger.info(f"Cleaned up temporary directory: {temp_dir}")
+        # Optional: Clean up the extracted JSON and image directories for this project_id
+        # if extracted_json_parent_dir and extracted_json_parent_dir.exists():
+        #     shutil.rmtree(extracted_json_parent_dir)
+        #     logger.info(f"Cleaned up extracted JSON directory: {extracted_json_parent_dir}")
+        # if pdf_filename_stem: # You'd need to get pdf_filename_stem from `filename` earlier
+        #     corresponding_image_dir = DETECTED_IMAGE_DIR / pdf_filename_stem
+        #     if corresponding_image_dir.exists():
+        #         shutil.rmtree(corresponding_image_dir)
+        #         logger.info(f"Cleaned up detected image directory: {corresponding_image_dir}")
 @app.route('/list_projects', methods=['GET'])
 def list_projects():
         return jsonify({"error": "Failed to list generated projects"}), 500
 if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860, debug=True))