WebashalarForML committed on
Commit
9aa0db6
·
verified ·
1 Parent(s): b9b48a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -1651,25 +1651,30 @@ scratch_keywords = [
1651
  "touching", "sensing", "pen", "clear","Scratch","Code","scratch blocks"
1652
  ]
1653
 
1654
- def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
 
1655
  ''' Extract images from PDF and generate structured sprite JSON '''
1656
  try:
1657
  pdf_filename = pdf_path.stem # e.g., "scratch_crab" from Path object
1658
-
1659
  # Create subfolders under the provided base directories
1660
  # This will create paths like:
1661
  # /app/detected_images/pdf_filename/
1662
  # /app/json_data/pdf_filename/
1663
- extracted_image_subdir = image_base_dir / pdf_filename
1664
  json_subdir = json_base_dir / pdf_filename
1665
  extracted_image_subdir.mkdir(parents=True, exist_ok=True)
1666
  json_subdir.mkdir(parents=True, exist_ok=True)
1667
-
 
1668
  # Output paths (now using Path objects directly)
1669
  output_json_path = json_subdir / "extracted.json"
1670
  final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
1671
  final_json_path_2 = json_subdir / "extracted_sprites_2.json"
1672
-
 
 
 
1673
  try:
1674
  elements = partition_pdf(
1675
  filename=str(pdf_path), # partition_pdf might expect a string
 
1651
  "touching", "sensing", "pen", "clear","Scratch","Code","scratch blocks"
1652
  ]
1653
 
1654
+ #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
1655
+ def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path):
1656
  ''' Extract images from PDF and generate structured sprite JSON '''
1657
  try:
1658
  pdf_filename = pdf_path.stem # e.g., "scratch_crab" from Path object
1659
+ print("-------------------------------pdf_filename-------------------------------",pdf_filename)
1660
  # Create subfolders under the provided base directories
1661
  # This will create paths like:
1662
  # /app/detected_images/pdf_filename/
1663
  # /app/json_data/pdf_filename/
1664
+ extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
1665
  json_subdir = json_base_dir / pdf_filename
1666
  extracted_image_subdir.mkdir(parents=True, exist_ok=True)
1667
  json_subdir.mkdir(parents=True, exist_ok=True)
1668
+ print("-------------------------------extracted_image_subdir-------------------------------",extracted_image_subdir)
1669
+ print("-------------------------------json_subdir-------------------------------",json_subdir)
1670
  # Output paths (now using Path objects directly)
1671
  output_json_path = json_subdir / "extracted.json"
1672
  final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
1673
  final_json_path_2 = json_subdir / "extracted_sprites_2.json"
1674
+ print("-------------------------------output_json_path-------------------------------",output_json_path)
1675
+ print("-------------------------------final_json_path-------------------------------",final_json_path)
1676
+ print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
1677
+
1678
  try:
1679
  elements = partition_pdf(
1680
  filename=str(pdf_path), # partition_pdf might expect a string