Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1651,25 +1651,30 @@ scratch_keywords = [
|
|
1651 |
"touching", "sensing", "pen", "clear","Scratch","Code","scratch blocks"
|
1652 |
]
|
1653 |
|
1654 |
-
def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
|
|
|
1655 |
''' Extract images from PDF and generate structured sprite JSON '''
|
1656 |
try:
|
1657 |
pdf_filename = pdf_path.stem # e.g., "scratch_crab" from Path object
|
1658 |
-
|
1659 |
# Create subfolders under the provided base directories
|
1660 |
# This will create paths like:
|
1661 |
# /app/detected_images/pdf_filename/
|
1662 |
# /app/json_data/pdf_filename/
|
1663 |
-
extracted_image_subdir =
|
1664 |
json_subdir = json_base_dir / pdf_filename
|
1665 |
extracted_image_subdir.mkdir(parents=True, exist_ok=True)
|
1666 |
json_subdir.mkdir(parents=True, exist_ok=True)
|
1667 |
-
|
|
|
1668 |
# Output paths (now using Path objects directly)
|
1669 |
output_json_path = json_subdir / "extracted.json"
|
1670 |
final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
|
1671 |
final_json_path_2 = json_subdir / "extracted_sprites_2.json"
|
1672 |
-
|
|
|
|
|
|
|
1673 |
try:
|
1674 |
elements = partition_pdf(
|
1675 |
filename=str(pdf_path), # partition_pdf might expect a string
|
|
|
1651 |
"touching", "sensing", "pen", "clear","Scratch","Code","scratch blocks"
|
1652 |
]
|
1653 |
|
1654 |
+
#def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
|
1655 |
+
def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path):
|
1656 |
''' Extract images from PDF and generate structured sprite JSON '''
|
1657 |
try:
|
1658 |
pdf_filename = pdf_path.stem # e.g., "scratch_crab" from Path object
|
1659 |
+
print("-------------------------------pdf_filename-------------------------------",pdf_filename)
|
1660 |
# Create subfolders under the provided base directories
|
1661 |
# This will create paths like:
|
1662 |
# /app/detected_images/pdf_filename/
|
1663 |
# /app/json_data/pdf_filename/
|
1664 |
+
extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
|
1665 |
json_subdir = json_base_dir / pdf_filename
|
1666 |
extracted_image_subdir.mkdir(parents=True, exist_ok=True)
|
1667 |
json_subdir.mkdir(parents=True, exist_ok=True)
|
1668 |
+
print("-------------------------------extracted_image_subdir-------------------------------",extracted_image_subdir)
|
1669 |
+
print("-------------------------------json_subdir-------------------------------",json_subdir)
|
1670 |
# Output paths (now using Path objects directly)
|
1671 |
output_json_path = json_subdir / "extracted.json"
|
1672 |
final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
|
1673 |
final_json_path_2 = json_subdir / "extracted_sprites_2.json"
|
1674 |
+
print("-------------------------------output_json_path-------------------------------",output_json_path)
|
1675 |
+
print("-------------------------------final_json_path-------------------------------",final_json_path)
|
1676 |
+
print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
|
1677 |
+
|
1678 |
try:
|
1679 |
elements = partition_pdf(
|
1680 |
filename=str(pdf_path), # partition_pdf might expect a string
|