Spaces:
Sleeping
Sleeping
Update app_main.py
Browse files- app_main.py +148 -148
app_main.py
CHANGED
@@ -1,148 +1,148 @@
|
|
1 |
-
from flask import Flask, render_template, Response, flash, redirect, url_for, request, jsonify
|
2 |
-
import cv2
|
3 |
-
import numpy as np
|
4 |
-
from unstructured.partition.pdf import partition_pdf
|
5 |
-
import json, base64, io, os
|
6 |
-
from PIL import Image, ImageEnhance, ImageDraw
|
7 |
-
from imutils.perspective import four_point_transform
|
8 |
-
from dotenv import load_dotenv
|
9 |
-
import pytesseract
|
10 |
-
from transformers import AutoProcessor, AutoModelForImageTextToText
|
11 |
-
from langchain_community.document_loaders.image_captions import ImageCaptionLoader
|
12 |
-
|
13 |
-
app = Flask(__name__)
|
14 |
-
|
15 |
-
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
16 |
-
poppler_path=r"C:\poppler-23.11.0\Library\bin"
|
17 |
-
|
18 |
-
count = 0
|
19 |
-
PDF_GET = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"
|
20 |
-
|
21 |
-
OUTPUT_FOLDER = "OUTPUTS"
|
22 |
-
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER,"DETECTED_IMAGE")
|
23 |
-
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
|
24 |
-
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")
|
25 |
-
|
26 |
-
for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, JSON_FOLDER_PATH]:
|
27 |
-
os.makedirs(path, exist_ok=True)
|
28 |
-
|
29 |
-
# Model Initialization
|
30 |
-
smolvlm256m_processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct")
|
31 |
-
smolvlm256m_model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct").to("cpu")
|
32 |
-
|
33 |
-
# SmolVLM Image Captioning functioning
|
34 |
-
def get_smolvlm_caption(image: Image.Image, prompt: str = "") -> str:
|
35 |
-
# Ensure exactly one <image> token
|
36 |
-
if "<image>" not in prompt:
|
37 |
-
prompt = f"<image> {prompt.strip()}"
|
38 |
-
|
39 |
-
num_image_tokens = prompt.count("<image>")
|
40 |
-
if num_image_tokens != 1:
|
41 |
-
raise ValueError(f"Prompt must contain exactly 1 <image> token. Found {num_image_tokens}")
|
42 |
-
|
43 |
-
inputs = smolvlm256m_processor(images=[image], text=[prompt], return_tensors="pt").to("cpu")
|
44 |
-
output_ids = smolvlm256m_model.generate(**inputs, max_new_tokens=100)
|
45 |
-
return smolvlm256m_processor.decode(output_ids[0], skip_special_tokens=True)
|
46 |
-
|
47 |
-
# --- FUNCTION: Extract images from saved PDF ---
|
48 |
-
def extract_images_from_pdf(pdf_path, output_json_path):
|
49 |
-
''' Extract images from PDF and generate structured sprite JSON '''
|
50 |
-
|
51 |
-
pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0] # e.g., "scratch_crab"
|
52 |
-
pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")
|
53 |
-
|
54 |
-
# Create subfolders
|
55 |
-
extracted_image_subdir = os.path.join(DETECTED_IMAGE_FOLDER_PATH, pdf_filename)
|
56 |
-
json_subdir = os.path.join(JSON_FOLDER_PATH, pdf_filename)
|
57 |
-
os.makedirs(extracted_image_subdir, exist_ok=True)
|
58 |
-
os.makedirs(json_subdir, exist_ok=True)
|
59 |
-
|
60 |
-
# Output paths
|
61 |
-
output_json_path = os.path.join(json_subdir, "extracted.json")
|
62 |
-
final_json_path = os.path.join(json_subdir, "extracted_sprites.json")
|
63 |
-
|
64 |
-
elements = partition_pdf(
|
65 |
-
filename=pdf_path,
|
66 |
-
strategy="hi_res",
|
67 |
-
extract_image_block_types=["Image"],
|
68 |
-
extract_image_block_to_payload=True, # Set to True to get base64 in output
|
69 |
-
)
|
70 |
-
|
71 |
-
with open(output_json_path, "w") as f:
|
72 |
-
json.dump([element.to_dict() for element in elements], f, indent=4)
|
73 |
-
|
74 |
-
# Display extracted images
|
75 |
-
with open(output_json_path, 'r') as file:
|
76 |
-
file_elements = json.load(file)
|
77 |
-
|
78 |
-
# extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
|
79 |
-
# os.makedirs(extracted_images_dir, exist_ok=True)
|
80 |
-
|
81 |
-
# Prepare manipulated sprite JSON structure
|
82 |
-
manipulated_json = {}
|
83 |
-
|
84 |
-
# Final manipulated file (for captions)
|
85 |
-
final_json_path = output_json_path.replace(".json", "_sprites.json")
|
86 |
-
|
87 |
-
# If JSON already exists, load it and find the next available Sprite number
|
88 |
-
if os.path.exists(final_json_path):
|
89 |
-
with open(final_json_path, "r") as existing_file:
|
90 |
-
manipulated = json.load(existing_file)
|
91 |
-
# Determine the next available index (e.g., Sprite 4 if 1–3 already exist)
|
92 |
-
existing_keys = [int(k.replace("Sprite ", "")) for k in manipulated.keys()]
|
93 |
-
start_count = max(existing_keys, default=0) + 1
|
94 |
-
else:
|
95 |
-
start_count = 1
|
96 |
-
|
97 |
-
sprite_count = start_count
|
98 |
-
for i,element in enumerate(file_elements):
|
99 |
-
if "image_base64" in element["metadata"]:
|
100 |
-
image_data = base64.b64decode(element["metadata"]["image_base64"])
|
101 |
-
image = Image.open(io.BytesIO(image_data)).convert("RGB")
|
102 |
-
image.show(title=f"Extracted Image {i+1}")
|
103 |
-
image_path = os.path.join(extracted_image_subdir, f"Sprite_{i+1}.png")
|
104 |
-
image.save(image_path)
|
105 |
-
|
106 |
-
description = get_smolvlm_caption(image, prompt="Give a brief Description")
|
107 |
-
name = get_smolvlm_caption(image, prompt="give a short name/title of this Image.")
|
108 |
-
|
109 |
-
manipulated_json[f"Sprite {sprite_count}"] = {
|
110 |
-
"name": name,
|
111 |
-
"base64": element["metadata"]["image_base64"],
|
112 |
-
"file-path": pdf_dir_path,
|
113 |
-
"description":description
|
114 |
-
}
|
115 |
-
sprite_count += 1
|
116 |
-
|
117 |
-
# Save manipulated JSON
|
118 |
-
with open(final_json_path, "w") as sprite_file:
|
119 |
-
json.dump(manipulated_json, sprite_file, indent=4)
|
120 |
-
|
121 |
-
print(f"✅ Manipulated sprite JSON saved: {final_json_path}")
|
122 |
-
return final_json_path, manipulated_json
|
123 |
-
|
124 |
-
# API endpoint
|
125 |
-
@app.route('/process_static_pdf', methods=['POST'])
|
126 |
-
def process_static_pdf():
|
127 |
-
# Option 1: Use hardcoded static PDF
|
128 |
-
pdf_path = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"
|
129 |
-
|
130 |
-
# Optional: Allow override via JSON request body
|
131 |
-
if request.json and "pdf_path" in request.json:
|
132 |
-
pdf_path = request.json["pdf_path"]
|
133 |
-
|
134 |
-
if not os.path.isfile(pdf_path):
|
135 |
-
return jsonify({"error": f"File not found: {pdf_path}"}), 400
|
136 |
-
|
137 |
-
# json_path = os.path.join(JSON_FOLDER_PATH, "extracted.json")
|
138 |
-
json_path = None
|
139 |
-
output_path, result = extract_images_from_pdf(pdf_path, json_path)
|
140 |
-
|
141 |
-
return jsonify({
|
142 |
-
"message": "✅ PDF processed successfully",
|
143 |
-
"output_json": output_path,
|
144 |
-
"sprites": result
|
145 |
-
})
|
146 |
-
|
147 |
-
if __name__ == '__main__':
|
148 |
-
app.run(debug=True)
|
|
|
1 |
+
from flask import Flask, render_template, Response, flash, redirect, url_for, request, jsonify
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
from unstructured.partition.pdf import partition_pdf
|
5 |
+
import json, base64, io, os
|
6 |
+
from PIL import Image, ImageEnhance, ImageDraw
|
7 |
+
from imutils.perspective import four_point_transform
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
import pytesseract
|
10 |
+
from transformers import AutoProcessor, AutoModelForImageTextToText
|
11 |
+
from langchain_community.document_loaders.image_captions import ImageCaptionLoader
|
12 |
+
|
13 |
+
# Flask application object; the route defined in this file registers on it.
app = Flask(__name__)

# Locations of external binaries. The defaults are the original Windows
# install paths; set TESSERACT_CMD / POPPLER_PATH to point elsewhere (for
# example on a Linux host) without editing the source.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
# NOTE(review): poppler_path and count are not referenced in the visible code.
poppler_path = os.environ.get("POPPLER_PATH", r"C:\poppler-23.11.0\Library\bin")

count = 0
# Sample PDF location; override with SCRATCH_PDF_PATH.
PDF_GET = os.environ.get(
    "SCRATCH_PDF_PATH",
    r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf",
)

# Output tree, relative to the current working directory.
OUTPUT_FOLDER = "OUTPUTS"
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "DETECTED_IMAGE")
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

# Create every output directory up front so later writes cannot fail on a
# missing parent folder.
for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, JSON_FOLDER_PATH]:
    os.makedirs(path, exist_ok=True)
|
28 |
+
|
29 |
+
# Model initialization: the processor and the model are loaded once at import
# time from the same checkpoint, and the model is placed on the CPU.
SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM-256M-Instruct"
smolvlm256m_processor = AutoProcessor.from_pretrained(SMOLVLM_MODEL_ID)
smolvlm256m_model = AutoModelForImageTextToText.from_pretrained(SMOLVLM_MODEL_ID)
smolvlm256m_model = smolvlm256m_model.to("cpu")
|
32 |
+
|
33 |
+
# SmolVLM Image Captioning functioning
|
34 |
+
def get_smolvlm_caption(image: Image.Image, prompt: str = "") -> str:
    """Generate text for *image* with the module-level SmolVLM model.

    Args:
        image: Image handed to the processor.
        prompt: Instruction text. When it contains no ``<image>`` placeholder,
            one is prepended to the stripped prompt.

    Returns:
        The first generated sequence, decoded with special tokens skipped.

    Raises:
        ValueError: If the prompt contains more than one ``<image>``
            placeholder.
    """
    image_token = "<image>"

    # Guarantee the placeholder is present before counting it.
    if image_token not in prompt:
        prompt = f"<image> {prompt.strip()}"

    token_total = prompt.count(image_token)
    if token_total != 1:
        raise ValueError(f"Prompt must contain exactly 1 <image> token. Found {token_total}")

    model_inputs = smolvlm256m_processor(images=[image], text=[prompt], return_tensors="pt")
    model_inputs = model_inputs.to("cpu")

    # Generation is capped at 100 new tokens.
    generated = smolvlm256m_model.generate(**model_inputs, max_new_tokens=100)

    # NOTE(review): the whole first output sequence is decoded, which
    # presumably includes the prompt text as well as the answer; confirm
    # whether callers expect only the newly generated part.
    first_sequence = generated[0]
    return smolvlm256m_processor.decode(first_sequence, skip_special_tokens=True)
|
46 |
+
|
47 |
+
# --- FUNCTION: Extract images from saved PDF ---
|
48 |
+
def _next_sprite_index(sprites):
    """Return one more than the highest ``"Sprite <n>"`` key in *sprites*."""
    prefix = "Sprite "
    highest = 0
    for key in sprites:
        if not key.startswith(prefix):
            continue
        try:
            highest = max(highest, int(key[len(prefix):]))
        except ValueError:
            # A key that does not end in a number cannot clash with ours.
            continue
    return highest + 1


def extract_images_from_pdf(pdf_path, output_json_path):
    """Extract the images of a PDF and record them as captioned sprites.

    The PDF is partitioned with ``partition_pdf`` (``hi_res`` strategy, image
    blocks returned as base64 payloads). The raw element dump is written to
    ``<JSON_FOLDER_PATH>/<pdf name>/extracted.json``. Every element carrying an
    ``image_base64`` payload is saved as a PNG under
    ``<DETECTED_IMAGE_FOLDER_PATH>/<pdf name>/`` and captioned twice with
    ``get_smolvlm_caption`` (a description and a short name).

    Sprite entries are appended to ``extracted_sprites.json`` next to the raw
    dump: entries already in that file are kept and new ones continue the
    ``"Sprite <n>"`` numbering. The PNG for ``"Sprite <n>"`` is named
    ``Sprite_<n>.png`` so the file and the JSON key stay in step.

    Args:
        pdf_path: Path of the PDF to process.
        output_json_path: Accepted for backward compatibility and ignored; the
            output location is always derived from the PDF file name.

    Returns:
        A ``(final_json_path, manipulated_json)`` tuple: the path of the
        sprite JSON file and a dict of the sprites extracted by this call,
        keyed ``"Sprite <n>"``.
    """
    pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0]  # e.g., "scratch_crab"

    # normpath uses the separator of the running platform, so a POSIX
    # directory is no longer rewritten with backslashes.
    pdf_dir = os.path.dirname(pdf_path)
    pdf_dir_path = os.path.normpath(pdf_dir) if pdf_dir else pdf_dir

    # Per-PDF subfolders for the extracted images and the JSON files.
    extracted_image_subdir = os.path.join(DETECTED_IMAGE_FOLDER_PATH, pdf_filename)
    json_subdir = os.path.join(JSON_FOLDER_PATH, pdf_filename)
    os.makedirs(extracted_image_subdir, exist_ok=True)
    os.makedirs(json_subdir, exist_ok=True)

    # Output paths (built with join so a ".json" inside a directory name is
    # never touched).
    output_json_path = os.path.join(json_subdir, "extracted.json")
    final_json_path = os.path.join(json_subdir, "extracted_sprites.json")

    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],
        extract_image_block_to_payload=True,  # Set to True to get base64 in output
    )

    # Keep the raw dump on disk, but reuse the in-memory list rather than
    # reading the file straight back.
    file_elements = [element.to_dict() for element in elements]
    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(file_elements, f, indent=4)

    # Load earlier sprites so this run appends to them instead of replacing
    # them.
    all_sprites = {}
    if os.path.exists(final_json_path):
        with open(final_json_path, "r", encoding="utf-8") as existing_file:
            all_sprites = json.load(existing_file)

    sprite_count = _next_sprite_index(all_sprites)
    manipulated_json = {}

    for element in file_elements:
        image_b64 = element.get("metadata", {}).get("image_base64")
        if not image_b64:
            continue  # not an image element

        image = Image.open(io.BytesIO(base64.b64decode(image_b64))).convert("RGB")
        # No image.show() here: it launches a desktop viewer, which has no
        # place in a request handled by a server.
        image.save(os.path.join(extracted_image_subdir, f"Sprite_{sprite_count}.png"))

        description = get_smolvlm_caption(image, prompt="Give a brief Description")
        name = get_smolvlm_caption(image, prompt="give a short name/title of this Image.")

        manipulated_json[f"Sprite {sprite_count}"] = {
            "name": name,
            "base64": image_b64,
            "file-path": pdf_dir_path,
            "description": description,
        }
        sprite_count += 1

    # Save the earlier sprites together with the new ones.
    all_sprites.update(manipulated_json)
    with open(final_json_path, "w", encoding="utf-8") as sprite_file:
        json.dump(all_sprites, sprite_file, indent=4)

    print(f"✅ Manipulated sprite JSON saved: {final_json_path}")
    return final_json_path, manipulated_json
|
123 |
+
|
124 |
+
# API endpoint
|
125 |
+
@app.route('/process_static_pdf', methods=['POST'])
def process_static_pdf():
    """Run sprite extraction on a PDF and return the result as JSON.

    The PDF defaults to ``PDF_GET``. A JSON object body with a ``"pdf_path"``
    key overrides it; a missing, empty or non-JSON body simply keeps the
    default.

    Returns:
        On success, a JSON object with ``message``, ``output_json`` (path of
        the sprite JSON file) and ``sprites``. If the path is not an existing
        file, a JSON ``error`` object with status 400.
    """
    pdf_path = PDF_GET

    # silent=True returns None for a missing or malformed JSON body instead of
    # rejecting the request, which keeps the override optional.
    payload = request.get_json(silent=True)
    if isinstance(payload, dict) and "pdf_path" in payload:
        # SECURITY NOTE(review): this is a client-supplied filesystem path
        # and is used as-is; restrict it to an allowed directory before
        # exposing this endpoint to untrusted callers.
        pdf_path = payload["pdf_path"]

    # A non-string value (number, list, null) can never name a PDF; report it
    # like any other missing file rather than failing inside os.path.
    if not isinstance(pdf_path, str) or not os.path.isfile(pdf_path):
        return jsonify({"error": f"File not found: {pdf_path}"}), 400

    # The second argument is unused by extract_images_from_pdf, which derives
    # its own output location from the PDF name.
    output_path, result = extract_images_from_pdf(pdf_path, None)

    return jsonify({
        "message": "✅ PDF processed successfully",
        "output_json": output_path,
        "sprites": result,
    })
|
146 |
+
|
147 |
+
if __name__ == '__main__':
    # The server listens on every interface, so the interactive debugger must
    # not be on by default: it lets a client execute code on the host.
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)
|