prthm11 committed on
Commit
75c46c8
·
verified ·
1 Parent(s): 0b4bd44

Update app_main.py

Browse files
Files changed (1) hide show
  1. app_main.py +148 -148
app_main.py CHANGED
@@ -1,148 +1,148 @@
1
- from flask import Flask, render_template, Response, flash, redirect, url_for, request, jsonify
2
- import cv2
3
- import numpy as np
4
- from unstructured.partition.pdf import partition_pdf
5
- import json, base64, io, os
6
- from PIL import Image, ImageEnhance, ImageDraw
7
- from imutils.perspective import four_point_transform
8
- from dotenv import load_dotenv
9
- import pytesseract
10
- from transformers import AutoProcessor, AutoModelForImageTextToText
11
- from langchain_community.document_loaders.image_captions import ImageCaptionLoader
12
-
13
- app = Flask(__name__)
14
-
15
- pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
16
- poppler_path=r"C:\poppler-23.11.0\Library\bin"
17
-
18
- count = 0
19
- PDF_GET = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"
20
-
21
- OUTPUT_FOLDER = "OUTPUTS"
22
- DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER,"DETECTED_IMAGE")
23
- IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
24
- JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")
25
-
26
- for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, JSON_FOLDER_PATH]:
27
- os.makedirs(path, exist_ok=True)
28
-
29
- # Model Initialization
30
- smolvlm256m_processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct")
31
- smolvlm256m_model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct").to("cpu")
32
-
33
- # SmolVLM Image Captioning functioning
34
- def get_smolvlm_caption(image: Image.Image, prompt: str = "") -> str:
35
- # Ensure exactly one <image> token
36
- if "<image>" not in prompt:
37
- prompt = f"<image> {prompt.strip()}"
38
-
39
- num_image_tokens = prompt.count("<image>")
40
- if num_image_tokens != 1:
41
- raise ValueError(f"Prompt must contain exactly 1 <image> token. Found {num_image_tokens}")
42
-
43
- inputs = smolvlm256m_processor(images=[image], text=[prompt], return_tensors="pt").to("cpu")
44
- output_ids = smolvlm256m_model.generate(**inputs, max_new_tokens=100)
45
- return smolvlm256m_processor.decode(output_ids[0], skip_special_tokens=True)
46
-
47
- # --- FUNCTION: Extract images from saved PDF ---
48
- def extract_images_from_pdf(pdf_path, output_json_path):
49
- ''' Extract images from PDF and generate structured sprite JSON '''
50
-
51
- pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0] # e.g., "scratch_crab"
52
- pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")
53
-
54
- # Create subfolders
55
- extracted_image_subdir = os.path.join(DETECTED_IMAGE_FOLDER_PATH, pdf_filename)
56
- json_subdir = os.path.join(JSON_FOLDER_PATH, pdf_filename)
57
- os.makedirs(extracted_image_subdir, exist_ok=True)
58
- os.makedirs(json_subdir, exist_ok=True)
59
-
60
- # Output paths
61
- output_json_path = os.path.join(json_subdir, "extracted.json")
62
- final_json_path = os.path.join(json_subdir, "extracted_sprites.json")
63
-
64
- elements = partition_pdf(
65
- filename=pdf_path,
66
- strategy="hi_res",
67
- extract_image_block_types=["Image"],
68
- extract_image_block_to_payload=True, # Set to True to get base64 in output
69
- )
70
-
71
- with open(output_json_path, "w") as f:
72
- json.dump([element.to_dict() for element in elements], f, indent=4)
73
-
74
- # Display extracted images
75
- with open(output_json_path, 'r') as file:
76
- file_elements = json.load(file)
77
-
78
- # extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
79
- # os.makedirs(extracted_images_dir, exist_ok=True)
80
-
81
- # Prepare manipulated sprite JSON structure
82
- manipulated_json = {}
83
-
84
- # Final manipulated file (for captions)
85
- final_json_path = output_json_path.replace(".json", "_sprites.json")
86
-
87
- # If JSON already exists, load it and find the next available Sprite number
88
- if os.path.exists(final_json_path):
89
- with open(final_json_path, "r") as existing_file:
90
- manipulated = json.load(existing_file)
91
- # Determine the next available index (e.g., Sprite 4 if 1–3 already exist)
92
- existing_keys = [int(k.replace("Sprite ", "")) for k in manipulated.keys()]
93
- start_count = max(existing_keys, default=0) + 1
94
- else:
95
- start_count = 1
96
-
97
- sprite_count = start_count
98
- for i,element in enumerate(file_elements):
99
- if "image_base64" in element["metadata"]:
100
- image_data = base64.b64decode(element["metadata"]["image_base64"])
101
- image = Image.open(io.BytesIO(image_data)).convert("RGB")
102
- image.show(title=f"Extracted Image {i+1}")
103
- image_path = os.path.join(extracted_image_subdir, f"Sprite_{i+1}.png")
104
- image.save(image_path)
105
-
106
- description = get_smolvlm_caption(image, prompt="Give a brief Description")
107
- name = get_smolvlm_caption(image, prompt="give a short name/title of this Image.")
108
-
109
- manipulated_json[f"Sprite {sprite_count}"] = {
110
- "name": name,
111
- "base64": element["metadata"]["image_base64"],
112
- "file-path": pdf_dir_path,
113
- "description":description
114
- }
115
- sprite_count += 1
116
-
117
- # Save manipulated JSON
118
- with open(final_json_path, "w") as sprite_file:
119
- json.dump(manipulated_json, sprite_file, indent=4)
120
-
121
- print(f"✅ Manipulated sprite JSON saved: {final_json_path}")
122
- return final_json_path, manipulated_json
123
-
124
- # API endpoint
125
- @app.route('/process_static_pdf', methods=['POST'])
126
- def process_static_pdf():
127
- # Option 1: Use hardcoded static PDF
128
- pdf_path = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"
129
-
130
- # Optional: Allow override via JSON request body
131
- if request.json and "pdf_path" in request.json:
132
- pdf_path = request.json["pdf_path"]
133
-
134
- if not os.path.isfile(pdf_path):
135
- return jsonify({"error": f"File not found: {pdf_path}"}), 400
136
-
137
- # json_path = os.path.join(JSON_FOLDER_PATH, "extracted.json")
138
- json_path = None
139
- output_path, result = extract_images_from_pdf(pdf_path, json_path)
140
-
141
- return jsonify({
142
- "message": "✅ PDF processed successfully",
143
- "output_json": output_path,
144
- "sprites": result
145
- })
146
-
147
- if __name__ == '__main__':
148
- app.run(debug=True)
 
1
+ from flask import Flask, render_template, Response, flash, redirect, url_for, request, jsonify
2
+ import cv2
3
+ import numpy as np
4
+ from unstructured.partition.pdf import partition_pdf
5
+ import json, base64, io, os
6
+ from PIL import Image, ImageEnhance, ImageDraw
7
+ from imutils.perspective import four_point_transform
8
+ from dotenv import load_dotenv
9
+ import pytesseract
10
+ from transformers import AutoProcessor, AutoModelForImageTextToText
11
+ from langchain_community.document_loaders.image_captions import ImageCaptionLoader
12
+
13
app = Flask(__name__)

# External tool locations. The defaults are the original Windows install
# paths; set TESSERACT_CMD / POPPLER_PATH to point at the binaries on other
# hosts without editing this file.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
poppler_path = os.environ.get("POPPLER_PATH", r"C:\poppler-23.11.0\Library\bin")

# NOTE(review): `count` and `PDF_GET` are not referenced by the code visible
# in this file section; kept in case other code relies on them.
count = 0
PDF_GET = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"

# Output directory layout (all relative to the working directory).
OUTPUT_FOLDER = "OUTPUTS"
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "DETECTED_IMAGE")
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

# Create the output folders up front so later writes cannot fail on a
# missing directory.
for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, JSON_FOLDER_PATH]:
    os.makedirs(path, exist_ok=True)

# Model Initialization
# Single source of truth for the checkpoint so the processor and the model
# can never be loaded from different ids by accident.
SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM-256M-Instruct"
smolvlm256m_processor = AutoProcessor.from_pretrained(SMOLVLM_MODEL_ID)
smolvlm256m_model = AutoModelForImageTextToText.from_pretrained(SMOLVLM_MODEL_ID).to("cpu")
32
+
33
+ # SmolVLM Image Captioning functioning
34
def get_smolvlm_caption(image: Image.Image, prompt: str = "") -> str:
    """Generate text about *image* with the module-level SmolVLM model.

    Args:
        image: The picture handed to the processor.
        prompt: Instruction text. If it contains no ``<image>`` placeholder,
            one is prepended automatically.

    Returns:
        The decoded text of the newly generated tokens (at most 100), with
        special tokens removed and surrounding whitespace stripped.

    Raises:
        ValueError: If *prompt* contains more than one ``<image>`` token.
    """
    # Ensure exactly one <image> token
    if "<image>" not in prompt:
        prompt = f"<image> {prompt.strip()}"

    num_image_tokens = prompt.count("<image>")
    if num_image_tokens != 1:
        raise ValueError(f"Prompt must contain exactly 1 <image> token. Found {num_image_tokens}")

    inputs = smolvlm256m_processor(images=[image], text=[prompt], return_tensors="pt").to("cpu")
    output_ids = smolvlm256m_model.generate(**inputs, max_new_tokens=100)

    # generate() returns the prompt tokens followed by the continuation.
    # Decode only the continuation so the instruction text does not leak
    # into the stored name/description.
    prompt_length = inputs["input_ids"].shape[-1]
    new_token_ids = output_ids[0][prompt_length:]
    return smolvlm256m_processor.decode(new_token_ids, skip_special_tokens=True).strip()
46
+
47
+ # --- FUNCTION: Extract images from saved PDF ---
48
def _next_sprite_index(sprites_json_path):
    """Return one more than the highest "Sprite N" key stored in the file, or 1."""
    if not os.path.exists(sprites_json_path):
        return 1

    with open(sprites_json_path, "r", encoding="utf-8") as existing_file:
        existing = json.load(existing_file)

    used_numbers = []
    for key in existing:
        suffix = key.replace("Sprite ", "")
        # Ignore keys that do not follow the "Sprite N" pattern instead of
        # crashing the whole request on int().
        if suffix.isdigit():
            used_numbers.append(int(suffix))
    return max(used_numbers, default=0) + 1


def extract_images_from_pdf(pdf_path, output_json_path):
    """Extract images from a PDF and generate structured sprite JSON.

    The PDF is partitioned with ``partition_pdf`` (``hi_res`` strategy, image
    blocks returned as base64 payloads). Every element carrying an image is
    saved as a PNG and captioned twice via ``get_smolvlm_caption`` (a
    description and a short name).

    Files written, where ``<name>`` is the PDF file name without extension:
        * ``JSON_FOLDER_PATH/<name>/extracted.json`` - all partitioned elements
        * ``JSON_FOLDER_PATH/<name>/extracted_sprites.json`` - the sprite dict
        * ``DETECTED_IMAGE_FOLDER_PATH/<name>/Sprite_<k>.png`` - one per image,
          where ``k`` is the 1-based position of the element in the PDF

    Args:
        pdf_path: Path of the PDF to process.
        output_json_path: Accepted for backward compatibility but ignored;
            the output location is always derived from the PDF name.

    Returns:
        A ``(final_json_path, manipulated_json)`` tuple: the path of the
        sprites JSON and the dict that was written to it, keyed
        ``"Sprite N"`` with ``name``, ``base64``, ``file-path`` and
        ``description`` fields.
    """
    pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0]  # e.g., "scratch_crab"
    pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")

    # Create subfolders
    extracted_image_subdir = os.path.join(DETECTED_IMAGE_FOLDER_PATH, pdf_filename)
    json_subdir = os.path.join(JSON_FOLDER_PATH, pdf_filename)
    os.makedirs(extracted_image_subdir, exist_ok=True)
    os.makedirs(json_subdir, exist_ok=True)

    # Output paths. Both are built with os.path.join: deriving the sprites
    # path via str.replace(".json", ...) would also rewrite the directory
    # part whenever the PDF name itself contains ".json".
    output_json_path = os.path.join(json_subdir, "extracted.json")
    final_json_path = os.path.join(json_subdir, "extracted_sprites.json")

    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],
        extract_image_block_to_payload=True,  # Set to True to get base64 in output
    )

    # Keep the raw element dump on disk as an artifact, but iterate the
    # in-memory list rather than re-reading the file just written.
    file_elements = [element.to_dict() for element in elements]
    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(file_elements, f, indent=4)

    # Prepare manipulated sprite JSON structure
    manipulated_json = {}

    # Continue numbering after any sprites recorded by a previous run.
    # NOTE(review): the previous file is only used to pick the next index;
    # it is then overwritten with just the sprites from this run - confirm
    # whether merging the old entries was intended.
    sprite_count = _next_sprite_index(final_json_path)

    for i, element in enumerate(file_elements):
        image_base64 = element.get("metadata", {}).get("image_base64")
        if not image_base64:
            continue

        image_data = base64.b64decode(image_base64)
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        # No image.show() here: this runs inside a web request, and opening
        # a desktop viewer per image is unwanted on a server.
        image_path = os.path.join(extracted_image_subdir, f"Sprite_{i+1}.png")
        image.save(image_path)

        description = get_smolvlm_caption(image, prompt="Give a brief Description")
        name = get_smolvlm_caption(image, prompt="give a short name/title of this Image.")

        manipulated_json[f"Sprite {sprite_count}"] = {
            "name": name,
            "base64": image_base64,
            "file-path": pdf_dir_path,
            "description": description,
        }
        sprite_count += 1

    # Save manipulated JSON
    with open(final_json_path, "w", encoding="utf-8") as sprite_file:
        json.dump(manipulated_json, sprite_file, indent=4)

    print(f"✅ Manipulated sprite JSON saved: {final_json_path}")
    return final_json_path, manipulated_json
123
+
124
+ # API endpoint
125
@app.route('/process_static_pdf', methods=['POST'])
def process_static_pdf():
    """Run sprite extraction on a PDF and return the result as JSON.

    The PDF defaults to a hard-coded path; a JSON request body of the form
    ``{"pdf_path": "..."}`` overrides it.

    Returns:
        On success, a JSON object with ``message``, ``output_json`` (path of
        the sprites file) and ``sprites`` (the extracted sprite dict).
        If the path is not a string or does not name an existing file, a
        JSON ``error`` object with HTTP status 400.
    """
    # Option 1: Use hardcoded static PDF
    pdf_path = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"

    # Optional: Allow override via JSON request body.
    # silent=True makes a POST without a (valid) JSON body fall back to the
    # default path instead of being rejected before this code runs.
    payload = request.get_json(silent=True)
    if isinstance(payload, dict) and "pdf_path" in payload:
        # SECURITY NOTE(review): this is a client-supplied filesystem path
        # used as-is; consider restricting it to an allowed directory.
        pdf_path = payload["pdf_path"]

    # The isinstance check keeps non-string JSON values (numbers, lists, ...)
    # from reaching os.path functions, which would raise or misbehave.
    if not isinstance(pdf_path, str) or not os.path.isfile(pdf_path):
        return jsonify({"error": f"File not found: {pdf_path}"}), 400

    # extract_images_from_pdf derives its own output location, so no
    # explicit JSON path is supplied.
    json_path = None
    output_path, result = extract_images_from_pdf(pdf_path, json_path)

    return jsonify({
        "message": "✅ PDF processed successfully",
        "output_json": output_path,
        "sprites": result
    })
146
+
147
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach it execute arbitrary
    # code. Because the server binds to all interfaces, debug mode is
    # opt-in: set FLASK_DEBUG=1 (or true/yes) for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)