prthm11 committed on
Commit
1eb2852
·
verified ·
1 Parent(s): 3585865

Upload live_streaming_flask.py

Browse files
Files changed (1) hide show
  1. live_streaming_flask.py +259 -151
live_streaming_flask.py CHANGED
@@ -1,151 +1,259 @@
1
- from flask import Flask, render_template, Response, flash, redirect, url_for
2
- import cv2
3
- from unstructured.partition.pdf import partition_pdf
4
- import json, base64, io, os
5
- from PIL import Image
6
- from imutils.perspective import four_point_transform
7
- from dotenv import load_dotenv
8
- import pytesseract
9
-
10
- load_dotenv()
11
-
12
- app = Flask(__name__)
13
- app.secret_key = os.getenv("SECRET_KEY")
14
- pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
15
- poppler_path=r"C:\poppler-23.11.0\Library\bin"
16
-
17
- count = 0
18
- OUTPUT_FOLDER = "OUTPUTS"
19
- IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
20
- DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER,"DETECTED_IMAGE")
21
- PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
22
- JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")
23
-
24
- for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
25
- os.makedirs(path, exist_ok=True)
26
-
27
- camera = cv2.VideoCapture('rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream') # use 0 for web camera
28
- # for cctv camera use rtsp://username:password@ip_address:554/user=username_password='password'_channel=channel_number_stream=0.sdp' instead of camera
29
- # for local webcam use
30
- # camera= cv2.VideoCapture(0)
31
-
32
- # Increase resolution if supported by the webcam
33
- camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
34
- camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
35
- camera.set(cv2.CAP_PROP_FPS, 30)
36
-
37
- # --- FUNCTION: Detect document contour ---
38
- def detect_document_contour(image):
39
- gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
40
- blur = cv2.GaussianBlur(gray, (5, 5), 0)
41
- _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
42
-
43
- contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
44
- contours = sorted(contours, key=cv2.contourArea, reverse=True)
45
-
46
- for contour in contours:
47
- area = cv2.contourArea(contour)
48
- if area > 1000:
49
- peri = cv2.arcLength(contour, True)
50
- approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
51
- if len(approx) == 4:
52
- return approx
53
- return None
54
-
55
- # --- FUNCTION: Extract images from saved PDF ---
56
- def extract_images_from_pdf(pdf_path, output_json_path):
57
- elements = partition_pdf(
58
- filename=pdf_path,
59
- strategy="hi_res",
60
- extract_image_block_types=["Image"], # or ["Image", "Table"]
61
- extract_image_block_to_payload=True, # Set to True to get base64 in output
62
- )
63
- with open(output_json_path, "w") as f:
64
- json.dump([element.to_dict() for element in elements], f, indent=4)
65
-
66
- # Display extracted images
67
- with open(output_json_path, 'r') as file:
68
- file_elements = json.load(file)
69
-
70
- extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
71
- os.makedirs(extracted_images_dir, exist_ok=True)
72
-
73
- for i, element in enumerate(file_elements):
74
- if "image_base64" in element["metadata"]:
75
- image_data = base64.b64decode(element["metadata"]["image_base64"])
76
- image = Image.open(io.BytesIO(image_data))
77
- image.show(title=f"Extracted Image {i+1}")
78
- # image.save(DETECTED_IMAGE_FOLDER_PATH, f"Extracted Image {i+1}.png")
79
-
80
- display = None
81
- scale = 0.5
82
- contour = None
83
-
84
- def gen_frames(): # generate frame by frame from camera
85
- global display
86
-
87
- while True:
88
- # Capture frame-by-frame
89
- success, frame = camera.read() # read the camera frame
90
- if not success:
91
- break
92
- else:
93
- display = frame.copy()
94
- contour = detect_document_contour(display)
95
-
96
- if contour is not None:
97
- cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)
98
-
99
- resized = cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0])))
100
- cv2.imshow("📷 Scan Document - Press 's' to Save, ESC to Exit", resized)
101
-
102
- ret, buffer = cv2.imencode('.jpg', resized)
103
-
104
- frame = buffer.tobytes()
105
- yield (b'--frame\r\n'
106
- b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n') # concat frame one by one and show result
107
-
108
- # --- Route: Scan Document ---
109
- @app.route("/capture", methods=['POST'])
110
- def capture_document():
111
- global count, display
112
-
113
- if display is None:
114
- flash(" No frame captured!", "error")
115
- return redirect(url_for("index"))
116
-
117
- frame = display.copy()
118
- contour = detect_document_contour(frame)
119
-
120
- if contour is None:
121
- flash(" No document contour found!", "error")
122
- return redirect(url_for("index"))
123
-
124
- warped = four_point_transform(frame, contour.reshape(4, 2))
125
- image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
126
- pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
127
- json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")
128
- # json_path = os.path.join(DETECTED_IMAGE_FOLDER_PATH, f"scanned_{count}.json")
129
-
130
- cv2.imwrite(image_path, warped)
131
- img = Image.open(image_path).convert("RGB")
132
- img.save(pdf_path)
133
-
134
- extract_images_from_pdf(pdf_path, json_path)
135
-
136
- flash("✅ Document scanned and saved!", "success")
137
- count += 1
138
- return redirect(url_for("index"))
139
-
140
- @app.route('/video_feed')
141
- def video_feed():
142
- #Video streaming route. Put this in the src attribute of an img tag
143
- return Response(gen_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')
144
-
145
- @app.route('/')
146
- def index():
147
- """Video streaming home page."""
148
- return render_template('live_streaming_index.html')
149
-
150
- if __name__ == '__main__':
151
- app.run(host="0.0.0.0", port=7860, debug=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, Response, flash, redirect, url_for
2
+ import cv2
3
+ import numpy as np
4
+ from unstructured.partition.pdf import partition_pdf
5
+ import json, base64, io, os
6
+ from PIL import Image, ImageEnhance, ImageDraw
7
+ from imutils.perspective import four_point_transform
8
+ from dotenv import load_dotenv
9
+ import pytesseract
10
+ from transformers import BlipProcessor, BlipForConditionalGeneration
11
+
12
load_dotenv()

app = Flask(__name__)
# flash() stores its messages in the signed session, which requires a secret
# key. Fall back to a random per-process key when SECRET_KEY is not configured
# so the routes below do not fail on their first flash() call.
app.secret_key = os.getenv("SECRET_KEY") or os.urandom(24)

# External tool locations. The defaults are the original Windows install
# paths; set TESSERACT_CMD / POPPLER_PATH in the environment (or .env) to
# override them on other machines.
pytesseract.pytesseract.tesseract_cmd = os.getenv(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
poppler_path = os.getenv("POPPLER_PATH", r"C:\poppler-23.11.0\Library\bin")

# Running index used in the names of the files written for each scan.
count = 0

# Output directory layout.
OUTPUT_FOLDER = "OUTPUTS"
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "DETECTED_IMAGE")
PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

# Create every output directory up front so later writes cannot fail on a
# missing folder.
for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
    os.makedirs(path, exist_ok=True)
28
+
29
# Capture source: the local webcam (device index 0).
# Alternatives:
#   - public RTSP test stream:
#       cv2.VideoCapture('rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream')
#   - CCTV camera:
#       rtsp://username:password@ip_address:554/user=username_password='password'_channel=channel_number_stream=0.sdp
camera = cv2.VideoCapture(0)

# Increase resolution if supported by the webcam, request 30 FPS and enable
# autofocus.
for _prop, _value in (
    (cv2.CAP_PROP_FRAME_WIDTH, 1280),
    (cv2.CAP_PROP_FRAME_HEIGHT, 720),
    (cv2.CAP_PROP_FPS, 30),
    (cv2.CAP_PROP_AUTOFOCUS, 1),
):
    camera.set(_prop, _value)
40
+
41
+ # --- FUNCTION: Detect document contour ---
42
def detect_document_contour(image):
    """Return the largest four-point contour found in a BGR frame.

    The frame is converted to grayscale, blurred with a 5x5 Gaussian and
    binarised with Otsu's threshold. Contours are then examined from largest
    to smallest area.

    Args:
        image: BGR image array (it is passed to ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The ``cv2.approxPolyDP`` result for the first contour whose area
        exceeds 1000 and whose polygon approximation has exactly four
        points, or ``None`` when no contour qualifies.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    _level, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    found, _hierarchy = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    for candidate in sorted(found, key=cv2.contourArea, reverse=True):
        # Ignore small contours.
        if not cv2.contourArea(candidate) > 1000:
            continue
        perimeter = cv2.arcLength(candidate, True)
        # Approximation tolerance is 2% of the contour perimeter.
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) == 4:
            return polygon
    return None
58
+
59
def load_image(image_path):
    """Read an image file from disk with OpenCV.

    Args:
        image_path: Path to the image file. The extension (case-insensitive)
            must be one of .png, .jpg, .jpeg, .webp or .tiff.

    Returns:
        The array returned by ``cv2.imread``.

    Raises:
        ValueError: If the extension is not supported, or if ``cv2.imread``
            returns ``None`` for the file.
    """
    ext = os.path.splitext(image_path)[1].lower()
    if ext not in ('.png', '.jpg', '.jpeg', '.webp', '.tiff'):
        raise ValueError(f"Unsupported image format: {ext}")

    image = cv2.imread(image_path)
    # Validate before using the result: cv2.imread signals failure by
    # returning None rather than raising. No preview window or pixel dump is
    # produced here because this runs inside a web request.
    if image is None:
        raise ValueError(f"Failed to load image from {image_path}. The file may be corrupted or unreadable.")
    return image
70
+
71
+ # Function for upscaling image using OpenCV's INTER_CUBIC
72
def upscale_image(image, scale=2):
    """Enlarge an image with bicubic (``cv2.INTER_CUBIC``) interpolation.

    Args:
        image: Image array; the first two entries of ``image.shape`` are
            read as height and width.
        scale: Multiplier applied to both width and height. Fractional
            values are accepted; the target size is truncated to integers.

    Returns:
        The resized image array.
    """
    height, width = image.shape[:2]
    # cv2.resize needs integer pixel dimensions, so truncate after scaling.
    target_size = (int(width * scale), int(height * scale))
    return cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
77
+
78
+ # Function to denoise the image (reduce noise)
79
def reduce_noise(image):
    """Denoise a colour image with OpenCV's non-local-means filter.

    Args:
        image: Colour image array to denoise.

    Returns:
        The result of ``cv2.fastNlMeansDenoisingColored`` called with no
        destination buffer and the fixed settings 10, 10, 7, 21.
    """
    denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    return denoised
81
+
82
+ # Function to sharpen the image
83
def sharpen_image(image):
    """Sharpen an image by convolving it with a fixed 3x3 kernel.

    The kernel weights the centre pixel by 5 and its four direct neighbours
    by -1; the corners are 0.

    Args:
        image: Image array to filter.

    Returns:
        The filtered image, with the same depth as the input (ddepth -1).
    """
    sharpening_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(image, -1, sharpening_kernel)
89
+
90
+ # Function to increase contrast and enhance details without changing color
91
def enhance_image(image):
    """Raise the contrast of a BGR image by a factor of 1.5.

    The image is converted to RGB for PIL's ``ImageEnhance.Contrast`` and
    converted back to BGR afterwards.

    Args:
        image: BGR image array.

    Returns:
        The contrast-enhanced image as a BGR array.
    """
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    contrasted = ImageEnhance.Contrast(Image.fromarray(rgb_pixels)).enhance(1.5)
    return cv2.cvtColor(np.array(contrasted), cv2.COLOR_RGB2BGR)
97
+
98
+ # Complete function to process image
99
def process_image(image_path, scale=2):
    """Load an image and run the full clean-up pipeline on it.

    The steps run in this order: load, upscale, denoise, sharpen, contrast
    enhancement.

    Args:
        image_path: Path of the image file to process.
        scale: Upscaling factor forwarded to ``upscale_image``.

    Returns:
        The processed image as returned by ``enhance_image``.

    Raises:
        ValueError: Propagated from ``load_image`` when the file cannot be
            loaded.
    """
    image = load_image(image_path)
    upscaled_image = upscale_image(image, scale)
    denoised_image = reduce_noise(upscaled_image)
    sharpened_image = sharpen_image(denoised_image)
    # No preview window or pixel dump here: this is called from a Flask
    # request handler, where a GUI window may be unavailable and printing
    # the whole array only floods the log.
    return enhance_image(sharpened_image)
117
+
118
# BLIP: Bootstrapped Language-Image Pretraining.
# The processor and the model are loaded from the same checkpoint.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# BlipProcessor: converts an image into tensor inputs for the model.
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)

# BlipForConditionalGeneration: generates the image caption (text). Kept on
# the CPU.
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT).to("cpu")
125
+
126
def get_blip_description(image: Image.Image) -> str:
    """Generate a text caption for an image with the module-level BLIP model.

    Args:
        image: PIL image to describe.

    Returns:
        The decoded caption, with special tokens removed. Generation is
        limited to 100 new tokens.
    """
    model_inputs = blip_processor(image, return_tensors="pt").to("cpu")
    generated_ids = blip_model.generate(**model_inputs, max_new_tokens=100)
    # Only the first generated sequence is decoded.
    return blip_processor.decode(generated_ids[0], skip_special_tokens=True)
131
+
132
+ # --- FUNCTION: Extract images from saved PDF ---
133
def extract_images_from_pdf(pdf_path, output_json_path):
    """Extract embedded images from a PDF and write two JSON files.

    The PDF is partitioned with ``partition_pdf`` (``hi_res`` strategy, image
    blocks returned as base64 payloads). Two files are produced:

    * ``output_json_path``: every partitioned element as a dict.
    * ``<output_json_path stem>_sprites.json``: one ``"Sprite N"`` entry per
      extracted image, holding the PDF file name, the base64 image data, the
      PDF directory (with backslash separators) and a BLIP caption.

    Args:
        pdf_path: Path of the PDF to analyse.
        output_json_path: Destination of the raw element dump.

    Returns:
        None.
    """
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # base64 image data goes into the element metadata
    )

    # Keep the dicts in memory; there is no need to re-read the file that is
    # written just below.
    element_dicts = [element.to_dict() for element in elements]
    with open(output_json_path, "w") as f:
        json.dump(element_dicts, f, indent=4)

    # The directory is still created for compatibility with the existing
    # output layout; nothing is written into it by this function.
    os.makedirs(os.path.join(os.path.dirname(output_json_path), "extracted_images"), exist_ok=True)

    pdf_filename = os.path.basename(pdf_path)
    pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")  # windows-style

    manipulated_json = {}
    for element in element_dicts:
        metadata = element.get("metadata", {})
        if "image_base64" not in metadata:
            continue

        image_data = base64.b64decode(metadata["image_base64"])
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        # The image is deliberately not shown: this runs on the server inside
        # a request, and opening an external viewer per image stalls or fails
        # there.
        description = get_blip_description(image)

        # Sprites are numbered from 1 in the order they are encountered.
        manipulated_json[f"Sprite {len(manipulated_json) + 1}"] = {
            "name": pdf_filename,
            "base64": metadata["image_base64"],
            "file-path": pdf_dir_path,
            "description": description,
        }

    # Derive the sprite file name from the path stem so it can never equal
    # output_json_path (str.replace left the path unchanged when it did not
    # contain ".json", overwriting the raw dump).
    json_stem, _ = os.path.splitext(output_json_path)
    manipulated_json_path = f"{json_stem}_sprites.json"
    with open(manipulated_json_path, "w") as sprite_file:
        json.dump(manipulated_json, sprite_file, indent=4)

    print(f"✅ Manipulated sprite JSON saved: {manipulated_json_path}")
181
+
182
# Shared state between the streaming generator and the capture route.
# display: latest frame published by gen_frames(); None until a frame is read.
# contour: module-level placeholder; the functions below bind their own local
#          variable of the same name.
display = contour = None
# Factor applied to the frame width and height before the preview is encoded.
scale = 0.5
185
+
186
def gen_frames():
    """Yield camera frames as parts of a multipart JPEG stream.

    Each frame read from ``camera`` is published in the module-level
    ``display`` variable, then a preview with the detected document contour
    outlined in green is downscaled by ``scale``, JPEG-encoded and yielded as
    one ``--frame`` part. The generator stops when the camera fails to
    deliver a frame.

    Yields:
        bytes: One multipart chunk (boundary line, Content-Type header and
        JPEG payload).
    """
    global display

    while True:
        success, frame = camera.read()
        if not success:
            break

        # Publish an unannotated copy: capture_document() warps and saves this
        # frame, so the green outline must not be drawn into it.
        display = frame.copy()

        contour = detect_document_contour(frame)
        if contour is not None:
            cv2.drawContours(frame, [contour], -1, (0, 255, 0), 3)

        resized = cv2.resize(frame, (int(scale * frame.shape[1]), int(scale * frame.shape[0])))

        # No cv2.imshow here: the preview is delivered through the HTTP
        # stream, and a GUI window may not be available to the server process.
        encoded, buffer = cv2.imencode('.jpg', resized)
        if not encoded:
            # Skip a frame that could not be encoded instead of sending
            # a broken part.
            continue

        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
209
+
210
+ # --- Route: Scan Document ---
211
@app.route("/capture", methods=['POST'])
def capture_document():
    """Scan the document in the latest frame and save image, PDF and JSON.

    Takes the most recent frame from the module-level ``display`` variable,
    finds the document outline, applies a four-point perspective transform,
    writes the result as a JPEG, runs it through ``process_image``, saves the
    processed image as a PDF and extracts images from that PDF. The scan
    counter is incremented only after a successful scan.

    Returns:
        A redirect to the index page in every case; the outcome is reported
        through a flashed message.
    """
    global count, display

    if display is None:
        flash("❌ No frame captured!", "error")
        return redirect(url_for("index"))

    snapshot = display.copy()
    outline = detect_document_contour(snapshot)
    if outline is None:
        flash("❌ No document contour found!", "error")
        return redirect(url_for("index"))

    flattened = four_point_transform(snapshot, outline.reshape(4, 2))

    # All outputs of one scan share the current counter value in their names.
    image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
    pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
    json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")

    cv2.imwrite(image_path, flattened)

    # process_image returns a BGR array; PIL needs RGB before saving the PDF.
    processed_bgr = process_image(image_path)
    Image.fromarray(cv2.cvtColor(processed_bgr, cv2.COLOR_BGR2RGB)).save(pdf_path)

    extract_images_from_pdf(pdf_path, json_path)

    flash("✅ Document scanned and saved!", "success")
    count += 1
    return redirect(url_for("index"))
247
+
248
@app.route('/video_feed')
def video_feed():
    """Video streaming route; put this URL in the ``src`` of an ``<img>`` tag.

    Returns:
        A ``multipart/x-mixed-replace`` response (boundary ``frame``) fed by
        ``gen_frames()``.
    """
    frame_stream = gen_frames()
    return Response(frame_stream, mimetype='multipart/x-mixed-replace; boundary=frame')
252
+
253
@app.route('/')
def index():
    """Serve the video streaming home page (``live_streaming_index.html``)."""
    home_page = render_template('live_streaming_index.html')
    return home_page
257
+
258
if __name__ == '__main__':
    # Run without debug mode. debug=True enables the Werkzeug interactive
    # debugger (arbitrary code execution for anyone who can reach it) and the
    # auto-reloader, which imports this module in a second process and so
    # opens the camera and loads the BLIP model twice.
    app.run(debug=False)