Update live_streaming_flask.py
live_streaming_flask.py  +262 -258  CHANGED
@@ -1,259 +1,263 @@
Removed (previous version, 259 lines). The recoverable removed lines match the new version below, except for the capture setup, which read:

- camera= cv2.VideoCapture(0)
- camera.set(cv2.
Added (new version, 263 lines):

from flask import Flask, render_template, Response, flash, redirect, url_for
import cv2
import numpy as np
from unstructured.partition.pdf import partition_pdf
import json, base64, io, os
from PIL import Image, ImageEnhance, ImageDraw
from imutils.perspective import four_point_transform
from dotenv import load_dotenv
import pytesseract
from transformers import BlipProcessor, BlipForConditionalGeneration

load_dotenv()

app = Flask(__name__)
app.secret_key = os.getenv("SECRET_KEY")
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
poppler_path = r"C:\poppler-23.11.0\Library\bin"

count = 0
OUTPUT_FOLDER = "OUTPUTS"
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "DETECTED_IMAGE")
PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
    os.makedirs(path, exist_ok=True)

camera = cv2.VideoCapture('rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream')  # use 0 for web camera
# for cctv camera use rtsp://username:password@ip_address:554/user=username_password='password'_channel=channel_number_stream=0.sdp instead
# for local webcam use
# camera = cv2.VideoCapture(0)

ret, frame = camera.read()
if not ret:
    raise RuntimeError("❌ Failed to connect to RTSP stream. Check URL or connectivity.")

# Increase resolution if supported by the webcam
# camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
# camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
# camera.set(cv2.CAP_PROP_FPS, 30)

# camera.set(cv2.CAP_PROP_AUTOFOCUS, 1)  # Enable autofocus
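
As the comments above note, the capture source can be either an RTSP/CCTV stream or the local webcam. Purely as an illustration (not part of this commit), a startup-time fallback between the two could look like the sketch below; RTSP_URL is a stand-in name for whichever stream URL is configured:

# Hypothetical variant, not in the committed file: try the RTSP source first
# and fall back to the local webcam if the stream cannot be opened.
import cv2

RTSP_URL = 'rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream'
camera = cv2.VideoCapture(RTSP_URL)
if not camera.isOpened():
    camera = cv2.VideoCapture(0)  # default local webcam
ret, frame = camera.read()
if not ret:
    raise RuntimeError("Could not read a frame from either capture source.")
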
# --- FUNCTION: Detect document contour ---
def detect_document_contour(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    for contour in contours:
        area = cv2.contourArea(contour)
        if area > 1000:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
            if len(approx) == 4:
                return approx
    return None

def load_image(image_path):
    ext = os.path.splitext(image_path)[1].lower()
    if ext in ['.png', '.jpg', '.jpeg', '.webp', '.tiff']:
        image = cv2.imread(image_path)
        if image is None:  # check before displaying: cv2.imshow cannot handle None
            raise ValueError(f"Failed to load image from {image_path}. The file may be corrupted or unreadable.")
        cv2.imshow("Original Image", image)
        print(f"Image : {image}")
        return image
    else:
        raise ValueError(f"Unsupported image format: {ext}")

# Function for upscaling image using OpenCV's INTER_CUBIC
def upscale_image(image, scale=2):
    height, width = image.shape[:2]
    upscaled_image = cv2.resize(image, (width * scale, height * scale), interpolation=cv2.INTER_CUBIC)
    print(f"UPSCALE IMAGE : {upscaled_image}")
    return upscaled_image

# Function to denoise the image (reduce noise)
def reduce_noise(image):
    return cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

# Function to sharpen the image
def sharpen_image(image):
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])
    sharpened_image = cv2.filter2D(image, -1, kernel)
    return sharpened_image

# Function to increase contrast and enhance details without changing color
def enhance_image(image):
    pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    enhancer = ImageEnhance.Contrast(pil_img)
    enhanced_image = enhancer.enhance(1.5)
    enhanced_image_bgr = cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
    return enhanced_image_bgr

# Complete function to process image
def process_image(image_path, scale=2):
    # Load the image
    image = load_image(image_path)

    # Upscale the image
    upscaled_image = upscale_image(image, scale)

    # Reduce noise
    denoised_image = reduce_noise(upscaled_image)

    # Sharpen the image
    sharpened_image = sharpen_image(denoised_image)

    # Enhance the image contrast and details without changing color
    final_image = enhance_image(sharpened_image)
    print(f"FINAL IMAGE : {final_image}")
    cv2.imshow("Final Image", final_image)
    return final_image

# BLIP : Bootstrapped Language-Image Pretraining
""" BlipProcessor: converts Image into tensor format"""
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
# print(f"BLIP Processor: {blip_processor}")
""" BlipForConditionalGeneration: Generates the Image Caption(text)"""
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cpu")
print(f"BLIP Model: {blip_model}")

def get_blip_description(image: Image.Image) -> str:
    inputs = blip_processor(image, return_tensors="pt").to("cpu")
    output = blip_model.generate(**inputs, max_new_tokens=100)
    caption = blip_processor.decode(output[0], skip_special_tokens=True)
    return caption

# --- FUNCTION: Extract images from saved PDF ---
def extract_images_from_pdf(pdf_path, output_json_path):
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # Set to True to get base64 in output
    )
    with open(output_json_path, "w") as f:
        json.dump([element.to_dict() for element in elements], f, indent=4)

    # Display extracted images
    with open(output_json_path, 'r') as file:
        file_elements = json.load(file)

    extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
    os.makedirs(extracted_images_dir, exist_ok=True)

    # Prepare manipulated sprite JSON structure
    manipulated_json = {}
    pdf_filename = os.path.basename(pdf_path)
    pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")  # windows-style

    sprite_count = 1

    for i, element in enumerate(file_elements):
        if "image_base64" in element["metadata"]:
            image_data = base64.b64decode(element["metadata"]["image_base64"])
            # image = Image.open(io.BytesIO(image_data))
            image = Image.open(io.BytesIO(image_data)).convert("RGB")
            image.show(title=f"Extracted Image {i+1}")
            # image.save(DETECTED_IMAGE_FOLDER_PATH, f"Extracted Image {i+1}.png")

            description = get_blip_description(image)

            manipulated_json[f"Sprite {sprite_count}"] = {
                "name": pdf_filename,
                "base64": element["metadata"]["image_base64"],
                "file-path": pdf_dir_path,
                "description": description
            }
            sprite_count += 1

    # Save manipulated JSON
    manipulated_json_path = output_json_path.replace(".json", "_sprites.json")
    with open(manipulated_json_path, "w") as sprite_file:
        json.dump(manipulated_json, sprite_file, indent=4)

    print(f"✅ Manipulated sprite JSON saved: {manipulated_json_path}")

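
For reference, the sprite JSON written by extract_images_from_pdf() has the shape sketched below. This is illustrative only: the values are placeholders (the base64 payload and the BLIP caption depend on the scanned page), and the file name assumes the first capture (count = 0):

# Illustrative only: shape of OUTPUTS/EXTRACTED_JSON/scanned_0_sprites.json.
# Keys mirror manipulated_json above; the values here are placeholders.
example_sprites = {
    "Sprite 1": {
        "name": "scanned_colored_0.pdf",           # os.path.basename(pdf_path)
        "base64": "<base64-encoded image bytes>",  # element["metadata"]["image_base64"]
        "file-path": "OUTPUTS\\SCANNED_PDF",       # os.path.dirname(pdf_path), windows-style
        "description": "<BLIP caption for the extracted image>"
    }
}
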
display = None
scale = 0.5
contour = None

def gen_frames():  # generate frame by frame from camera
    global display

    while True:
        # Capture frame-by-frame
        success, frame = camera.read()  # read the camera frame
        if not success:
            break
        else:
            display = frame.copy()
            contour = detect_document_contour(display)

            if contour is not None:
                cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)

            resized = cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0])))
            cv2.imshow("📷 Scan Document - Press 's' to Save, ESC to Exit", resized)

            ret, buffer = cv2.imencode('.jpg', resized)

            frame = buffer.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')  # concat frame one by one and show result

# --- Route: Scan Document ---
@app.route("/capture", methods=['POST'])
def capture_document():
    global count, display

    if display is None:
        flash("❌ No frame captured!", "error")
        return redirect(url_for("index"))

    frame = display.copy()
    contour = detect_document_contour(frame)

    if contour is None:
        flash("❌ No document contour found!", "error")
        return redirect(url_for("index"))

    warped = four_point_transform(frame, contour.reshape(4, 2))
    image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")

    pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
    json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")
    # json_path = os.path.join(DETECTED_IMAGE_FOLDER_PATH, f"scanned_{count}.json")

    cv2.imwrite(image_path, warped)
    # img = process_image(image_path)
    # # img = Image.open(image_path).convert("RGB")
    # img.save(pdf_path)

    img = process_image(image_path)
    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    pil_img.save(pdf_path)

    extract_images_from_pdf(pdf_path, json_path)

    flash("✅ Document scanned and saved!", "success")
    count += 1
    return redirect(url_for("index"))

@app.route('/video_feed')
def video_feed():
    # Video streaming route. Put this in the src attribute of an img tag
    return Response(gen_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')

@app.route('/')
def index():
    """Video streaming home page."""
    return render_template('live_streaming_index.html')

if __name__ == '__main__':
    app.run(debug=True)
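
Because /video_feed streams multipart/x-mixed-replace JPEG frames (intended to be embedded through an img tag in live_streaming_index.html), a quick smoke test can confirm that frames are actually being produced. The sketch below is not part of the commit; it assumes the app is running locally on Flask's default port 5000 and uses the requests library:

# Hypothetical smoke test (not part of the committed file): read the start of
# the multipart/x-mixed-replace stream served by /video_feed and report how
# many frame boundaries arrived. Assumes the Flask app is running on port 5000.
import requests

def count_frames(url="http://127.0.0.1:5000/video_feed", max_frames=3):
    buffer = b""
    with requests.get(url, stream=True, timeout=10) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=4096):
            buffer += chunk
            # gen_frames() delimits every part with the b'--frame' boundary.
            if buffer.count(b"--frame") >= max_frames:
                break
    return buffer.count(b"--frame")

if __name__ == "__main__":
    print(f"Received {count_frames()} frame boundaries from /video_feed")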