prthm11 committed on
Commit
baeac7c
·
verified ·
1 Parent(s): fb09742

Delete live_streaming_flask.py

Browse files
Files changed (1) hide show
  1. live_streaming_flask.py +0 -263
live_streaming_flask.py DELETED
@@ -1,263 +0,0 @@
1
- from flask import Flask, render_template, Response, flash, redirect, url_for
2
- import cv2
3
- import numpy as np
4
- from unstructured.partition.pdf import partition_pdf
5
- import json, base64, io, os
6
- from PIL import Image, ImageEnhance, ImageDraw
7
- from imutils.perspective import four_point_transform
8
- from dotenv import load_dotenv
9
- import pytesseract
10
- from transformers import BlipProcessor, BlipForConditionalGeneration
11
-
12
# Load variables from a local .env file (e.g. SECRET_KEY) into the environment.
load_dotenv()

app = Flask(__name__)
# flash() stores its messages in the session, and Flask refuses to use the
# session without a secret key. Fall back to a random per-process key when
# SECRET_KEY is unset so the /capture route does not fail; note that such a
# key does not survive restarts and is not shared between worker processes.
app.secret_key = os.getenv("SECRET_KEY") or os.urandom(24)

# Locations of external tools. The defaults are the original Windows install
# paths; set TESSERACT_CMD / POPPLER_PATH to override them on other machines.
pytesseract.pytesseract.tesseract_cmd = os.getenv(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
poppler_path = os.getenv("POPPLER_PATH", r"C:\poppler-23.11.0\Library\bin")

# Running index used to name the files written by the /capture route.
count = 0

OUTPUT_FOLDER = "OUTPUTS"
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "DETECTED_IMAGE")
PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

# Make sure every output directory exists before any route writes to it.
for path in (
    OUTPUT_FOLDER,
    IMAGE_FOLDER_PATH,
    DETECTED_IMAGE_FOLDER_PATH,
    PDF_FOLDER_PATH,
    JSON_FOLDER_PATH,
):
    os.makedirs(path, exist_ok=True)

# Video source: index 0 is the local webcam. cv2.VideoCapture also accepts a
# stream URL, e.g. an RTSP address for a CCTV camera
# (rtsp://username:password@ip_address:554/...) or an HTTP MJPEG feed.
camera = cv2.VideoCapture(0)

# Optional tuning, if the device supports it:
#   camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
#   camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
#   camera.set(cv2.CAP_PROP_FPS, 30)
#   camera.set(cv2.CAP_PROP_AUTOFOCUS, 1)  # enable autofocus
44
-
45
# --- FUNCTION: Detect document contour ---
def detect_document_contour(image):
    """Find the largest four-cornered outline in a BGR frame.

    The frame is converted to grayscale, blurred and binarised with Otsu's
    threshold. Contours are then visited from largest to smallest area, and
    the first one that is bigger than 1000 px and simplifies to exactly four
    vertices is returned.

    Returns:
        The four-point polygon from ``cv2.approxPolyDP``, or ``None`` when no
        contour qualifies.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    candidates, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    for candidate in sorted(candidates, key=cv2.contourArea, reverse=True):
        # Skip specks that are too small to be a document.
        if cv2.contourArea(candidate) <= 1000:
            continue
        perimeter = cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) == 4:
            return polygon
    return None
62
-
63
def load_image(image_path):
    """Read an image file from disk with OpenCV.

    Args:
        image_path: Path to a .png, .jpg, .jpeg, .webp or .tiff file
            (the extension check is case-insensitive).

    Returns:
        The decoded image as returned by ``cv2.imread``.

    Raises:
        ValueError: If the extension is not supported, or if OpenCV could
            not decode the file.
    """
    ext = os.path.splitext(image_path)[1].lower()
    if ext not in ('.png', '.jpg', '.jpeg', '.webp', '.tiff'):
        raise ValueError(f"Unsupported image format: {ext}")

    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, so the
    # result must be checked before anything else touches it.
    if image is None:
        raise ValueError(f"Failed to load image from {image_path}. The file may be corrupted or unreadable.")
    return image
74
-
75
# Function for upscaling image using OpenCV's INTER_CUBIC
def upscale_image(image, scale=2):
    """Resize an image by ``scale`` using bicubic interpolation.

    Args:
        image: Image array; its first two dimensions are read as
            (height, width).
        scale: Positive resize factor. Fractional values are allowed.

    Returns:
        The resized image produced by ``cv2.resize``.

    Raises:
        ValueError: If ``scale`` is not positive.
    """
    if scale <= 0:
        raise ValueError(f"scale must be positive, got {scale}")

    height, width = image.shape[:2]
    # cv2.resize needs integer pixel sizes; rounding lets fractional factors
    # work, and the floor of 1 keeps tiny results from collapsing to zero.
    target_size = (max(1, round(width * scale)), max(1, round(height * scale)))
    return cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
81
-
82
# Function to denoise the image (reduce noise)
def reduce_noise(image):
    """Denoise a colour image with OpenCV's non-local means filter."""
    # Positional arguments after (src, dst), in OpenCV's documented order:
    # h, hColor, templateWindowSize, searchWindowSize.
    luma_strength = 10
    colour_strength = 10
    template_window = 7
    search_window = 21
    return cv2.fastNlMeansDenoisingColored(
        image, None, luma_strength, colour_strength, template_window, search_window
    )
85
-
86
# Function to sharpen the image
def sharpen_image(image):
    """Sharpen an image by convolving it with a fixed 3x3 kernel."""
    sharpening_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    # A depth of -1 asks filter2D to keep the output depth equal to the input's.
    return cv2.filter2D(image, -1, sharpening_kernel)
93
-
94
# Function to increase contrast and enhance details without changing color
def enhance_image(image):
    """Boost the contrast of a BGR image by a factor of 1.5.

    The image is converted to an RGB PIL image for ``ImageEnhance.Contrast``
    and converted back to a BGR array before being returned.
    """
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    boosted = ImageEnhance.Contrast(Image.fromarray(rgb_pixels)).enhance(1.5)
    return cv2.cvtColor(np.array(boosted), cv2.COLOR_RGB2BGR)
101
-
102
# Complete function to process image
def process_image(image_path, scale=2):
    """Load an image file and run it through the enhancement pipeline.

    The steps are, in order: load, upscale, denoise, sharpen, and contrast
    enhancement. No preview window is opened and the pixel data is not
    printed: this function runs inside a web request, where an OpenCV window
    is never serviced and is unavailable on headless OpenCV builds.

    Args:
        image_path: Path of the image file to process.
        scale: Upscaling factor passed to ``upscale_image``.

    Returns:
        The processed image as returned by ``enhance_image``.

    Raises:
        ValueError: Propagated from ``load_image`` when the file cannot be
            loaded or has an unsupported extension.
    """
    image = load_image(image_path)
    upscaled = upscale_image(image, scale)
    denoised = reduce_noise(upscaled)
    sharpened = sharpen_image(denoised)
    # Contrast is boosted last so it is applied to the sharpened output.
    return enhance_image(sharpened)
121
-
122
# BLIP : Bootstrapped Language-Image Pretraining
# BlipProcessor: converts Image into tensor format
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

# BlipForConditionalGeneration: Generates the Image Caption(text)
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
# Run the captioning model on the CPU.
blip_model = blip_model.to("cpu")
print(f"BLIP Model: {blip_model}")
129
-
130
def get_blip_description(image: Image.Image) -> str:
    """Generate a text caption for a PIL image with the BLIP model.

    Generation is capped at 100 new tokens and special tokens are stripped
    from the decoded caption.
    """
    model_inputs = blip_processor(image, return_tensors="pt").to("cpu")
    token_ids = blip_model.generate(**model_inputs, max_new_tokens=100)
    # Only the first generated sequence is decoded.
    return blip_processor.decode(token_ids[0], skip_special_tokens=True)
135
-
136
# --- FUNCTION: Extract images from saved PDF ---
def extract_images_from_pdf(pdf_path, output_json_path):
    """Extract the images embedded in a PDF and describe them in JSON.

    The PDF is partitioned with ``partition_pdf`` (``hi_res`` strategy) and
    every element is dumped to ``output_json_path``. Each element whose
    metadata carries an ``image_base64`` payload is captioned with BLIP and
    collected, as "Sprite 1", "Sprite 2", ..., into a second file named
    ``<output_json_path without extension>_sprites.json``.

    Args:
        pdf_path: Path of the PDF to analyse.
        output_json_path: Where the full element dump is written.

    Returns:
        None. Results are written to disk.
    """
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # Set to True to get base64 in output
    )

    # Keep the serialised elements in memory instead of reading the file back.
    file_elements = [element.to_dict() for element in elements]
    with open(output_json_path, "w") as f:
        json.dump(file_elements, f, indent=4)

    # Created to keep the on-disk layout unchanged; nothing is written to it here.
    extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
    os.makedirs(extracted_images_dir, exist_ok=True)

    pdf_filename = os.path.basename(pdf_path)
    pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")  # windows-style

    # Prepare manipulated sprite JSON structure
    manipulated_json = {}
    sprite_count = 1

    for element in file_elements:
        metadata = element["metadata"]
        if "image_base64" not in metadata:
            continue

        image_data = base64.b64decode(metadata["image_base64"])
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        # No image.show() here: it would launch an external viewer for every
        # extracted image while a web request is being served.
        description = get_blip_description(image)

        manipulated_json[f"Sprite {sprite_count}"] = {
            "name": pdf_filename,
            "base64": metadata["image_base64"],
            "file-path": pdf_dir_path,
            "description": description,
        }
        sprite_count += 1

    # Save manipulated JSON. splitext only touches the final extension, so a
    # ".json" elsewhere in the path is left alone and the sprite file can
    # never share a name with the element dump.
    json_root, _ = os.path.splitext(output_json_path)
    manipulated_json_path = f"{json_root}_sprites.json"
    with open(manipulated_json_path, "w") as sprite_file:
        json.dump(manipulated_json, sprite_file, indent=4)

    print(f"✅ Manipulated sprite JSON saved: {manipulated_json_path}")
185
-
186
# Most recent camera frame, without any overlay; read by the /capture route.
display = None
# The preview is streamed at this fraction of the captured frame size.
scale = 0.5
# Kept as a module-level name for compatibility; gen_frames and the capture
# route in this file each work with their own local contour.
contour = None


def gen_frames():  # generate frame by frame from camera
    """Yield camera frames as the parts of a multipart MJPEG stream.

    For every frame read from ``camera`` an unannotated copy is published in
    the global ``display``. The detected document outline, if any, is then
    drawn in green on the preview only, so that a later capture is not saved
    with the overlay painted on it. The preview is resized by ``scale``,
    JPEG-encoded and yielded with the ``--frame`` boundary headers.

    The generator stops when the camera fails to deliver a frame.
    """
    global display

    while True:
        # Capture frame-by-frame
        success, frame = camera.read()
        if not success:
            break

        # Publish the clean frame before drawing anything on the preview.
        display = frame.copy()

        outline = detect_document_contour(frame)
        if outline is not None:
            cv2.drawContours(frame, [outline], -1, (0, 255, 0), 3)

        preview = cv2.resize(
            frame, (int(scale * frame.shape[1]), int(scale * frame.shape[0]))
        )

        encoded, buffer = cv2.imencode('.jpg', preview)
        if not encoded:
            # Skip a frame that could not be encoded rather than send garbage.
            continue

        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
213
-
214
# --- Route: Scan Document ---
@app.route("/capture", methods=['POST'])
def capture_document():
    """Scan the document visible in the latest camera frame.

    The frame stored in the global ``display`` is perspective-corrected to
    the detected document outline and saved as a JPEG. That file is run
    through ``process_image``, stored as a PDF, and handed to
    ``extract_images_from_pdf``. Every outcome is reported with a flash
    message followed by a redirect to the index page; ``count`` is advanced
    only after a successful scan.
    """
    global count, display

    if display is None:
        flash("❌ No frame captured!", "error")
        return redirect(url_for("index"))

    snapshot = display.copy()
    outline = detect_document_contour(snapshot)
    if outline is None:
        flash("❌ No document contour found!", "error")
        return redirect(url_for("index"))

    # Output locations for this scan, numbered by the running counter.
    image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
    pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
    json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")

    # Flatten the document to a top-down view and store the raw scan.
    flattened = four_point_transform(snapshot, outline.reshape(4, 2))
    cv2.imwrite(image_path, flattened)

    # Enhance the saved scan, then write it out as a PDF through PIL.
    enhanced_bgr = process_image(image_path)
    enhanced_rgb = cv2.cvtColor(enhanced_bgr, cv2.COLOR_BGR2RGB)
    Image.fromarray(enhanced_rgb).save(pdf_path)

    extract_images_from_pdf(pdf_path, json_path)

    flash("✅ Document scanned and saved!", "success")
    count += 1
    return redirect(url_for("index"))
251
-
252
@app.route('/video_feed')
def video_feed():
    """Video streaming route. Put this in the src attribute of an img tag."""
    frame_stream = gen_frames()
    return Response(frame_stream, mimetype='multipart/x-mixed-replace; boundary=frame')
256
-
257
@app.route('/')
def index():
    """Render the video streaming home page template."""
    home_template = 'live_streaming_index.html'
    return render_template(home_template)
261
-
262
if __name__ == '__main__':
    # Listen on every network interface on port 7860, with debug mode off.
    app.run(
        host="0.0.0.0",
        port=7860,
        debug=False,
    )