assentian1970 committed on
Commit
a9db5e7
·
verified ·
1 Parent(s): 6518544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -17
app.py CHANGED
@@ -13,6 +13,9 @@ import numpy as np
13
  import cv2
14
  from modelscope.hub.snapshot_download import snapshot_download
15
 
 
 
 
16
  # Initialize GPU
17
  @spaces.GPU
18
  def initialize_gpu():
@@ -20,14 +23,18 @@ def initialize_gpu():
20
  torch.randn(10).cuda()
21
  initialize_gpu()
22
 
23
- # Load YOLO model with relative path
24
- YOLO_MODEL = YOLO('best_yolov11.pt')
 
 
 
25
 
26
- # Model configuration with quantization
27
  MODEL_NAME = 'iic/mPLUG-Owl3-7B-240728'
28
- model_dir = snapshot_download(MODEL_NAME,
29
- revision='v1.0.0', # Specific revision
30
- cache_dir='./models')
 
31
 
32
  # Device setup
33
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -47,7 +54,7 @@ def is_video(filename):
47
 
48
  @spaces.GPU
49
  def load_model_and_tokenizer():
50
- """Load 4-bit quantized model"""
51
  try:
52
  torch.cuda.empty_cache()
53
  gc.collect()
@@ -56,9 +63,9 @@ def load_model_and_tokenizer():
56
  model_dir,
57
  attn_implementation='sdpa',
58
  trust_remote_code=True,
59
- load_in_4bit=True,
60
  device_map="auto",
61
- torch_dtype=torch.bfloat16
62
  )
63
 
64
  tokenizer = AutoTokenizer.from_pretrained(
@@ -72,10 +79,201 @@ def load_model_and_tokenizer():
72
  print(f"Model loading error: {str(e)}")
73
  raise
74
 
75
- # ... [Keep the rest of your existing functions unchanged] ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  def process_diary(day, date, media):
78
- """Simplified processing pipeline"""
79
  try:
80
  if not media:
81
  return [day, date, "No data", "No data", "No data", "No data", None]
@@ -87,11 +285,14 @@ def process_diary(day, date, media):
87
  detected_people, detected_machinery, machine_types = detect_people_and_machinery(media_path)
88
  annotated_video = None
89
 
90
- if is_image(media.name):
91
- activities = analyze_image_activities(media_path)
92
- else:
93
- activities = analyze_video_activities(media_path)
94
- annotated_video = annotate_video_with_bboxes(media_path)
 
 
 
95
 
96
  os.remove(media_path)
97
  return [
@@ -130,7 +331,6 @@ with gr.Blocks(title="Digital Site Diary", css="video {height: auto !important;}
130
  model_activities = gr.Textbox(label="Activity Analysis", lines=4)
131
  model_video = gr.Video(label="Safety Annotations")
132
 
133
- # Fixed input mapping
134
  submit_btn.click(
135
  process_diary,
136
  inputs=[day, date, media],
 
13
  import cv2
14
  from modelscope.hub.snapshot_download import snapshot_download
15
 
16
# Fix GLIBCXX dependency: put the system library directory first on
# LD_LIBRARY_PATH. Python does not expand "$VAR" inside string literals, so the
# current value is read from os.environ explicitly; empty parts are dropped so
# no stray ":" separator is left behind when the variable was previously unset.
# NOTE(review): this updates the environment seen by child processes; whether
# the already-running interpreter's loader honours it should be confirmed.
os.environ['LD_LIBRARY_PATH'] = ':'.join(
    part
    for part in ('/usr/lib/x86_64-linux-gnu', os.environ.get('LD_LIBRARY_PATH', ''))
    if part
)
18
+
19
  # Initialize GPU
20
  @spaces.GPU
21
  def initialize_gpu():
 
23
  torch.randn(10).cuda()
24
  initialize_gpu()
25
 
26
# Load YOLO model with error handling
try:
    YOLO_MODEL = YOLO('best_yolov11.pt')
except Exception as e:
    # Chain explicitly so the original failure stays attached as __cause__.
    raise RuntimeError(f"YOLO model loading failed: {str(e)}") from e
31
 
32
# Model configuration: fetch the model snapshot into the local ./models cache.
MODEL_NAME = 'iic/mPLUG-Owl3-7B-240728'
try:
    model_dir = snapshot_download(MODEL_NAME, cache_dir='./models')
except Exception as e:
    # Chain explicitly so the original failure stays attached as __cause__.
    raise RuntimeError(f"Model download failed: {str(e)}") from e
38
 
39
  # Device setup
40
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
54
 
55
  @spaces.GPU
56
  def load_model_and_tokenizer():
57
+ """Load 8-bit quantized model with memory optimizations"""
58
  try:
59
  torch.cuda.empty_cache()
60
  gc.collect()
 
63
  model_dir,
64
  attn_implementation='sdpa',
65
  trust_remote_code=True,
66
+ load_in_8bit=True,
67
  device_map="auto",
68
+ torch_dtype=torch.float16
69
  )
70
 
71
  tokenizer = AutoTokenizer.from_pretrained(
 
79
  print(f"Model loading error: {str(e)}")
80
  raise
81
 
82
def process_yolo_results(results):
    """Count workers and machinery in YOLO detection results.

    Detections with a confidence below 0.5 are ignored. Class names are
    looked up in ``YOLO_MODEL.names`` and lower-cased before matching.

    Args:
        results: Iterable of YOLO result objects. Each must expose ``boxes``,
            whose items provide ``conf`` and ``cls`` with an ``item()`` method.

    Returns:
        A ``(workers, machinery_total, counts)`` tuple. ``counts`` maps
        ``"Worker"`` and every machinery display label to its detection
        count; ``machinery_total`` is the sum of all non-worker counts.
        If processing fails part-way, the error is printed and the counts
        gathered so far are returned.
    """
    machinery_mapping = {
        'tower_crane': "Tower Crane",
        'mobile_crane': "Mobile Crane",
        'compactor': "Compactor/Roller",
        'roller': "Compactor/Roller",
        'bulldozer': "Bulldozer",
        'dozer': "Bulldozer",
        'excavator': "Excavator",
        'dump_truck': "Dump Truck",
        'truck': "Dump Truck",
        'concrete_mixer_truck': "Concrete Mixer",
        'loader': "Loader",
        'pump_truck': "Pump Truck",
        'pile_driver': "Pile Driver",
        'grader': "Grader",
        'other_vehicle': "Other Vehicle"
    }
    # Substring fallback tries the most specific (longest) key first, so the
    # generic 'truck' key cannot shadow 'concrete_mixer_truck' or 'pump_truck'.
    keys_by_specificity = sorted(machinery_mapping, key=len, reverse=True)

    counts = {"Worker": 0, **{v: 0 for v in machinery_mapping.values()}}

    try:
        for r in results:
            for box in r.boxes:
                if box.conf.item() < 0.5:
                    continue

                cls_name = YOLO_MODEL.names[int(box.cls.item())].lower()
                if cls_name == 'worker':
                    counts["Worker"] += 1
                    continue

                # Prefer an exact class-name match; fall back to substring
                # matching for class names that embed a known key.
                label = machinery_mapping.get(cls_name)
                if label is None:
                    label = next(
                        (machinery_mapping[key] for key in keys_by_specificity if key in cls_name),
                        None,
                    )
                if label is not None:
                    counts[label] += 1
    except Exception as e:
        # Deliberate best-effort: report the problem and return partial counts.
        print(f"YOLO processing error: {str(e)}")

    return counts["Worker"], sum(counts.values()) - counts["Worker"], counts
123
+
124
@spaces.GPU
def detect_people_and_machinery(media_path):
    """Detect workers and machinery in an image or video.

    For a video (a string path accepted by ``is_video``), roughly one frame
    per second is run through ``YOLO_MODEL``, starting with the first frame,
    and the per-frame maximum of each count is kept. Any other input is
    treated as a single image: a string is read with ``cv2.imread``, anything
    else is converted from an RGB array-like to BGR.

    Args:
        media_path: Path to an image/video file, or an RGB image object that
            ``np.array`` can convert.

    Returns:
        A ``(people, machinery_total, machinery_by_type)`` tuple, where
        ``machinery_by_type`` only contains types with a non-zero count.
        On any error the message is printed and ``(0, 0, {})`` is returned.
    """
    cap = None
    try:
        max_people = 0
        max_machines = {k: 0 for k in [
            "Tower Crane", "Mobile Crane", "Compactor/Roller", "Bulldozer",
            "Excavator", "Dump Truck", "Concrete Mixer", "Loader",
            "Pump Truck", "Pile Driver", "Grader", "Other Vehicle"
        ]}

        if isinstance(media_path, str) and is_video(media_path):
            cap = cv2.VideoCapture(media_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            try:
                sample_rate = max(1, int(fps))
            except (ValueError, OverflowError):
                # NaN/inf FPS metadata: fall back to analysing every frame.
                sample_rate = 1

            # An explicit counter is used so that frame 0 is always sampled;
            # clips shorter than one second therefore still get analysed.
            frame_index = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                if frame_index % sample_rate == 0:
                    results = YOLO_MODEL(frame)
                    people, _, types = process_yolo_results(results)

                    max_people = max(max_people, people)
                    for k in max_machines:
                        max_machines[k] = max(max_machines[k], types.get(k, 0))

                frame_index += 1
        else:
            if isinstance(media_path, str):
                img = cv2.imread(media_path)
                if img is None:
                    # cv2.imread signals failure with None rather than raising.
                    raise ValueError(f"Unable to read image: {media_path}")
            else:
                img = cv2.cvtColor(np.array(media_path), cv2.COLOR_RGB2BGR)

            results = YOLO_MODEL(img)
            max_people, _, types = process_yolo_results(results)
            for k in max_machines:
                max_machines[k] = types.get(k, 0)

        filtered = {k: v for k, v in max_machines.items() if v > 0}
        return max_people, sum(filtered.values()), filtered

    except Exception as e:
        print(f"Detection error: {str(e)}")
        return 0, 0, {}

    finally:
        # Release the capture handle on both the success and the error path.
        if cap is not None:
            cap.release()
167
+
168
@spaces.GPU
def analyze_video_activities(video_path):
    """Describe construction activities seen in a video.

    Frames are sampled every ``frame_step`` frames (the video's average FPS,
    i.e. about one frame per second) and passed to the model in chunks of at
    most 16 frames. One response is collected per chunk.

    Args:
        video_path: Path to the video file to analyse.

    Returns:
        The per-chunk responses joined with newlines, or the string
        ``"Activity analysis unavailable"`` if anything fails.
    """
    model = tokenizer = processor = None
    try:
        model, tokenizer, processor = load_model_and_tokenizer()
        responses = []

        vr = VideoReader(video_path, ctx=cpu(0))
        frame_step = max(1, int(vr.get_avg_fps()))
        total_frames = len(vr)

        # Apply the sampling step that was computed: one frame per step
        # instead of decoding and submitting every frame of the video.
        sampled_indices = range(0, total_frames, frame_step)

        # Process in 16-frame chunks
        for start in range(0, len(sampled_indices), 16):
            chunk = sampled_indices[start:start + 16]
            frames = [Image.fromarray(vr[j].asnumpy()) for j in chunk]

            inputs = processor(
                [{"role": "user", "content": "Analyze construction activities", "video_frames": frames}],
                videos=[frames]
            ).to(DEVICE)

            # NOTE(review): the join below assumes generate() yields decoded
            # strings for this model — confirm against the model's remote code.
            response = model.generate(**inputs, max_new_tokens=200)
            responses.append(response[0])

            # Free per-chunk tensors before the next chunk is built.
            del frames, inputs
            torch.cuda.empty_cache()

        return "\n".join(responses)

    except Exception as e:
        print(f"Video analysis error: {str(e)}")
        return "Activity analysis unavailable"

    finally:
        # Drop model references on success and failure alike.
        del model, tokenizer, processor
        torch.cuda.empty_cache()
201
+
202
@spaces.GPU
def analyze_image_activities(image_path):
    """Describe the construction site shown in a single image.

    Args:
        image_path: Path to the image file to analyse.

    Returns:
        The first element of the model's ``generate`` output, or the string
        ``"Activity analysis unavailable"`` if anything fails.
    """
    model = tokenizer = processor = image = inputs = None
    try:
        model, tokenizer, processor = load_model_and_tokenizer()

        # Close the underlying file as soon as the RGB copy has been made.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        inputs = processor(
            [{"role": "user", "content": "Analyze construction site", "images": [image]}],
            images=[image]
        ).to(DEVICE)

        response = model.generate(**inputs, max_new_tokens=200)
        return response[0]

    except Exception as e:
        print(f"Image analysis error: {str(e)}")
        return "Activity analysis unavailable"

    finally:
        # Runs on success and failure: drop references, then clear the cache.
        del model, tokenizer, processor, image, inputs
        torch.cuda.empty_cache()
223
+
224
@spaces.GPU
def annotate_video_with_bboxes(video_path):
    """Write a copy of a video with YOLO detections drawn on it.

    Every 5th frame is run through ``YOLO_MODEL``; detections with a
    confidence of at least 0.5 are drawn as labelled boxes, and a per-class
    count summary is drawn at the top-left of that frame. All frames,
    annotated or not, are written to the output.

    Args:
        video_path: Path to the source video.

    Returns:
        Path to a temporary ``.mp4`` file holding the annotated video, or
        ``None`` if an error occurred (the error is printed and any partial
        output file is removed).
    """
    output_path = None
    try:
        cap = cv2.VideoCapture(video_path)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Only the name is needed; close the handle right away so no file
            # descriptor is leaked while OpenCV writes to the same path.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
                output_path = temp_file.name

            writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
            try:
                frame_count = 0
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # Process every 5th frame to reduce load
                    if frame_count % 5 == 0:
                        results = YOLO_MODEL(frame)
                        counts = {}

                        for r in results:
                            for box in r.boxes:
                                if box.conf.item() < 0.5:
                                    continue

                                cls_id = int(box.cls.item())
                                class_name = YOLO_MODEL.names[cls_id]
                                counts[class_name] = counts.get(class_name, 0) + 1

                                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                                cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
                                cv2.putText(frame, f"{class_name} {box.conf.item():.2f}",
                                            (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)

                        summary = ", ".join([f"{k}:{v}" for k,v in counts.items()])
                        cv2.putText(frame, summary, (10,30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)

                    writer.write(frame)
                    frame_count += 1
            finally:
                # Finalise the output file on success and on error.
                writer.release()
        finally:
            cap.release()

        return output_path

    except Exception as e:
        print(f"Video annotation error: {str(e)}")
        # Do not leave a partial output file behind after a failure.
        if output_path is not None and os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                print(f"Video annotation cleanup error: {str(cleanup_error)}")
        return None
274
 
275
  def process_diary(day, date, media):
276
+ """Main processing pipeline with error handling"""
277
  try:
278
  if not media:
279
  return [day, date, "No data", "No data", "No data", "No data", None]
 
285
  detected_people, detected_machinery, machine_types = detect_people_and_machinery(media_path)
286
  annotated_video = None
287
 
288
+ try:
289
+ if is_image(media.name):
290
+ activities = analyze_image_activities(media_path)
291
+ else:
292
+ activities = analyze_video_activities(media_path)
293
+ annotated_video = annotate_video_with_bboxes(media_path)
294
+ except Exception as e:
295
+ activities = f"Analysis error: {str(e)}"
296
 
297
  os.remove(media_path)
298
  return [
 
331
  model_activities = gr.Textbox(label="Activity Analysis", lines=4)
332
  model_video = gr.Video(label="Safety Annotations")
333
 
 
334
  submit_btn.click(
335
  process_diary,
336
  inputs=[day, date, media],