Update app.py
app.py CHANGED
@@ -13,6 +13,9 @@ import numpy as np
 import cv2
 from modelscope.hub.snapshot_download import snapshot_download
 
+# Fix GLIBCXX dependency
+os.environ['LD_LIBRARY_PATH'] = '/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH'
+
 # Initialize GPU
 @spaces.GPU
 def initialize_gpu():
@@ -20,14 +23,18 @@ def initialize_gpu():
     torch.randn(10).cuda()
 initialize_gpu()
 
-# Load YOLO model with
-
+# Load YOLO model with error handling
+try:
+    YOLO_MODEL = YOLO('best_yolov11.pt')
+except Exception as e:
+    raise RuntimeError(f"YOLO model loading failed: {str(e)}")
 
-# Model configuration
+# Model configuration
 MODEL_NAME = 'iic/mPLUG-Owl3-7B-240728'
-
-
-
+try:
+    model_dir = snapshot_download(MODEL_NAME, cache_dir='./models')
+except Exception as e:
+    raise RuntimeError(f"Model download failed: {str(e)}")
 
 # Device setup
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -47,7 +54,7 @@ def is_video(filename):
 
 @spaces.GPU
 def load_model_and_tokenizer():
-    """Load
+    """Load 8-bit quantized model with memory optimizations"""
     try:
         torch.cuda.empty_cache()
         gc.collect()
@@ -56,9 +63,9 @@ def load_model_and_tokenizer():
             model_dir,
             attn_implementation='sdpa',
             trust_remote_code=True,
-
+            load_in_8bit=True,
             device_map="auto",
-            torch_dtype=torch.
+            torch_dtype=torch.float16
         )
 
         tokenizer = AutoTokenizer.from_pretrained(
@@ -72,10 +79,201 @@ def load_model_and_tokenizer():
         print(f"Model loading error: {str(e)}")
         raise
 
-
+def process_yolo_results(results):
+    """Process YOLO detection results with safety checks"""
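+    # Map raw YOLO class names to diary display names; several aliases share one label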
+    machinery_mapping = {
+        'tower_crane': "Tower Crane",
+        'mobile_crane': "Mobile Crane",
+        'compactor': "Compactor/Roller",
+        'roller': "Compactor/Roller",
+        'bulldozer': "Bulldozer",
+        'dozer': "Bulldozer",
+        'excavator': "Excavator",
+        'dump_truck': "Dump Truck",
+        'truck': "Dump Truck",
+        'concrete_mixer_truck': "Concrete Mixer",
+        'loader': "Loader",
+        'pump_truck': "Pump Truck",
+        'pile_driver': "Pile Driver",
+        'grader': "Grader",
+        'other_vehicle': "Other Vehicle"
+    }
+
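+    # One counter per display name; workers are tallied separately from machinery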
+    counts = {"Worker": 0, **{v: 0 for v in machinery_mapping.values()}}
+
+    try:
+        for r in results:
+            for box in r.boxes:
+                if box.conf.item() < 0.5:
+                    continue
+
+                cls_name = YOLO_MODEL.names[int(box.cls.item())].lower()
+                if cls_name == 'worker':
+                    counts["Worker"] += 1
+                    continue
+
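+                # Substring match, so variant class names still hit a display category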
+                for key, value in machinery_mapping.items():
+                    if key in cls_name:
+                        counts[value] += 1
+                        break
+    except Exception as e:
+        print(f"YOLO processing error: {str(e)}")
+
+    return counts["Worker"], sum(counts.values()) - counts["Worker"], counts
+
+@spaces.GPU
+def detect_people_and_machinery(media_path):
+    """GPU-accelerated detection with memory management"""
+    try:
+        max_people = 0
+        max_machines = {k: 0 for k in [
+            "Tower Crane", "Mobile Crane", "Compactor/Roller", "Bulldozer",
+            "Excavator", "Dump Truck", "Concrete Mixer", "Loader",
+            "Pump Truck", "Pile Driver", "Grader", "Other Vehicle"
+        ]}
+
+        if isinstance(media_path, str) and is_video(media_path):
+            cap = cv2.VideoCapture(media_path)
+            fps = cap.get(cv2.CAP_PROP_FPS)
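+            # Sampling roughly one frame per second bounds YOLO inference cost per video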
+            sample_rate = max(1, int(fps))
+
+            while cap.isOpened():
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                if cap.get(cv2.CAP_PROP_POS_FRAMES) % sample_rate == 0:
+                    results = YOLO_MODEL(frame)
+                    people, machines, types = process_yolo_results(results)
+
+                    max_people = max(max_people, people)
+                    for k in max_machines:
+                        max_machines[k] = max(max_machines[k], types.get(k, 0))
+
+            cap.release()
+        else:
+            img = cv2.imread(media_path) if isinstance(media_path, str) else cv2.cvtColor(np.array(media_path), cv2.COLOR_RGB2BGR)
+            results = YOLO_MODEL(img)
+            max_people, _, types = process_yolo_results(results)
+            for k in max_machines:
+                max_machines[k] = types.get(k, 0)
+
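+        # Keep only machinery classes that were actually seen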
+        filtered = {k: v for k, v in max_machines.items() if v > 0}
+        return max_people, sum(filtered.values()), filtered
+
+    except Exception as e:
+        print(f"Detection error: {str(e)}")
+        return 0, 0, {}
+
+@spaces.GPU
+def analyze_video_activities(video_path):
+    """Video analysis with chunk processing and memory cleanup"""
+    try:
+        model, tokenizer, processor = load_model_and_tokenizer()
+        responses = []
+
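+        # Frames are decoded on the CPU (decord); processed tensors move to DEVICE below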
+        vr = VideoReader(video_path, ctx=cpu(0))
+        frame_step = max(1, int(vr.get_avg_fps()))
+        total_frames = len(vr)
+
+        # Process in 16-frame chunks
+        for i in range(0, total_frames, 16):
+            end_idx = min(i+16, total_frames)
+            frames = [Image.fromarray(vr[j].asnumpy()) for j in range(i, end_idx)]
+
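+            # Chat-style prompt plus the decoded frames feed the multimodal processor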
+            inputs = processor(
+                [{"role": "user", "content": "Analyze construction activities", "video_frames": frames}],
+                videos=[frames]
+            ).to(DEVICE)
+
+            response = model.generate(**inputs, max_new_tokens=200)
+            responses.append(response[0])
+
+            del frames, inputs
+            torch.cuda.empty_cache()
+
+        del model, tokenizer, processor
+        return "\n".join(responses)
+
+    except Exception as e:
+        print(f"Video analysis error: {str(e)}")
+        return "Activity analysis unavailable"
+
+@spaces.GPU
+def analyze_image_activities(image_path):
+    """Image analysis with memory cleanup"""
+    try:
+        model, tokenizer, processor = load_model_and_tokenizer()
+        image = Image.open(image_path).convert("RGB")
+
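+        # Same prompt pattern as the video path, with a single RGB image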
+        inputs = processor(
+            [{"role": "user", "content": "Analyze construction site", "images": [image]}],
+            images=[image]
+        ).to(DEVICE)
+
+        response = model.generate(**inputs, max_new_tokens=200)
+
+        del model, tokenizer, processor, image, inputs
+        torch.cuda.empty_cache()
+        return response[0]
+
+    except Exception as e:
+        print(f"Image analysis error: {str(e)}")
+        return "Activity analysis unavailable"
+
+@spaces.GPU
+def annotate_video_with_bboxes(video_path):
+    """Video annotation with efficient frame processing"""
+    try:
+        cap = cv2.VideoCapture(video_path)
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
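+        # delete=False keeps the temp .mp4 on disk so the gr.Video output can serve it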
+        temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+        writer = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
+
+        frame_count = 0
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            # Process every 5th frame to reduce load
+            if frame_count % 5 == 0:
+                results = YOLO_MODEL(frame)
+                counts = {}
+
+                for r in results:
+                    for box in r.boxes:
+                        if box.conf.item() < 0.5:
+                            continue
+
+                        cls_id = int(box.cls.item())
+                        class_name = YOLO_MODEL.names[cls_id]
+                        counts[class_name] = counts.get(class_name, 0) + 1
+
+                        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
+                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
+                        cv2.putText(frame, f"{class_name} {box.conf.item():.2f}",
+                                    (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)
+
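+                # Overlay a per-class count summary at the top of the frame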
+                summary = ", ".join([f"{k}:{v}" for k,v in counts.items()])
+                cv2.putText(frame, summary, (10,30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
+
+            writer.write(frame)
+            frame_count += 1
+
+        cap.release()
+        writer.release()
+        return temp_file.name
+
+    except Exception as e:
+        print(f"Video annotation error: {str(e)}")
+        return None
 
 def process_diary(day, date, media):
-    """
+    """Main processing pipeline with error handling"""
     try:
         if not media:
             return [day, date, "No data", "No data", "No data", "No data", None]
@@ -87,11 +285,14 @@
         detected_people, detected_machinery, machine_types = detect_people_and_machinery(media_path)
         annotated_video = None
 
-
-
-
-
-
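+        # Images get a text analysis only; videos also get an annotated copy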
+        try:
+            if is_image(media.name):
+                activities = analyze_image_activities(media_path)
+            else:
+                activities = analyze_video_activities(media_path)
+                annotated_video = annotate_video_with_bboxes(media_path)
+        except Exception as e:
+            activities = f"Analysis error: {str(e)}"
 
         os.remove(media_path)
         return [
@@ -130,7 +331,6 @@ with gr.Blocks(title="Digital Site Diary", css="video {height: auto !important;}
         model_activities = gr.Textbox(label="Activity Analysis", lines=4)
         model_video = gr.Video(label="Safety Annotations")
 
-    # Fixed input mapping
     submit_btn.click(
         process_diary,
         inputs=[day, date, media],