Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import spaces
 import torch
 import argparse
@@ -8,6 +7,7 @@ import pickle # For serializing frames
 import gc
 import tempfile
 import subprocess
+import time
 from datetime import datetime
 from transformers import AutoModel, AutoTokenizer
 from modelscope.hub.snapshot_download import snapshot_download
@@ -25,6 +25,7 @@ subprocess.run(
     env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'},
     shell=True
 )
+
 # --------------------------------------------------------------------
 # Command-line arguments
 # --------------------------------------------------------------------
@@ -32,7 +33,7 @@ parser = argparse.ArgumentParser(description='demo')
 parser.add_argument('--device', type=str, default='cuda', help='cuda or mps')
 parser.add_argument("--host", type=str, default="0.0.0.0")
 parser.add_argument("--port", type=int)
-#
+# Arguments for subprocess inference mode
 parser.add_argument("--chunk_inference", action="store_true", help="Run inference on a chunk (subprocess mode).")
 parser.add_argument("--input_file", type=str, help="Path to serialized input chunk frames.")
 parser.add_argument("--output_file", type=str, help="Path to file where inference result is written.")
@@ -130,11 +131,11 @@ def process_video_chunk(video_frames, model, tokenizer, processor, prompt):
     })
     with torch.no_grad():
         response = model.generate(**inputs)
-    del inputs #
+    del inputs # Free temporary memory
     return response[0]
 
 # --------------------------------------------------------------------
-# Video and YOLO
+# Video and YOLO Functions (Unchanged)
 # --------------------------------------------------------------------
 def encode_video_in_chunks(video_path):
     """Extract frames from a video in chunks."""
@@ -324,36 +325,78 @@ def annotate_video_with_bboxes(video_path):
     return annotated_video_path
 
 # --------------------------------------------------------------------
-#
+# Subprocess Worker: Executed when --chunk_inference flag is provided
+# --------------------------------------------------------------------
+if args.chunk_inference:
+    # In worker mode, load the serialized frames from the input file
+    try:
+        with open(args.input_file, "rb") as f:
+            frames_serialized = pickle.load(f)
+        video_frames = []
+        for img_bytes in frames_serialized:
+            video_frames.append(Image.open(io.BytesIO(img_bytes)))
+    except Exception as e:
+        print(f"Error reading input frames: {str(e)}")
+        sys.exit(1)
+    try:
+        model, tokenizer, processor = load_model_and_tokenizer()
+        response = process_video_chunk(video_frames, model, tokenizer, processor, args.inference_prompt)
+        with open(args.output_file, "w") as f:
+            f.write(response)
+        del model, tokenizer, processor
+        torch.cuda.empty_cache()
+        gc.collect()
+    except Exception as e:
+        with open(args.output_file, "w") as f:
+            f.write(f"Error in chunk inference: {str(e)}")
+    sys.exit(0)
+
+# --------------------------------------------------------------------
+# Main Video Analysis Function Using Subprocess Isolation
 # --------------------------------------------------------------------
 @spaces.GPU
-def analyze_video_activities(video_path):
-    """Analyze video …
-
+def analyze_video_activities_subprocess(video_path):
+    """Analyze video by processing each chunk in a separate subprocess.
+    Each subprocess loads a fresh model instance to avoid GPU memory buildup."""
     try:
         all_responses = []
         chunk_generator = encode_video_in_chunks(video_path)
-
-        # Load model instance once
-        model, tokenizer, processor = load_model_and_tokenizer()
-
         for chunk_idx, video_frames in chunk_generator:
-            …
+            # Serialize each frame in the chunk to bytes
+            temp_input = tempfile.NamedTemporaryFile(suffix=".pkl", delete=False)
+            frames_serializable = []
+            for img in video_frames:
+                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tf:
+                    img.save(tf, format="PNG")
+                    tf.seek(0)
+                    frames_serializable.append(tf.read())
+                os.remove(tf.name)
+            with open(temp_input.name, "wb") as f:
+                pickle.dump(frames_serializable, f)
+            # Create a temporary file for subprocess output
+            temp_output = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
+            temp_output.close()
+            prompt = ("Analyze this construction site video chunk and describe the activities happening. "
+                      "Focus on construction activities, machinery usage, and worker actions.")
+            # Launch subprocess for this chunk
+            subprocess.run([
+                sys.executable, __file__,
+                "--chunk_inference",
+                "--input_file", temp_input.name,
+                "--output_file", temp_output.name,
+                "--inference_prompt", prompt,
+                "--model_path_arg", model_path,
+                "--device", device
+            ], check=True)
+            with open(temp_output.name, "r") as f:
+                response = f.read().strip()
             all_responses.append(f"Time period {chunk_idx + 1}:\n{response}")
-            …
-        del model, tokenizer, processor
-        torch.cuda.empty_cache()
-        gc.collect()
+            os.remove(temp_input.name)
+            os.remove(temp_output.name)
+            time.sleep(2) # Allow time for GPU memory to fully clear before next chunk
         return "\n\n".join(all_responses)
     except Exception as e:
-        print(f"Error …
+        print(f"Error in subprocess chunk inference: {str(e)}")
        return "Error analyzing video activities"
 
 # --------------------------------------------------------------------
@@ -383,7 +426,8 @@ def process_diary(day, date, total_people, total_machinery, machinery_types, act
     if is_image(media.name):
         detected_activities = analyze_image_activities(temp_path)
     else:
-        detected_activities = analyze_video_activities(temp_path)
+        # Use the subprocess-based video analysis for each chunk
+        detected_activities = analyze_video_activities_subprocess(temp_path)
     annotated_video_path = annotate_video_with_bboxes(temp_path)
     if os.path.exists(temp_path):
         os.remove(temp_path)
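
The heart of this change is a relaunch-per-chunk pattern: rather than reusing one in-process model, the script re-invokes itself (sys.executable plus __file__) with --chunk_inference, so each chunk is handled by a short-lived child process whose GPU memory is released unconditionally when it exits. Below is a minimal, self-contained sketch of that round trip; the --worker flag and the work() stand-in are illustrative only, not names from app.py.

    # relaunch_sketch.py -- the same file acts as parent and worker.
    import subprocess
    import sys
    import tempfile

    def work(text):
        # Stand-in for load_model_and_tokenizer() + process_video_chunk().
        return text.upper()

    if "--worker" in sys.argv:
        # Child: read input file, write result file, exit (freeing all memory).
        in_path, out_path = sys.argv[-2], sys.argv[-1]
        with open(in_path) as f_in, open(out_path, "w") as f_out:
            f_out.write(work(f_in.read()))
        sys.exit(0)

    def run_in_subprocess(payload):
        f_in = tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False)
        f_in.write(payload)
        f_in.close()
        f_out = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
        f_out.close()
        # check=True turns a non-zero child exit (the worker's sys.exit(1) path)
        # into CalledProcessError instead of silently reading a stale file.
        subprocess.run(
            [sys.executable, __file__, "--worker", f_in.name, f_out.name],
            check=True,
        )
        with open(f_out.name) as f:
            return f.read()

    if __name__ == "__main__":
        print(run_in_subprocess("chunk 1"))  # -> CHUNK 1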
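
One serialization detail worth noting: the parent obtains PNG bytes by saving each frame to a temporary .png file on disk and reading it back, while the worker decodes with Image.open(io.BytesIO(img_bytes)). The same encode/decode round trip also works entirely in memory with io.BytesIO on the write side; the sketch below shows the symmetric pair as an alternative, not what the diff itself does.

    import io
    import pickle
    from PIL import Image

    def frames_to_png_bytes(frames):
        blobs = []
        for img in frames:
            buf = io.BytesIO()
            img.save(buf, format="PNG")  # encode without touching disk
            blobs.append(buf.getvalue())
        return blobs

    def png_bytes_to_frames(blobs):
        # Mirrors the worker's Image.open(io.BytesIO(img_bytes)) loop.
        return [Image.open(io.BytesIO(b)) for b in blobs]

    frames = [Image.new("RGB", (4, 4), "red")]
    payload = pickle.dumps(frames_to_png_bytes(frames))  # contents of input_file
    restored = png_bytes_to_frames(pickle.loads(payload))
    assert restored[0].size == (4, 4)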
|