assentian1970 committed
Commit 6d1a54e · verified · 1 Parent(s): aa25b19

Update app.py

Files changed (1)
  1. app.py +70 -26
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import spaces
 import torch
 import argparse
@@ -8,6 +7,7 @@ import pickle # For serializing frames
 import gc
 import tempfile
 import subprocess
+import time
 from datetime import datetime
 from transformers import AutoModel, AutoTokenizer
 from modelscope.hub.snapshot_download import snapshot_download
@@ -25,6 +25,7 @@ subprocess.run(
     env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'},
     shell=True
 )
+
 # --------------------------------------------------------------------
 # Command-line arguments
 # --------------------------------------------------------------------
@@ -32,7 +33,7 @@ parser = argparse.ArgumentParser(description='demo')
 parser.add_argument('--device', type=str, default='cuda', help='cuda or mps')
 parser.add_argument("--host", type=str, default="0.0.0.0")
 parser.add_argument("--port", type=int)
-# New arguments for subprocess inference (unused in this version)
+# Arguments for subprocess inference mode
 parser.add_argument("--chunk_inference", action="store_true", help="Run inference on a chunk (subprocess mode).")
 parser.add_argument("--input_file", type=str, help="Path to serialized input chunk frames.")
 parser.add_argument("--output_file", type=str, help="Path to file where inference result is written.")
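One thing the hunks shown here do not include: the worker branch added further down reads args.inference_prompt, and the parent invocation passes --inference_prompt and --model_path_arg, yet neither argument is registered in this diff. If they are not added elsewhere in app.py, argparse will reject the worker command line. A minimal sketch of the two assumed registrations (names taken from the call site, defaults assumed):

parser.add_argument("--inference_prompt", type=str, default="",
                    help="Prompt forwarded to the chunk-inference worker.")
parser.add_argument("--model_path_arg", type=str, default=None,
                    help="Model path forwarded to the chunk-inference worker.")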
@@ -130,11 +131,11 @@ def process_video_chunk(video_frames, model, tokenizer, processor, prompt):
     })
     with torch.no_grad():
         response = model.generate(**inputs)
-    del inputs  # delete inputs to free temporary memory
+    del inputs  # Free temporary memory
     return response[0]
 
 # --------------------------------------------------------------------
-# Video and YOLO functions (unchanged)
+# Video and YOLO Functions (Unchanged)
 # --------------------------------------------------------------------
 def encode_video_in_chunks(video_path):
     """Extract frames from a video in chunks."""
@@ -324,36 +325,78 @@ def annotate_video_with_bboxes(video_path):
     return annotated_video_path
 
 # --------------------------------------------------------------------
-# Adjusted Video Analysis with Single mPLUG Instance (No Reload)
+# Subprocess Worker: Executed when --chunk_inference flag is provided
+# --------------------------------------------------------------------
+if args.chunk_inference:
+    # In worker mode, load the serialized frames from the input file
+    try:
+        with open(args.input_file, "rb") as f:
+            frames_serialized = pickle.load(f)
+        video_frames = []
+        for img_bytes in frames_serialized:
+            video_frames.append(Image.open(io.BytesIO(img_bytes)))
+    except Exception as e:
+        print(f"Error reading input frames: {str(e)}")
+        sys.exit(1)
+    try:
+        model, tokenizer, processor = load_model_and_tokenizer()
+        response = process_video_chunk(video_frames, model, tokenizer, processor, args.inference_prompt)
+        with open(args.output_file, "w") as f:
+            f.write(response)
+        del model, tokenizer, processor
+        torch.cuda.empty_cache()
+        gc.collect()
+    except Exception as e:
+        with open(args.output_file, "w") as f:
+            f.write(f"Error in chunk inference: {str(e)}")
+    sys.exit(0)
+
+# --------------------------------------------------------------------
+# Main Video Analysis Function Using Subprocess Isolation
 # --------------------------------------------------------------------
 @spaces.GPU
-def analyze_video_activities_single_instance(video_path):
-    """Analyze video using mPLUG model with chunking.
-    Use a single mPLUG model instance for all chunks without any per-chunk cleanup."""
+def analyze_video_activities_subprocess(video_path):
+    """Analyze video by processing each chunk in a separate subprocess.
+    Each subprocess loads a fresh model instance to avoid GPU memory buildup."""
     try:
         all_responses = []
         chunk_generator = encode_video_in_chunks(video_path)
-
-        # Load model instance once
-        model, tokenizer, processor = load_model_and_tokenizer()
-
         for chunk_idx, video_frames in chunk_generator:
-            prompt = (
-                "Analyze this construction site video chunk and describe the activities happening. "
-                "Focus on construction activities, machinery usage, and worker actions."
-            )
-            with torch.no_grad():
-                response = process_video_chunk(video_frames, model, tokenizer, processor, prompt)
+            # Serialize each frame in the chunk to bytes
+            temp_input = tempfile.NamedTemporaryFile(suffix=".pkl", delete=False)
+            frames_serializable = []
+            for img in video_frames:
+                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tf:
+                    img.save(tf, format="PNG")
+                    tf.seek(0)
+                    frames_serializable.append(tf.read())
+                os.remove(tf.name)
+            with open(temp_input.name, "wb") as f:
+                pickle.dump(frames_serializable, f)
+            # Create a temporary file for subprocess output
+            temp_output = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
+            temp_output.close()
+            prompt = ("Analyze this construction site video chunk and describe the activities happening. "
+                      "Focus on construction activities, machinery usage, and worker actions.")
+            # Launch subprocess for this chunk
+            subprocess.run([
+                sys.executable, __file__,
+                "--chunk_inference",
+                "--input_file", temp_input.name,
+                "--output_file", temp_output.name,
+                "--inference_prompt", prompt,
+                "--model_path_arg", model_path,
+                "--device", device
+            ], check=True)
+            with open(temp_output.name, "r") as f:
+                response = f.read().strip()
             all_responses.append(f"Time period {chunk_idx + 1}:\n{response}")
-            # No per-chunk cache clearing is performed here
-
-        # Final cleanup after processing all chunks
-        del model, tokenizer, processor
-        torch.cuda.empty_cache()
-        gc.collect()
+            os.remove(temp_input.name)
+            os.remove(temp_output.name)
+            time.sleep(2)  # Allow time for GPU memory to fully clear before next chunk
         return "\n\n".join(all_responses)
     except Exception as e:
-        print(f"Error analyzing video: {str(e)}")
+        print(f"Error in subprocess chunk inference: {str(e)}")
        return "Error analyzing video activities"
 
 # --------------------------------------------------------------------
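A detail worth flagging in the hunk above: each PIL frame is round-tripped through a temporary .png file on disk purely to obtain its encoded bytes. The same PNG encoding works in memory via io.BytesIO, saving one file create/delete per frame; a sketch of that alternative (hypothetical helper, not in this commit):

import io
import pickle

def serialize_frames_in_memory(video_frames):
    """Encode PIL frames to PNG bytes without touching the filesystem."""
    frames_serializable = []
    for img in video_frames:
        buf = io.BytesIO()
        img.save(buf, format="PNG")           # PIL writes the PNG bytes into the buffer
        frames_serializable.append(buf.getvalue())
    return pickle.dumps(frames_serializable)  # same pickle payload the worker expects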
@@ -383,7 +426,8 @@ def process_diary(day, date, total_people, total_machinery, machinery_types, act
     if is_image(media.name):
         detected_activities = analyze_image_activities(temp_path)
     else:
-        detected_activities = analyze_video_activities_single_instance(temp_path)
+        # Use the subprocess-based video analysis for each chunk
+        detected_activities = analyze_video_activities_subprocess(temp_path)
     annotated_video_path = annotate_video_with_bboxes(temp_path)
     if os.path.exists(temp_path):
         os.remove(temp_path)
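One last observation on the subprocess protocol: as reconstructed above, the worker exits with status 0 even when inference fails (it writes the error text into the output file first), so check=True in the parent only trips on frame-loading failures, which call sys.exit(1). When it does trip, the parent's except skips the os.remove calls and the temp files leak. One way to harden the call site, sketched under the same argument names as the diff (not the committed code):

import os
import subprocess
import sys

def run_chunk_worker(input_path, output_path, prompt, model_path, device):
    """Invoke app.py in --chunk_inference mode and always clean up temp files."""
    try:
        subprocess.run([
            sys.executable, __file__,
            "--chunk_inference",
            "--input_file", input_path,
            "--output_file", output_path,
            "--inference_prompt", prompt,
            "--model_path_arg", model_path,
            "--device", device,
        ], check=True)
        with open(output_path, "r") as f:
            return f.read().strip()
    finally:
        # Runs whether or not check=True raised, so no temp files are leaked.
        for path in (input_path, output_path):
            if os.path.exists(path):
                os.remove(path)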
 
 