assentian1970 committed on
Commit
56084f9
·
verified ·
1 Parent(s): a35db3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -21
app.py CHANGED
@@ -21,7 +21,8 @@ import io
21
  # Install flash-attn (using prebuilt wheel mode if needed)
22
  subprocess.run(
23
  'pip install flash-attn --no-build-isolation',
24
- env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'}, shell=True
 
25
  )
26
 
27
  # --------------------------------------------------------------------
@@ -54,7 +55,6 @@ if not args.chunk_inference:
54
  print(f"Error downloading model: {str(e)}")
55
  model_path = os.path.join(MODEL_CACHE_DIR, MODEL_NAME)
56
  else:
57
- # In worker mode, use the passed model path (unused here)
58
  model_path = args.model_path_arg
59
 
60
  MAX_NUM_FRAMES = 64
@@ -81,7 +81,7 @@ def is_video(filename):
81
  def load_model_and_tokenizer():
82
  """Load a fresh instance of the model and tokenizer."""
83
  try:
84
- # Clear GPU memory if using CUDA
85
  if device == "cuda":
86
  torch.cuda.empty_cache()
87
  gc.collect()
@@ -122,16 +122,15 @@ def process_video_chunk(video_frames, model, tokenizer, processor, prompt):
122
  videos=videos if videos else None
123
  )
124
  inputs.to('cuda')
125
- # Update inputs: disable caching to prevent memory buildup.
126
  inputs.update({
127
  'tokenizer': tokenizer,
128
  'max_new_tokens': 100,
129
  'decode_text': True,
130
- 'use_cache': False
131
  })
132
- response = model.generate(**inputs)
133
- # Explicitly delete inputs to free memory
134
- del inputs
135
  return response[0]
136
 
137
  # --------------------------------------------------------------------
@@ -254,9 +253,10 @@ def process_image(image_path, model, tokenizer, processor, prompt):
254
  'tokenizer': tokenizer,
255
  'max_new_tokens': 100,
256
  'decode_text': True,
257
- 'use_cache': False # disable cache for memory efficiency
258
  })
259
- response = model.generate(**inputs)
 
260
  del inputs
261
  return response[0]
262
  except Exception as e:
@@ -271,9 +271,8 @@ def analyze_image_activities(image_path):
271
  "Focus on construction activities, machinery usage, and worker actions.")
272
  response = process_image(image_path, model, tokenizer, processor, prompt)
273
  del model, tokenizer, processor
274
- if device == "cuda":
275
- torch.cuda.empty_cache()
276
- gc.collect()
277
  return response
278
  except Exception as e:
279
  print(f"Error analyzing image: {str(e)}")
@@ -330,12 +329,12 @@ def annotate_video_with_bboxes(video_path):
330
  @spaces.GPU
331
  def analyze_video_activities_single_instance(video_path):
332
  """Analyze video using mPLUG model with chunking.
333
- Reuse a single mPLUG model instance for all chunks (no reloading)."""
334
  try:
335
  all_responses = []
336
  chunk_generator = encode_video_in_chunks(video_path)
337
 
338
- # Load model instance once and keep it for all chunks
339
  model, tokenizer, processor = load_model_and_tokenizer()
340
 
341
  for chunk_idx, video_frames in chunk_generator:
@@ -343,15 +342,12 @@ def analyze_video_activities_single_instance(video_path):
343
  "Analyze this construction site video chunk and describe the activities happening. "
344
  "Focus on construction activities, machinery usage, and worker actions."
345
  )
346
- # Wrap inference in torch.no_grad() to prevent gradient accumulation
347
  with torch.no_grad():
348
  response = process_video_chunk(video_frames, model, tokenizer, processor, prompt)
349
  all_responses.append(f"Time period {chunk_idx + 1}:\n{response}")
350
- # Attempt to free temporary memory after each chunk
351
- torch.cuda.empty_cache()
352
- gc.collect()
353
-
354
- # Final cleanup: free the model instance
355
  del model, tokenizer, processor
356
  torch.cuda.empty_cache()
357
  gc.collect()
 
21
  # Install flash-attn (using prebuilt wheel mode if needed)
22
  subprocess.run(
23
  'pip install flash-attn --no-build-isolation',
24
+ env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'},
25
+ shell=True
26
  )
27
 
28
  # --------------------------------------------------------------------
 
55
  print(f"Error downloading model: {str(e)}")
56
  model_path = os.path.join(MODEL_CACHE_DIR, MODEL_NAME)
57
  else:
 
58
  model_path = args.model_path_arg
59
 
60
  MAX_NUM_FRAMES = 64
 
81
  def load_model_and_tokenizer():
82
  """Load a fresh instance of the model and tokenizer."""
83
  try:
84
+ # Clear GPU memory if using CUDA (only at initial load)
85
  if device == "cuda":
86
  torch.cuda.empty_cache()
87
  gc.collect()
 
122
  videos=videos if videos else None
123
  )
124
  inputs.to('cuda')
 
125
  inputs.update({
126
  'tokenizer': tokenizer,
127
  'max_new_tokens': 100,
128
  'decode_text': True,
129
+ 'use_cache': False # disable caching to reduce memory buildup
130
  })
131
+ with torch.no_grad():
132
+ response = model.generate(**inputs)
133
+ del inputs # delete inputs to free temporary memory
134
  return response[0]
135
 
136
  # --------------------------------------------------------------------
 
253
  'tokenizer': tokenizer,
254
  'max_new_tokens': 100,
255
  'decode_text': True,
256
+ 'use_cache': False
257
  })
258
+ with torch.no_grad():
259
+ response = model.generate(**inputs)
260
  del inputs
261
  return response[0]
262
  except Exception as e:
 
271
  "Focus on construction activities, machinery usage, and worker actions.")
272
  response = process_image(image_path, model, tokenizer, processor, prompt)
273
  del model, tokenizer, processor
274
+ torch.cuda.empty_cache() # Final cleanup after image processing
275
+ gc.collect()
 
276
  return response
277
  except Exception as e:
278
  print(f"Error analyzing image: {str(e)}")
 
329
  @spaces.GPU
330
  def analyze_video_activities_single_instance(video_path):
331
  """Analyze video using mPLUG model with chunking.
332
+ Use a single mPLUG model instance for all chunks without any per-chunk cleanup."""
333
  try:
334
  all_responses = []
335
  chunk_generator = encode_video_in_chunks(video_path)
336
 
337
+ # Load model instance once
338
  model, tokenizer, processor = load_model_and_tokenizer()
339
 
340
  for chunk_idx, video_frames in chunk_generator:
 
342
  "Analyze this construction site video chunk and describe the activities happening. "
343
  "Focus on construction activities, machinery usage, and worker actions."
344
  )
 
345
  with torch.no_grad():
346
  response = process_video_chunk(video_frames, model, tokenizer, processor, prompt)
347
  all_responses.append(f"Time period {chunk_idx + 1}:\n{response}")
348
+ # No per-chunk cache clearing is performed here
349
+
350
+ # Final cleanup after processing all chunks
 
 
351
  del model, tokenizer, processor
352
  torch.cuda.empty_cache()
353
  gc.collect()