Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,8 @@ import io
|
|
21 |
# Install flash-attn (using prebuilt wheel mode if needed)
|
22 |
subprocess.run(
|
23 |
'pip install flash-attn --no-build-isolation',
|
24 |
-
env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'},
|
|
|
25 |
)
|
26 |
|
27 |
# --------------------------------------------------------------------
|
@@ -54,7 +55,6 @@ if not args.chunk_inference:
|
|
54 |
print(f"Error downloading model: {str(e)}")
|
55 |
model_path = os.path.join(MODEL_CACHE_DIR, MODEL_NAME)
|
56 |
else:
|
57 |
-
# In worker mode, use the passed model path (unused here)
|
58 |
model_path = args.model_path_arg
|
59 |
|
60 |
MAX_NUM_FRAMES = 64
|
@@ -81,7 +81,7 @@ def is_video(filename):
|
|
81 |
def load_model_and_tokenizer():
|
82 |
"""Load a fresh instance of the model and tokenizer."""
|
83 |
try:
|
84 |
-
# Clear GPU memory if using CUDA
|
85 |
if device == "cuda":
|
86 |
torch.cuda.empty_cache()
|
87 |
gc.collect()
|
@@ -122,16 +122,15 @@ def process_video_chunk(video_frames, model, tokenizer, processor, prompt):
|
|
122 |
videos=videos if videos else None
|
123 |
)
|
124 |
inputs.to('cuda')
|
125 |
-
# Update inputs: disable caching to prevent memory buildup.
|
126 |
inputs.update({
|
127 |
'tokenizer': tokenizer,
|
128 |
'max_new_tokens': 100,
|
129 |
'decode_text': True,
|
130 |
-
'use_cache': False
|
131 |
})
|
132 |
-
|
133 |
-
|
134 |
-
del inputs
|
135 |
return response[0]
|
136 |
|
137 |
# --------------------------------------------------------------------
|
@@ -254,9 +253,10 @@ def process_image(image_path, model, tokenizer, processor, prompt):
|
|
254 |
'tokenizer': tokenizer,
|
255 |
'max_new_tokens': 100,
|
256 |
'decode_text': True,
|
257 |
-
'use_cache': False
|
258 |
})
|
259 |
-
|
|
|
260 |
del inputs
|
261 |
return response[0]
|
262 |
except Exception as e:
|
@@ -271,9 +271,8 @@ def analyze_image_activities(image_path):
|
|
271 |
"Focus on construction activities, machinery usage, and worker actions.")
|
272 |
response = process_image(image_path, model, tokenizer, processor, prompt)
|
273 |
del model, tokenizer, processor
|
274 |
-
|
275 |
-
|
276 |
-
gc.collect()
|
277 |
return response
|
278 |
except Exception as e:
|
279 |
print(f"Error analyzing image: {str(e)}")
|
@@ -330,12 +329,12 @@ def annotate_video_with_bboxes(video_path):
|
|
330 |
@spaces.GPU
|
331 |
def analyze_video_activities_single_instance(video_path):
|
332 |
"""Analyze video using mPLUG model with chunking.
|
333 |
-
|
334 |
try:
|
335 |
all_responses = []
|
336 |
chunk_generator = encode_video_in_chunks(video_path)
|
337 |
|
338 |
-
# Load model instance once
|
339 |
model, tokenizer, processor = load_model_and_tokenizer()
|
340 |
|
341 |
for chunk_idx, video_frames in chunk_generator:
|
@@ -343,15 +342,12 @@ def analyze_video_activities_single_instance(video_path):
|
|
343 |
"Analyze this construction site video chunk and describe the activities happening. "
|
344 |
"Focus on construction activities, machinery usage, and worker actions."
|
345 |
)
|
346 |
-
# Wrap inference in torch.no_grad() to prevent gradient accumulation
|
347 |
with torch.no_grad():
|
348 |
response = process_video_chunk(video_frames, model, tokenizer, processor, prompt)
|
349 |
all_responses.append(f"Time period {chunk_idx + 1}:\n{response}")
|
350 |
-
#
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
# Final cleanup: free the model instance
|
355 |
del model, tokenizer, processor
|
356 |
torch.cuda.empty_cache()
|
357 |
gc.collect()
|
|
|
21 |
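# Install flash-attn; FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE skips the local CUDA build so a prebuilt wheel is used.
# NOTE(review): env= below replaces the whole inherited environment (PATH included) instead of extending it; consider {**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'} - confirm.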
|
22 |
subprocess.run(
|
23 |
'pip install flash-attn --no-build-isolation',
|
24 |
+
env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'},
|
25 |
+
shell=True
|
26 |
)
|
27 |
|
28 |
# --------------------------------------------------------------------
|
|
|
55 |
print(f"Error downloading model: {str(e)}")
|
56 |
model_path = os.path.join(MODEL_CACHE_DIR, MODEL_NAME)
|
57 |
else:
|
|
|
58 |
model_path = args.model_path_arg
|
59 |
|
60 |
MAX_NUM_FRAMES = 64
|
|
|
81 |
def load_model_and_tokenizer():
|
82 |
"""Load a fresh instance of the model and tokenizer."""
|
83 |
try:
|
84 |
+
# Clear GPU memory before loading the model when running on CUDA (done at model load, not per chunk)
|
85 |
if device == "cuda":
|
86 |
torch.cuda.empty_cache()
|
87 |
gc.collect()
|
|
|
122 |
videos=videos if videos else None
|
123 |
)
|
124 |
inputs.to('cuda')
|
|
|
125 |
inputs.update({
|
126 |
'tokenizer': tokenizer,
|
127 |
'max_new_tokens': 100,
|
128 |
'decode_text': True,
|
129 |
+
'use_cache': False # disable caching to reduce memory buildup
|
130 |
})
|
131 |
+
with torch.no_grad():
|
132 |
+
response = model.generate(**inputs)
|
133 |
+
del inputs # delete inputs to free temporary memory
|
134 |
return response[0]
|
135 |
|
136 |
# --------------------------------------------------------------------
|
|
|
253 |
'tokenizer': tokenizer,
|
254 |
'max_new_tokens': 100,
|
255 |
'decode_text': True,
|
256 |
+
'use_cache': False
|
257 |
})
|
258 |
+
with torch.no_grad():
|
259 |
+
response = model.generate(**inputs)
|
260 |
del inputs
|
261 |
return response[0]
|
262 |
except Exception as e:
|
|
|
271 |
"Focus on construction activities, machinery usage, and worker actions.")
|
272 |
response = process_image(image_path, model, tokenizer, processor, prompt)
|
273 |
del model, tokenizer, processor
|
274 |
+
torch.cuda.empty_cache() # Final cleanup after image processing
|
275 |
+
gc.collect()
|
|
|
276 |
return response
|
277 |
except Exception as e:
|
278 |
print(f"Error analyzing image: {str(e)}")
|
|
|
329 |
@spaces.GPU
|
330 |
def analyze_video_activities_single_instance(video_path):
|
331 |
"""Analyze video using mPLUG model with chunking.
|
332 |
+
Use a single mPLUG model instance for all chunks without any per-chunk cleanup."""
|
333 |
try:
|
334 |
all_responses = []
|
335 |
chunk_generator = encode_video_in_chunks(video_path)
|
336 |
|
337 |
+
# Load model instance once
|
338 |
model, tokenizer, processor = load_model_and_tokenizer()
|
339 |
|
340 |
for chunk_idx, video_frames in chunk_generator:
|
|
|
342 |
"Analyze this construction site video chunk and describe the activities happening. "
|
343 |
"Focus on construction activities, machinery usage, and worker actions."
|
344 |
)
|
|
|
345 |
with torch.no_grad():
|
346 |
response = process_video_chunk(video_frames, model, tokenizer, processor, prompt)
|
347 |
all_responses.append(f"Time period {chunk_idx + 1}:\n{response}")
|
348 |
+
# No per-chunk cache clearing is performed here
|
349 |
+
|
350 |
+
# Final cleanup after processing all chunks
|
|
|
|
|
351 |
del model, tokenizer, processor
|
352 |
torch.cuda.empty_cache()
|
353 |
gc.collect()
|