Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import spaces
 import torch
 import argparse
@@ -8,6 +7,7 @@ import pickle # For serializing frames
 import gc
 import tempfile
 import subprocess
+import time
 from datetime import datetime
 from transformers import AutoModel, AutoTokenizer
 from modelscope.hub.snapshot_download import snapshot_download
@@ -25,6 +25,7 @@ subprocess.run(
     env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'},
     shell=True
 )
+
 # --------------------------------------------------------------------
 # Command-line arguments
 # --------------------------------------------------------------------
@@ -32,7 +33,7 @@ parser = argparse.ArgumentParser(description='demo')
 parser.add_argument('--device', type=str, default='cuda', help='cuda or mps')
 parser.add_argument("--host", type=str, default="0.0.0.0")
 parser.add_argument("--port", type=int)
-#
+# Arguments for subprocess inference mode
 parser.add_argument("--chunk_inference", action="store_true", help="Run inference on a chunk (subprocess mode).")
 parser.add_argument("--input_file", type=str, help="Path to serialized input chunk frames.")
 parser.add_argument("--output_file", type=str, help="Path to file where inference result is written.")
@@ -130,11 +131,11 @@ def process_video_chunk(video_frames, model, tokenizer, processor, prompt):
     })
     with torch.no_grad():
         response = model.generate(**inputs)
-    del inputs #
+    del inputs # Free temporary memory
     return response[0]
 
 # --------------------------------------------------------------------
-# Video and YOLO
+# Video and YOLO Functions (Unchanged)
 # --------------------------------------------------------------------
 def encode_video_in_chunks(video_path):
     """Extract frames from a video in chunks."""
@@ -324,36 +325,78 @@ def annotate_video_with_bboxes(video_path):
     return annotated_video_path
 
 # --------------------------------------------------------------------
-#
+# Subprocess Worker: Executed when --chunk_inference flag is provided
+# --------------------------------------------------------------------
+if args.chunk_inference:
+    # In worker mode, load the serialized frames from the input file
+    try:
+        with open(args.input_file, "rb") as f:
+            frames_serialized = pickle.load(f)
+        video_frames = []
+        for img_bytes in frames_serialized:
+            video_frames.append(Image.open(io.BytesIO(img_bytes)))
+    except Exception as e:
+        print(f"Error reading input frames: {str(e)}")
+        sys.exit(1)
+    try:
+        model, tokenizer, processor = load_model_and_tokenizer()
+        response = process_video_chunk(video_frames, model, tokenizer, processor, args.inference_prompt)
+        with open(args.output_file, "w") as f:
+            f.write(response)
+        del model, tokenizer, processor
+        torch.cuda.empty_cache()
+        gc.collect()
+    except Exception as e:
+        with open(args.output_file, "w") as f:
+            f.write(f"Error in chunk inference: {str(e)}")
+    sys.exit(0)
+
+# --------------------------------------------------------------------
+# Main Video Analysis Function Using Subprocess Isolation
 # --------------------------------------------------------------------
 @spaces.GPU
-def analyze_video_activities(video_path):
-    """Analyze video …
-
+def analyze_video_activities_subprocess(video_path):
+    """Analyze video by processing each chunk in a separate subprocess.
+    Each subprocess loads a fresh model instance to avoid GPU memory buildup."""
     try:
         all_responses = []
         chunk_generator = encode_video_in_chunks(video_path)
-
-        # Load model instance once
-        model, tokenizer, processor = load_model_and_tokenizer()
-
         for chunk_idx, video_frames in chunk_generator:
-            …
+            # Serialize each frame in the chunk to bytes
+            temp_input = tempfile.NamedTemporaryFile(suffix=".pkl", delete=False)
+            frames_serializable = []
+            for img in video_frames:
+                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tf:
+                    img.save(tf, format="PNG")
+                    tf.seek(0)
+                    frames_serializable.append(tf.read())
+                os.remove(tf.name)
+            with open(temp_input.name, "wb") as f:
+                pickle.dump(frames_serializable, f)
+            # Create a temporary file for subprocess output
+            temp_output = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
+            temp_output.close()
+            prompt = ("Analyze this construction site video chunk and describe the activities happening. "
+                      "Focus on construction activities, machinery usage, and worker actions.")
+            # Launch subprocess for this chunk
+            subprocess.run([
+                sys.executable, __file__,
+                "--chunk_inference",
+                "--input_file", temp_input.name,
+                "--output_file", temp_output.name,
+                "--inference_prompt", prompt,
+                "--model_path_arg", model_path,
+                "--device", device
+            ], check=True)
+            with open(temp_output.name, "r") as f:
+                response = f.read().strip()
             all_responses.append(f"Time period {chunk_idx + 1}:\n{response}")
-            …
-        del model, tokenizer, processor
-        torch.cuda.empty_cache()
-        gc.collect()
+            os.remove(temp_input.name)
+            os.remove(temp_output.name)
+            time.sleep(2) # Allow time for GPU memory to fully clear before next chunk
         return "\n\n".join(all_responses)
     except Exception as e:
-        print(f"Error …
+        print(f"Error in subprocess chunk inference: {str(e)}")
        return "Error analyzing video activities"
 
 # --------------------------------------------------------------------
@@ -383,7 +426,8 @@ def process_diary(day, date, total_people, total_machinery, machinery_types, act
     if is_image(media.name):
         detected_activities = analyze_image_activities(temp_path)
     else:
-        detected_activities = analyze_video_activities(temp_path)
+        # Use the subprocess-based video analysis for each chunk
+        detected_activities = analyze_video_activities_subprocess(temp_path)
     annotated_video_path = annotate_video_with_bboxes(temp_path)
     if os.path.exists(temp_path):
         os.remove(temp_path)
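
The heart of this change is a relaunch-per-chunk pattern: rather than reusing one in-process model, the script re-invokes itself (sys.executable plus __file__) with --chunk_inference, so each chunk is handled by a short-lived child process whose GPU memory is released unconditionally when it exits. Below is a minimal, self-contained sketch of that round trip; the --worker flag and the work() stand-in are illustrative only, not names from app.py.

    # relaunch_sketch.py -- the same file acts as parent and worker.
    import subprocess
    import sys
    import tempfile

    def work(text):
        # Stand-in for load_model_and_tokenizer() + process_video_chunk().
        return text.upper()

    if "--worker" in sys.argv:
        # Child: read input file, write result file, exit (freeing all memory).
        in_path, out_path = sys.argv[-2], sys.argv[-1]
        with open(in_path) as f_in, open(out_path, "w") as f_out:
            f_out.write(work(f_in.read()))
        sys.exit(0)

    def run_in_subprocess(payload):
        f_in = tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False)
        f_in.write(payload)
        f_in.close()
        f_out = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
        f_out.close()
        # check=True turns a non-zero child exit (the worker's sys.exit(1) path)
        # into CalledProcessError instead of silently reading a stale file.
        subprocess.run(
            [sys.executable, __file__, "--worker", f_in.name, f_out.name],
            check=True,
        )
        with open(f_out.name) as f:
            return f.read()

    if __name__ == "__main__":
        print(run_in_subprocess("chunk 1"))  # -> CHUNK 1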
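
One serialization detail worth noting: the parent obtains PNG bytes by saving each frame to a temporary .png file on disk and reading it back, while the worker decodes with Image.open(io.BytesIO(img_bytes)). The same encode/decode round trip also works entirely in memory with io.BytesIO on the write side; the sketch below shows the symmetric pair as an alternative, not what the diff itself does.

    import io
    import pickle
    from PIL import Image

    def frames_to_png_bytes(frames):
        blobs = []
        for img in frames:
            buf = io.BytesIO()
            img.save(buf, format="PNG")  # encode without touching disk
            blobs.append(buf.getvalue())
        return blobs

    def png_bytes_to_frames(blobs):
        # Mirrors the worker's Image.open(io.BytesIO(img_bytes)) loop.
        return [Image.open(io.BytesIO(b)) for b in blobs]

    frames = [Image.new("RGB", (4, 4), "red")]
    payload = pickle.dumps(frames_to_png_bytes(frames))  # contents of input_file
    restored = png_bytes_to_frames(pickle.loads(payload))
    assert restored[0].size == (4, 4)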
|