Spaces:

Quazim0t0
/

FastVLMBoxes

Running

File size: 24,333 Bytes

d6ad5d9
1a9c884
 
 
 
 
 
 
 
 
 
5bcc8b4
 
 
 
 
 
 
1a9c884
5bcc8b4
1a9c884
 
 
 
 
 
5bcc8b4
 
5a03d8c
 
6869157
 
26adbba
 
 
c5dbb8d
1a9c884
 
 
5bcc8b4
1a9c884
c5dbb8d
1a9c884
5bcc8b4
1a9c884
 
 
bf84bd5
1a9c884
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf84bd5
1a9c884
 
 
 
 
 
5bcc8b4
1a9c884
 
bf84bd5
1a9c884
 
 
 
 
 
 
 
 
 
 
 
 
c5dbb8d
1a9c884
 
 
 
bf84bd5
1a9c884
26adbba
c5dbb8d
1a9c884
 
c5dbb8d
1a9c884
 
 
c5dbb8d
1a9c884
 
 
 
 
bf84bd5
1a9c884
 
 
 
 
 
6869157
1a9c884
6869157
bf84bd5
1a9c884
 
 
 
 
 
 
 
 
 
 
 
 
c5dbb8d
1a9c884
 
 
 
bf84bd5
1a9c884
26adbba
c5dbb8d
1a9c884
 
c5dbb8d
1a9c884
 
 
c5dbb8d
1a9c884
 
 
 
 
bf84bd5
1a9c884
 
 
 
6869157
1a9c884
5a03d8c
 
 
5bcc8b4
 
5a03d8c
 
26adbba
 
5a03d8c
 
 
 
 
 
c5dbb8d
5a03d8c
 
 
 
 
 
26adbba
c5dbb8d
5bcc8b4
5a03d8c
c5dbb8d
5a03d8c
 
 
 
c5dbb8d
5a03d8c
 
 
 
5bcc8b4
5a03d8c
5bcc8b4
5a03d8c
5bcc8b4
 
 
 
 
 
 
 
 
 
 
 
 
 
c5dbb8d
5bcc8b4
 
 
 
 
 
26adbba
c5dbb8d
5bcc8b4
 
c5dbb8d
5bcc8b4
 
 
c5dbb8d
5bcc8b4
 
 
 
 
 
 
 
 
 
5a03d8c
5bcc8b4
6869157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5dbb8d
6869157
 
 
 
 
 
26adbba
c5dbb8d
6869157
 
c5dbb8d
6869157
 
 
c5dbb8d
6869157
 
 
 
 
 
 
 
 
 
 
 
5bcc8b4
5a03d8c
5bcc8b4
 
 
 
5a03d8c
 
6869157
5a03d8c
 
 
 
 
5bcc8b4
 
6869157
 
 
 
 
 
 
 
 
 
26adbba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a9c884
 
c5dbb8d
 
 
 
1a9c884
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6869157
 
 
 
 
 
1a9c884
 
 
26adbba
 
 
 
 
 
 
 
 
 
 
1a9c884
 
 
5bcc8b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a03d8c
 
 
 
 
 
 
5bcc8b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a03d8c
 
 
 
 
 
5bcc8b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6869157
 
 
 
 
 
5bcc8b4
 
 
 
26adbba
 
 
 
 
 
 
 
 
 
 
1a9c884

import { useState, useRef, useEffect } from "react";
import { useVLMContext } from "../context/useVLMContext";
import { extractJsonFromMarkdown, drawBoundingBoxesOnCanvas } from "./BoxAnnotator";

/** Input sources the view can analyse, in the order their selector buttons are rendered. */
const MODES = ["Webcam", "URL", "File"] as const;

/** One of the entries of {@link MODES}. */
type Mode = (typeof MODES)[number];

/** Detection prompt pre-filled into the prompt textarea. */
const EXAMPLE_PROMPT = "Find as many objects in the video and box them.";

/** Sample clip used as the initial URL-mode source and as the File-mode fallback when nothing is uploaded. */
const EXAMPLE_VIDEO_URL =
  "https://dm0qx8t0i9gc9.cloudfront.net/watermarks/video/47Fj2US_gijjhliil/large-group-of-people-walking-at-city_rpem-bqvu__f51e7e41cf28b832502c9709c8eb2fd8__P360.mp4";

/** Reports whether the file's MIME type begins with the `image/` prefix. */
function isImageFile(file: File) {
  const mimeType = file.type;
  return mimeType.slice(0, "image/".length) === "image/";
}
/** Reports whether the file's MIME type begins with the `video/` prefix. */
function isVideoFile(file: File) {
  const mimeType = file.type;
  return mimeType.slice(0, "video/".length) === "video/";
}

/** Pixel size of the most recently analysed frame, shown in the debug panel. */
type FrameSize = { w: number; h: number };

/** Delay between two frame-analysis attempts while a live loop is running. */
const FRAME_INTERVAL_MS = 1000;

/** Returns `prev` when it already describes `w` x `h`, so React can skip a needless re-render. */
function sameSizeOrNew(prev: FrameSize | null, w: number, h: number): FrameSize {
  return prev !== null && prev.w === w && prev.h === h ? prev : { w, h };
}

/**
 * Object-detection playground with three input modes: live webcam, a video
 * loaded from a URL, or an uploaded image/video file (falling back to a
 * bundled example clip when nothing is uploaded).
 *
 * For every analysed frame the view copies the frame onto an overlay canvas,
 * passes it to `runInference` together with the user's prompt, parses the
 * model output with `extractJsonFromMarkdown` and paints the resulting boxes
 * with `drawBoundingBoxesOnCanvas`.
 */
export default function MultiSourceCaptioningView() {
  const [mode, setMode] = useState<Mode>("File");
  const [videoUrl, setVideoUrl] = useState<string>(EXAMPLE_VIDEO_URL);
  const [inputUrl, setInputUrl] = useState<string>(EXAMPLE_VIDEO_URL);
  const [prompt, setPrompt] = useState<string>(EXAMPLE_PROMPT);
  const [processing, setProcessing] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [webcamActive, setWebcamActive] = useState(false);
  const [uploadedFile, setUploadedFile] = useState<File | null>(null);
  const [uploadedUrl, setUploadedUrl] = useState<string>("");
  const [videoProcessing, setVideoProcessing] = useState(false);
  const [imageProcessed, setImageProcessed] = useState(false);
  const [exampleProcessing, setExampleProcessing] = useState(false);
  const [urlProcessing, setUrlProcessing] = useState(false);
  const [debugOutput, setDebugOutput] = useState<string>("");
  const [canvasDims, setCanvasDims] = useState<FrameSize | null>(null);
  const [videoDims, setVideoDims] = useState<FrameSize | null>(null);
  const [inferenceStatus, setInferenceStatus] = useState<string>("");

  const videoRef = useRef<HTMLVideoElement | null>(null);
  const canvasRef = useRef<HTMLCanvasElement | null>(null);
  const imageRef = useRef<HTMLImageElement | null>(null);
  const webcamStreamRef = useRef<MediaStream | null>(null);
  // True while an inference call is in flight; shared by every source so that
  // runs never overlap, even across effect restarts.
  const inferenceBusyRef = useRef(false);
  const { isLoaded, isLoading, error: modelError, runInference } = useVLMContext();

  const uploadedIsImage = uploadedFile !== null && isImageFile(uploadedFile);
  const uploadedIsVideo = uploadedFile !== null && isVideoFile(uploadedFile);

  // Whether the periodic frame loop should run for the currently selected source.
  let loopRequested: boolean;
  if (mode === "Webcam") {
    loopRequested = webcamActive;
  } else if (mode === "URL") {
    loopRequested = urlProcessing;
  } else if (uploadedFile === null) {
    loopRequested = exampleProcessing;
  } else {
    loopRequested = uploadedIsVideo && videoProcessing;
  }
  const frameLoopActive = isLoaded && loopRequested;

  /**
   * Runs one detection pass: draws `source` onto the overlay canvas, runs the
   * model on it and paints the returned boxes over a fresh copy of the frame.
   *
   * @param source  Element whose current pixels are analysed.
   * @param width   Intrinsic width of `source` in pixels.
   * @param height  Intrinsic height of `source` in pixels.
   * @param isStale Returns true once the caller no longer wants the result
   *                (source switched, loop stopped, component unmounted).
   * @returns true when a result was obtained and drawn, false when the pass
   *          was skipped, discarded as stale, or failed.
   */
  const annotateFrame = async (
    source: CanvasImageSource,
    width: number,
    height: number,
    isStale: () => boolean,
  ): Promise<boolean> => {
    const canvas = canvasRef.current;
    if (!canvas || width === 0 || height === 0) return false;
    // Skip this tick instead of stacking a second inference on top of a slow one.
    if (inferenceBusyRef.current) return false;

    // Assigning width/height also clears the canvas.
    canvas.width = width;
    canvas.height = height;
    const ctx = canvas.getContext("2d");
    if (!ctx) return false;
    setCanvasDims((prev) => sameSizeOrNew(prev, width, height));
    setVideoDims((prev) => sameSizeOrNew(prev, width, height));

    inferenceBusyRef.current = true;
    setProcessing(true);
    setError(null);
    setInferenceStatus("Running inference...");
    try {
      ctx.drawImage(source, 0, 0, width, height);
      // NOTE(review): runInference is handed a minimal stand-in exposing only
      // videoWidth/videoHeight/getContext; presumably it reads the pixels via
      // getContext — confirm against the VLM context implementation.
      const frameProxy = {
        videoWidth: width,
        videoHeight: height,
        getContext: () => ctx,
      } as unknown as HTMLVideoElement;
      const result = await runInference(frameProxy, prompt);
      if (isStale()) return false;

      setDebugOutput(result);
      // Repaint the frame so boxes from the previous pass do not accumulate.
      ctx.drawImage(source, 0, 0, width, height);
      const boxes = extractJsonFromMarkdown(result) || [];
      setInferenceStatus(
        boxes.length === 0 ? "No boxes detected or model output invalid." : "Inference complete.",
      );
      drawBoundingBoxesOnCanvas(ctx, boxes);
      return true;
    } catch (e) {
      if (isStale()) return false;
      const message = e instanceof Error ? e.message : String(e);
      setError(message);
      setInferenceStatus("Inference error: " + message);
      return false;
    } finally {
      inferenceBusyRef.current = false;
      setProcessing(false);
    }
  };

  // Webcam setup and teardown.
  useEffect(() => {
    // Leaving webcam mode is handled by the cleanup of the previous run.
    if (mode !== "Webcam") return;
    let cancelled = false;

    const startWebcam = async () => {
      try {
        setError(null);
        const stream = await navigator.mediaDevices.getUserMedia({ video: true });
        if (cancelled) {
          // The mode changed while the permission prompt was open: release the
          // camera right away instead of keeping an orphaned stream alive.
          stream.getTracks().forEach((track: MediaStreamTrack) => track.stop());
          return;
        }
        webcamStreamRef.current = stream;
        if (videoRef.current) {
          videoRef.current.srcObject = stream;
          setWebcamActive(true);
        }
      } catch (e) {
        if (cancelled) return;
        setError("Could not access webcam: " + (e instanceof Error ? e.message : String(e)));
        setWebcamActive(false);
      }
    };
    void startWebcam();

    return () => {
      cancelled = true;
      if (webcamStreamRef.current) {
        webcamStreamRef.current.getTracks().forEach((track: MediaStreamTrack) => track.stop());
        webcamStreamRef.current = null;
      }
      setWebcamActive(false);
    };
  }, [mode]);

  // Periodic frame analysis for webcam, URL, uploaded-video and example-video sources.
  useEffect(() => {
    if (!frameLoopActive) return;
    let cancelled = false;
    // Recorded sources are only analysed while playing; the webcam feed always is.
    const requirePlayback = mode !== "Webcam";

    const tick = () => {
      const video = videoRef.current;
      if (!video) return;
      if (requirePlayback && (video.paused || video.ended)) return;
      void annotateFrame(video, video.videoWidth, video.videoHeight, () => cancelled);
    };
    const timer = setInterval(tick, FRAME_INTERVAL_MS);

    return () => {
      cancelled = true;
      clearInterval(timer);
    };
    // annotateFrame is re-created on every render; the values it closes over
    // that matter here (prompt, runInference) are listed explicitly instead.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [frameLoopActive, mode, prompt, runInference]);

  // Release the blob URL of the previous upload once it is replaced or the view unmounts.
  useEffect(() => {
    if (!uploadedUrl) return;
    return () => {
      URL.revokeObjectURL(uploadedUrl);
    };
  }, [uploadedUrl]);

  // File mode: process the uploaded image (only on button click).
  const handleProcessImage = async () => {
    const img = imageRef.current;
    if (!isLoaded || !uploadedIsImage || !img) return;
    if (!img.complete || img.naturalWidth === 0) {
      setError("Image has not finished loading yet.");
      return;
    }
    const sourceAtStart = img.src;
    // The same <img> element is reused when another file is picked, so compare
    // its src as well as its identity to detect a replaced image.
    const isStale = () => imageRef.current !== img || img.src !== sourceAtStart;
    const annotated = await annotateFrame(img, img.naturalWidth, img.naturalHeight, isStale);
    if (annotated) setImageProcessed(true);
  };

  // Handle file upload: remember the file, expose it through a blob URL and reset per-file flags.
  const handleFileChange = (e: { target: { files: FileList | null } }) => {
    const file = e.target.files?.[0] ?? null;
    setUploadedFile(file);
    setUploadedUrl(file ? URL.createObjectURL(file) : "");
    setError(null);
    setImageProcessed(false);
    setVideoProcessing(false);
    setExampleProcessing(false);
  };

  // Debug aid: clears the overlay and draws a fixed magenta box to verify canvas alignment.
  const handleTestDrawBox = () => {
    const canvas = canvasRef.current;
    if (!canvas) return;
    const ctx = canvas.getContext("2d");
    if (!ctx) return;
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.strokeStyle = "#FF00FF";
    ctx.lineWidth = 4;
    ctx.strokeRect(40, 40, Math.max(40, canvas.width / 4), Math.max(40, canvas.height / 4));
    ctx.font = "20px Arial";
    ctx.fillStyle = "#FF00FF";
    ctx.fillText("Test Box", 50, 35);
  };

  let modelStatus: string;
  if (isLoading) {
    modelStatus = "Loading model...";
  } else if (isLoaded) {
    modelStatus = "Model loaded";
  } else if (modelError) {
    modelStatus = `Model error: ${modelError}`;
  } else {
    modelStatus = "Model not loaded";
  }

  // Fragments shared by several modes; only one mode is rendered at a time.
  const promptEditor = (
    <div className="mb-4 w-full max-w-xl">
      <label htmlFor="detection-prompt" className="block text-left mb-2 font-medium">
        Detection Prompt:
      </label>
      <textarea
        id="detection-prompt"
        className="w-full p-2 rounded-lg text-black"
        rows={3}
        value={prompt}
        onChange={(e) => setPrompt(e.target.value)}
      />
    </div>
  );

  const overlayCanvas = (
    <canvas
      ref={canvasRef}
      className="absolute top-0 left-0 w-full h-full pointer-events-none"
      style={{ zIndex: 10, pointerEvents: "none" }}
    />
  );

  const statusLines = (
    <>
      {processing && <div className="text-blue-400 mt-2">Processing frame...</div>}
      {error && <div className="text-red-400 mt-2">Error: {error}</div>}
    </>
  );

  const debugPanel = (
    <>
      <button
        className="mt-4 px-6 py-2 rounded-lg bg-gray-600 text-white font-semibold"
        onClick={handleTestDrawBox}
      >
        Test Draw Box
      </button>
      <div className="mt-2 p-2 bg-gray-800 rounded text-xs">
        <div>
          Canvas: {canvasDims ? `${canvasDims.w}x${canvasDims.h}` : "-"} | Video:{" "}
          {videoDims ? `${videoDims.w}x${videoDims.h}` : "-"}
        </div>
        <div>Raw Model Output:</div>
        <pre className="overflow-x-auto max-h-32 whitespace-pre-wrap">{debugOutput}</pre>
      </div>
    </>
  );

  // Looping player with overlay canvas and a start/stop button for the frame loop.
  const renderVideoPlayer = (src: string, running: boolean, onToggle: () => void) => (
    <div className="relative w-full max-w-xl">
      <video
        ref={videoRef}
        src={src}
        controls
        autoPlay
        loop
        className="w-full rounded-lg shadow-lg mb-2"
        style={{ background: "#222" }}
      />
      {overlayCanvas}
      <button
        className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold"
        onClick={onToggle}
      >
        {running ? "Stop Processing" : "Start Processing"}
      </button>
    </div>
  );

  return (
    <div className="absolute inset-0 text-white">
      <div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
        {modelStatus}
      </div>
      <div className="text-center text-sm text-blue-300 mt-2">{inferenceStatus}</div>
      <div className="flex flex-col items-center justify-center h-full w-full">
        {/* Mode Selector */}
        <div className="mb-6">
          <div className="flex space-x-4">
            {MODES.map((m) => (
              <button
                key={m}
                className={`px-6 py-2 rounded-lg font-semibold transition-all duration-200 ${
                  mode === m ? "bg-blue-600 text-white" : "bg-gray-700 text-gray-300 hover:bg-blue-500"
                }`}
                onClick={() => setMode(m)}
              >
                {m}
              </button>
            ))}
          </div>
        </div>

        {/* Mode Content */}
        <div className="w-full max-w-2xl flex-1 flex flex-col items-center justify-center">
          {mode === "Webcam" && (
            <div className="w-full text-center flex flex-col items-center">
              {promptEditor}
              <div className="relative w-full max-w-xl">
                <video
                  ref={videoRef}
                  autoPlay
                  muted
                  playsInline
                  className="w-full rounded-lg shadow-lg mb-2"
                  style={{ background: "#222" }}
                />
                {overlayCanvas}
              </div>
              {statusLines}
            </div>
          )}

          {mode === "URL" && (
            <div className="w-full text-center flex flex-col items-center">
              <p className="mb-4">Enter a video stream URL (e.g., HTTP MP4, MJPEG, HLS, etc.):</p>
              <div className="flex w-full max-w-xl mb-4">
                <input
                  type="text"
                  className="flex-1 px-4 py-2 rounded-l-lg text-black"
                  value={inputUrl}
                  onChange={(e) => setInputUrl(e.target.value)}
                  placeholder="Paste video URL here"
                />
                <button
                  className="px-4 py-2 rounded-r-lg bg-blue-600 text-white font-semibold"
                  onClick={() => setVideoUrl(inputUrl)}
                >
                  Load
                </button>
              </div>
              {promptEditor}
              {renderVideoPlayer(videoUrl, urlProcessing, () => setUrlProcessing((prev) => !prev))}
              {statusLines}
              {debugPanel}
            </div>
          )}

          {mode === "File" && (
            <div className="w-full text-center flex flex-col items-center">
              {promptEditor}
              <div className="mb-4 w-full max-w-xl">
                <input
                  type="file"
                  accept="image/*,video/*"
                  onChange={handleFileChange}
                  className="block w-full text-sm text-gray-300 file:mr-4 file:py-2 file:px-4 file:rounded-lg file:border-0 file:text-sm file:font-semibold file:bg-blue-600 file:text-white hover:file:bg-blue-700"
                />
              </div>

              {/* Show uploaded image */}
              {uploadedIsImage && (
                <div className="relative w-full max-w-xl">
                  <img
                    ref={imageRef}
                    src={uploadedUrl}
                    alt="Uploaded"
                    className="w-full rounded-lg shadow-lg mb-2"
                    style={{ background: "#222" }}
                  />
                  {overlayCanvas}
                  <button
                    className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold"
                    onClick={() => void handleProcessImage()}
                    disabled={processing}
                  >
                    {processing ? "Processing..." : imageProcessed ? "Reprocess Image" : "Process Image"}
                  </button>
                </div>
              )}

              {/* Show uploaded video */}
              {uploadedIsVideo &&
                renderVideoPlayer(uploadedUrl, videoProcessing, () => setVideoProcessing((prev) => !prev))}

              {/* Show example video if no file uploaded */}
              {!uploadedFile &&
                renderVideoPlayer(EXAMPLE_VIDEO_URL, exampleProcessing, () =>
                  setExampleProcessing((prev) => !prev),
                )}

              {statusLines}
              {debugPanel}
            </div>
          )}
        </div>
      </div>
    </div>
  );
}