Quazim0t0 committed on
Commit
050d9a1
·
verified ·
1 Parent(s): eff3f6b

Upload 51 files

Browse files
src/components/MultiSourceCaptioningView.tsx CHANGED
@@ -1,4 +1,4 @@
1
- import React, { useState, useRef, useEffect } from "react";
2
  import { useVLMContext } from "../context/useVLMContext";
3
  import { extractJsonFromMarkdown, drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
4
 
@@ -82,55 +82,12 @@ export default function MultiSourceCaptioningView() {
82
  const [inferenceStatus, setInferenceStatus] = useState<string>("");
83
 
84
  const videoRef = useRef<HTMLVideoElement | null>(null);
85
- const overlayVideoRef = useRef<HTMLVideoElement | null>(null); // NEW: overlay video
86
  const canvasRef = useRef<HTMLCanvasElement | null>(null);
87
  const imageRef = useRef<HTMLImageElement | null>(null);
88
  const webcamStreamRef = useRef<MediaStream | null>(null);
89
  const { isLoaded, isLoading, error: modelError, runInference } = useVLMContext();
90
 
91
- // Persistent boxes state: {boxes: [...], timestamp: number}
92
- const [persistentBoxes, setPersistentBoxes] = useState<{boxes: {label: string, bbox_2d: number[]}[], timestamp: number}[]>([]);
93
- const BOX_PERSIST_MS = 2000; // 2 seconds
94
-
95
- // Helper: Add new boxes with timestamp
96
- const addBoxesWithTimestamp = (boxes: {label: string, bbox_2d: number[]}[]) => {
97
- if (!boxes || boxes.length === 0) return;
98
- setPersistentBoxes((prev: {boxes: {label: string, bbox_2d: number[]}[], timestamp: number}[]) => [
99
- ...prev.filter((entry: {boxes: {label: string, bbox_2d: number[]}[], timestamp: number}) => Date.now() - entry.timestamp < BOX_PERSIST_MS),
100
- { boxes, timestamp: Date.now() }
101
- ]);
102
- };
103
-
104
- // Helper: Get all boxes from last 2 seconds
105
- const getCurrentBoxes = () => {
106
- const now = Date.now();
107
- return persistentBoxes
108
- .filter((entry: {boxes: {label: string, bbox_2d: number[]}[], timestamp: number}) => now - entry.timestamp < BOX_PERSIST_MS)
109
- .flatMap((entry: {boxes: {label: string, bbox_2d: number[]}[], timestamp: number}) => entry.boxes);
110
- };
111
-
112
- // Synchronize overlay video with main video
113
- useEffect(() => {
114
- const main = videoRef.current;
115
- const overlay = overlayVideoRef.current;
116
- if (!main || !overlay) return;
117
- // Sync play/pause
118
- main.addEventListener('play', () => overlay.play());
119
- main.addEventListener('pause', () => overlay.pause());
120
- // Sync seeking
121
- const syncTime = () => { if (Math.abs(main.currentTime - overlay.currentTime) > 0.05) overlay.currentTime = main.currentTime; };
122
- main.addEventListener('seeked', syncTime);
123
- main.addEventListener('timeupdate', syncTime);
124
- // Clean up
125
- return () => {
126
- main.removeEventListener('play', () => overlay.play());
127
- main.removeEventListener('pause', () => overlay.pause());
128
- main.removeEventListener('seeked', syncTime);
129
- main.removeEventListener('timeupdate', syncTime);
130
- };
131
- }, [videoRef, overlayVideoRef, uploadedUrl, videoUrl, mode]);
132
-
133
- // Update: processVideoFrame now adds boxes to persistentBoxes
134
  const processVideoFrame = async () => {
135
  if (!videoRef.current || !canvasRef.current) return;
136
  const video = videoRef.current;
@@ -142,13 +99,12 @@ export default function MultiSourceCaptioningView() {
142
  if (!ctx) return;
143
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
144
  await runInference(video, prompt, (output: string) => {
145
- setDebugOutput(output);
146
  let boxes = extractJsonFromMarkdown(output) || [];
147
  if (boxes.length === 0 && Array.isArray(output)) {
148
  boxes = parseFlatBoxArray(output);
149
  }
150
  boxes = normalizeBoxes(boxes);
151
- // Restore debug logging
152
  console.log("Model output:", output);
153
  console.log("Boxes after normalization:", boxes);
154
  console.log("Canvas size:", canvas.width, canvas.height);
@@ -158,35 +114,13 @@ export default function MultiSourceCaptioningView() {
158
  }
159
  if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
160
  if (Array.isArray(boxes) && boxes.length > 0) {
161
- addBoxesWithTimestamp(boxes); // <-- Add to persistent state
162
- }
163
- });
164
- };
165
-
166
- // Draw persistent boxes on every frame
167
- useEffect(() => {
168
- const draw = () => {
169
- // Use overlay video for dimensions if available, else fallback to main video
170
- const video = overlayVideoRef.current || videoRef.current;
171
- if (!video || !canvasRef.current) return;
172
- if (video.videoWidth === 0) return;
173
- const canvas = canvasRef.current;
174
- canvas.width = video.videoWidth;
175
- canvas.height = video.videoHeight;
176
- const ctx = canvas.getContext("2d");
177
- if (!ctx) return;
178
- ctx.clearRect(0, 0, canvas.width, canvas.height);
179
- const boxes = getCurrentBoxes();
180
- if (boxes.length > 0) {
181
  const scaleX = canvas.width / video.videoWidth;
182
  const scaleY = canvas.height / video.videoHeight;
183
- drawBoundingBoxesOnCanvas(ctx, boxes, { color: "#FF00FF", lineWidth: 4, font: "20px Arial", scaleX, scaleY });
 
184
  }
185
- };
186
- draw();
187
- const interval = setInterval(draw, 100);
188
- return () => clearInterval(interval);
189
- }, [persistentBoxes, videoRef, overlayVideoRef, canvasRef]);
190
 
191
  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
192
  const file = e.target.files?.[0] || null;
@@ -442,7 +376,7 @@ export default function MultiSourceCaptioningView() {
442
  controls
443
  autoPlay
444
  loop
445
- className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 z-0"
446
  style={{ background: "#222" }}
447
  />
448
  <video
@@ -452,8 +386,8 @@ export default function MultiSourceCaptioningView() {
452
  autoPlay
453
  loop
454
  muted
455
- className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 z-10 opacity-60 pointer-events-none"
456
- style={{ background: "#222" }}
457
  />
458
  <canvas
459
  ref={canvasRef}
@@ -461,7 +395,7 @@ export default function MultiSourceCaptioningView() {
461
  style={{ zIndex: 20, pointerEvents: "none" }}
462
  />
463
  <button
464
- className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold relative z-30"
465
  onClick={handleToggleUrlProcessing}
466
  >
467
  {urlProcessing ? "Stop Processing" : "Start Processing"}
@@ -475,6 +409,11 @@ export default function MultiSourceCaptioningView() {
475
  >
476
  Test Draw Box
477
  </button>
 
 
 
 
 
478
  </div>
479
  )}
480
  {mode === "File" && (
@@ -529,7 +468,7 @@ export default function MultiSourceCaptioningView() {
529
  controls
530
  autoPlay
531
  loop
532
- className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 z-0"
533
  style={{ background: "#222" }}
534
  />
535
  <video
@@ -539,8 +478,8 @@ export default function MultiSourceCaptioningView() {
539
  autoPlay
540
  loop
541
  muted
542
- className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 z-10 opacity-60 pointer-events-none"
543
- style={{ background: "#222" }}
544
  />
545
  <canvas
546
  ref={canvasRef}
@@ -548,7 +487,7 @@ export default function MultiSourceCaptioningView() {
548
  style={{ zIndex: 20, pointerEvents: "none" }}
549
  />
550
  <button
551
- className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold relative z-30"
552
  onClick={handleToggleVideoProcessing}
553
  >
554
  {videoProcessing ? "Stop Processing" : "Start Processing"}
@@ -564,7 +503,7 @@ export default function MultiSourceCaptioningView() {
564
  controls
565
  autoPlay
566
  loop
567
- className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 z-0"
568
  style={{ background: "#222" }}
569
  />
570
  <video
@@ -574,8 +513,8 @@ export default function MultiSourceCaptioningView() {
574
  autoPlay
575
  loop
576
  muted
577
- className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 z-10 opacity-60 pointer-events-none"
578
- style={{ background: "#222" }}
579
  />
580
  <canvas
581
  ref={canvasRef}
@@ -583,7 +522,7 @@ export default function MultiSourceCaptioningView() {
583
  style={{ zIndex: 20, pointerEvents: "none" }}
584
  />
585
  <button
586
- className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold relative z-30"
587
  onClick={handleToggleExampleProcessing}
588
  >
589
  {exampleProcessing ? "Stop Processing" : "Start Processing"}
@@ -598,14 +537,13 @@ export default function MultiSourceCaptioningView() {
598
  >
599
  Test Draw Box
600
  </button>
 
 
 
 
 
601
  </div>
602
  )}
603
- {/* Always show Raw Model Output at the bottom */}
604
- <div className="mt-2 p-2 bg-gray-800 rounded text-xs w-full max-w-xl">
605
- <div>Canvas: {canvasDims ? `${canvasDims.w}x${canvasDims.h}` : "-"} | Video: {videoDims ? `${videoDims.w}x${videoDims.h}` : "-"}</div>
606
- <div>Raw Model Output:</div>
607
- <pre className="overflow-x-auto max-h-32 whitespace-pre-wrap">{debugOutput}</pre>
608
- </div>
609
  </div>
610
  </div>
611
  </div>
 
1
+ import { useState, useRef, useEffect } from "react";
2
  import { useVLMContext } from "../context/useVLMContext";
3
  import { extractJsonFromMarkdown, drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
4
 
 
82
  const [inferenceStatus, setInferenceStatus] = useState<string>("");
83
 
84
  const videoRef = useRef<HTMLVideoElement | null>(null);
85
+ const overlayVideoRef = useRef<HTMLVideoElement | null>(null);
86
  const canvasRef = useRef<HTMLCanvasElement | null>(null);
87
  const imageRef = useRef<HTMLImageElement | null>(null);
88
  const webcamStreamRef = useRef<MediaStream | null>(null);
89
  const { isLoaded, isLoading, error: modelError, runInference } = useVLMContext();
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  const processVideoFrame = async () => {
92
  if (!videoRef.current || !canvasRef.current) return;
93
  const video = videoRef.current;
 
99
  if (!ctx) return;
100
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
101
  await runInference(video, prompt, (output: string) => {
102
+ setDebugOutput(output); // <-- Ensure Raw Model Output is updated
103
  let boxes = extractJsonFromMarkdown(output) || [];
104
  if (boxes.length === 0 && Array.isArray(output)) {
105
  boxes = parseFlatBoxArray(output);
106
  }
107
  boxes = normalizeBoxes(boxes);
 
108
  console.log("Model output:", output);
109
  console.log("Boxes after normalization:", boxes);
110
  console.log("Canvas size:", canvas.width, canvas.height);
 
114
  }
115
  if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
116
  if (Array.isArray(boxes) && boxes.length > 0) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  const scaleX = canvas.width / video.videoWidth;
118
  const scaleY = canvas.height / video.videoHeight;
119
+ ctx.clearRect(0, 0, canvas.width, canvas.height); // Clear canvas before drawing boxes
120
+ drawBoundingBoxesOnCanvas(ctx, boxes, { color: "#FF00FF", lineWidth: 4, font: "20px Arial", scaleX, scaleY }); // Use visible color and thick line
121
  }
122
+ });
123
+ };
 
 
 
124
 
125
  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
126
  const file = e.target.files?.[0] || null;
 
376
  controls
377
  autoPlay
378
  loop
379
+ className="w-full rounded-lg shadow-lg mb-2"
380
  style={{ background: "#222" }}
381
  />
382
  <video
 
386
  autoPlay
387
  loop
388
  muted
389
+ className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 opacity-60 pointer-events-none"
390
+ style={{ background: "#222", zIndex: 10 }}
391
  />
392
  <canvas
393
  ref={canvasRef}
 
395
  style={{ zIndex: 20, pointerEvents: "none" }}
396
  />
397
  <button
398
+ className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold"
399
  onClick={handleToggleUrlProcessing}
400
  >
401
  {urlProcessing ? "Stop Processing" : "Start Processing"}
 
409
  >
410
  Test Draw Box
411
  </button>
412
+ <div className="mt-2 p-2 bg-gray-800 rounded text-xs">
413
+ <div>Canvas: {canvasDims ? `${canvasDims.w}x${canvasDims.h}` : "-"} | Video: {videoDims ? `${videoDims.w}x${videoDims.h}` : "-"}</div>
414
+ <div>Raw Model Output:</div>
415
+ <pre className="overflow-x-auto max-h-32 whitespace-pre-wrap">{debugOutput}</pre>
416
+ </div>
417
  </div>
418
  )}
419
  {mode === "File" && (
 
468
  controls
469
  autoPlay
470
  loop
471
+ className="w-full rounded-lg shadow-lg mb-2"
472
  style={{ background: "#222" }}
473
  />
474
  <video
 
478
  autoPlay
479
  loop
480
  muted
481
+ className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 opacity-60 pointer-events-none"
482
+ style={{ background: "#222", zIndex: 10 }}
483
  />
484
  <canvas
485
  ref={canvasRef}
 
487
  style={{ zIndex: 20, pointerEvents: "none" }}
488
  />
489
  <button
490
+ className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold"
491
  onClick={handleToggleVideoProcessing}
492
  >
493
  {videoProcessing ? "Stop Processing" : "Start Processing"}
 
503
  controls
504
  autoPlay
505
  loop
506
+ className="w-full rounded-lg shadow-lg mb-2"
507
  style={{ background: "#222" }}
508
  />
509
  <video
 
513
  autoPlay
514
  loop
515
  muted
516
+ className="w-full rounded-lg shadow-lg mb-2 absolute top-0 left-0 opacity-60 pointer-events-none"
517
+ style={{ background: "#222", zIndex: 10 }}
518
  />
519
  <canvas
520
  ref={canvasRef}
 
522
  style={{ zIndex: 20, pointerEvents: "none" }}
523
  />
524
  <button
525
+ className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold"
526
  onClick={handleToggleExampleProcessing}
527
  >
528
  {exampleProcessing ? "Stop Processing" : "Start Processing"}
 
537
  >
538
  Test Draw Box
539
  </button>
540
+ <div className="mt-2 p-2 bg-gray-800 rounded text-xs">
541
+ <div>Canvas: {canvasDims ? `${canvasDims.w}x${canvasDims.h}` : "-"} | Video: {videoDims ? `${videoDims.w}x${videoDims.h}` : "-"}</div>
542
+ <div>Raw Model Output:</div>
543
+ <pre className="overflow-x-auto max-h-32 whitespace-pre-wrap">{debugOutput}</pre>
544
+ </div>
545
  </div>
546
  )}
 
 
 
 
 
 
547
  </div>
548
  </div>
549
  </div>