Quazim0t0 committed on
Commit
9424f3d
·
verified ·
1 Parent(s): 770efaf

Upload 51 files

Browse files
src/components/MultiSourceCaptioningView.tsx CHANGED
@@ -6,7 +6,7 @@ const MODES = ["Webcam", "URL", "File"] as const;
6
  type Mode = typeof MODES[number];
7
 
8
  const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
9
- const EXAMPLE_PROMPT = "Detect all people in the image. For each person, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. Example: [{\"label\": \"person\", \"bbox_2d\": [100, 50, 200, 300]}]";
10
 
11
  function parseFlatBoxArray(arr: any[]): { label: string, bbox_2d: number[] }[] {
12
  if (typeof arr[0] === "string" && Array.isArray(arr[1])) {
@@ -321,6 +321,30 @@ export default function MultiSourceCaptioningView() {
321
  ctx.fillText("Test Box", 50, 35);
322
  };
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  return (
325
  <div className="absolute inset-0 text-white">
326
  <div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
 
6
  type Mode = typeof MODES[number];
7
 
8
  const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
9
+ const EXAMPLE_PROMPT = "Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. This is only an example use your own values (Do not use these values [100, 50, 200, 300]}) Example: [{"label": "bird", "bbox_2d": [100, 50, 200, 300]}]";
10
 
11
  function parseFlatBoxArray(arr: any[]): { label: string, bbox_2d: number[] }[] {
12
  if (typeof arr[0] === "string" && Array.isArray(arr[1])) {
 
321
  ctx.fillText("Test Box", 50, 35);
322
  };
323
 
324
+ useEffect(() => {
325
+ const draw = () => {
326
+ const overlayVideo = overlayVideoRef.current;
327
+ const canvas = canvasRef.current;
328
+ if (!overlayVideo || !canvas) return;
329
+ if (overlayVideo.videoWidth === 0) return;
330
+ canvas.width = overlayVideo.videoWidth;
331
+ canvas.height = overlayVideo.videoHeight;
332
+ const ctx = canvas.getContext("2d");
333
+ if (!ctx) return;
334
+ ctx.clearRect(0, 0, canvas.width, canvas.height);
335
+ const now = Date.now();
336
+ const boxHistory = boxHistoryRef.current.filter((b: any) => now - b.timestamp < 2000);
337
+ if (boxHistory.length > 0) {
338
+ const scaleX = canvas.width / overlayVideo.videoWidth;
339
+ const scaleY = canvas.height / overlayVideo.videoHeight;
340
+ drawBoundingBoxesOnCanvas(ctx, boxHistory, { color: "#FF00FF", lineWidth: 4, font: "20px Arial", scaleX, scaleY });
341
+ }
342
+ };
343
+ draw();
344
+ const interval = setInterval(draw, 100);
345
+ return () => clearInterval(interval);
346
+ }, [overlayVideoRef, canvasRef]);
347
+
348
  return (
349
  <div className="absolute inset-0 text-white">
350
  <div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
videos/1.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af45772f7154d3e134f0033e65fec8379be7d6a22e847598a8c1a3ffa7b013c3
3
- size 26572724
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ea1c79a0b83ee324c292b3f0f12214f5e9d8a42437c0d26bd3be71004b57c0
3
+ size 1754021