Quazim0t0 committed on
Commit
cf6cccc
·
verified ·
1 Parent(s): 43aa15a

Upload 51 files

Browse files
src/components/MultiSourceCaptioningView.tsx CHANGED
@@ -1,6 +1,6 @@
1
  import { useState, useRef, useEffect } from "react";
2
  import { useVLMContext } from "../context/useVLMContext";
3
- import { extractJsonFromMarkdown, drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
4
 
5
  // Names of the three modes; presumably the input sources this view offers (usage not visible here).
  const MODES = ["Webcam", "URL", "File"] as const;
6
  // Union of the literal strings in MODES: "Webcam" | "URL" | "File".
  type Mode = typeof MODES[number];
@@ -8,53 +8,6 @@ type Mode = typeof MODES[number];
8
  // Path of a sample video; how it is consumed is not visible in this excerpt.
  const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
9
  // Sample prompt asking for a JSON array of { label, bbox_2d } objects with pixel coordinates.
  const EXAMPLE_PROMPT = "Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. Example: [{\"label\": \"bird\", \"bbox_2d\": [x1, y1, x2, y2]}]";
10
 
11
/**
 * Expands a flat `[label, bbox, bbox, ...]` array into one labelled box per bbox.
 *
 * The input is only treated as this shape when its first element is a string
 * and its second element is an array; otherwise nothing is returned.
 *
 * @param arr - Candidate flat array of a label followed by bounding boxes.
 * @returns One `{ label, bbox_2d }` per trailing entry that is an array made up
 *   solely of numbers. Trailing entries of any other kind are skipped instead of
 *   being passed through as a malformed `bbox_2d`. Empty when `arr` does not
 *   have the flat shape.
 */
function parseFlatBoxArray(arr: readonly unknown[]): { label: string; bbox_2d: number[] }[] {
  const [label, ...rest] = arr;
  if (typeof label !== "string" || !Array.isArray(rest[0])) {
    return [];
  }

  const isNumberArray = (value: unknown): value is number[] =>
    Array.isArray(value) && value.every((v) => typeof v === "number");

  return rest.filter(isNumberArray).map((bbox_2d) => ({ label, bbox_2d }));
}
18
-
19
/**
 * Coerces a loosely-shaped value into a flat list of labelled bounding boxes.
 *
 * Accepted container shapes:
 * - an object whose `image` property is an array: that array is the box list,
 * - an array: used as the box list directly,
 * - any other object: treated as a single box.
 * Anything else (falsy values, strings, numbers) yields an empty list.
 *
 * Accepted `bbox_2d` shapes per entry: `[x1, y1, x2, y2]`, or
 * `[[x1, y1], [x2, y2]]`, which is flattened to the former. Entries whose
 * coordinates are not exactly four finite numbers are dropped.
 *
 * @param raw - Untrusted value to normalize (presumably parsed model output;
 *   the caller is not visible here).
 * @returns The valid entries, each keeping its other properties, with `bbox_2d`
 *   as four finite numbers and `label` always a string (numeric labels are
 *   stringified, anything else becomes `""`).
 */
function normalizeBoxes(raw: unknown): { label: string; bbox_2d: number[] }[] {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  // Returns the four-number form of a bbox, or null when it is not usable.
  const toFlatBox = (bbox: unknown): number[] | null => {
    if (!Array.isArray(bbox)) return null;
    const [first, second] = bbox;
    const isCornerPair =
      bbox.length === 2 &&
      Array.isArray(first) &&
      Array.isArray(second) &&
      first.length === 2 &&
      second.length === 2;
    const flat: unknown[] = isCornerPair ? [...first, ...second] : bbox;
    // Number.isFinite also rejects NaN/Infinity, which a bare typeof check lets through.
    const isValid =
      flat.length === 4 && flat.every((v) => typeof v === "number" && Number.isFinite(v));
    return isValid ? (flat as number[]) : null;
  };

  let candidates: unknown[];
  if (isRecord(raw) && Array.isArray(raw.image)) {
    candidates = raw.image;
  } else if (Array.isArray(raw)) {
    candidates = raw;
  } else if (isRecord(raw)) {
    candidates = [raw];
  } else {
    return [];
  }

  const boxes: { label: string; bbox_2d: number[] }[] = [];
  for (const entry of candidates) {
    if (!isRecord(entry)) continue;
    const bbox_2d = toFlatBox(entry.bbox_2d);
    if (bbox_2d === null) continue;
    // The return type promises a string label, so enforce it here.
    const rawLabel = entry.label;
    const label =
      typeof rawLabel === "string"
        ? rawLabel
        : typeof rawLabel === "number"
          ? String(rawLabel)
          : "";
    boxes.push({ ...entry, label, bbox_2d });
  }
  return boxes;
}
57
-
58
  function isImageFile(file: File) {
59
  return file.type.startsWith("image/");
60
  }
 
1
  import { useState, useRef, useEffect } from "react";
2
  import { useVLMContext } from "../context/useVLMContext";
3
+ import { drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
4
 
5
  // Names of the three modes; presumably the input sources this view offers (usage not visible here).
  const MODES = ["Webcam", "URL", "File"] as const;
6
  // Union of the literal strings in MODES: "Webcam" | "URL" | "File".
  type Mode = typeof MODES[number];
 
8
  // Path of a sample video; how it is consumed is not visible in this excerpt.
  const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
9
  // Sample prompt asking for a JSON array of { label, bbox_2d } objects with pixel coordinates.
  const EXAMPLE_PROMPT = "Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. Example: [{\"label\": \"bird\", \"bbox_2d\": [x1, y1, x2, y2]}]";
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  function isImageFile(file: File) {
12
  return file.type.startsWith("image/");
13
  }