Spaces:

Quazim0t0
/

FastVLMBoxes

Running

App Files Community

Quazim0t0 committed 1 day ago

Commit

cf6cccc

verified ·

1 Parent(s): 43aa15a

Upload 51 files

Browse files

Files changed (1) hide show

src/components/MultiSourceCaptioningView.tsx +1 -48

src/components/MultiSourceCaptioningView.tsx CHANGED Viewed

@@ -1,6 +1,6 @@
 import { useState, useRef, useEffect } from "react";
 import { useVLMContext } from "../context/useVLMContext";
-import { extractJsonFromMarkdown, drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
 const MODES = ["Webcam", "URL", "File"] as const;
 type Mode = typeof MODES[number];
@@ -8,53 +8,6 @@ type Mode = typeof MODES[number];
 const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
 const EXAMPLE_PROMPT = "Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. Example: [{\"label\": \"bird\", \"bbox_2d\": [x1, y1, x2, y2]}]";
-function parseFlatBoxArray(arr: any[]): { label: string, bbox_2d: number[] }[] {
-  if (typeof arr[0] === "string" && Array.isArray(arr[1])) {
-    const label = arr[0];
-    return arr.slice(1).map(bbox => ({ label, bbox_2d: bbox }));
-  }
-  return [];
-}
-function normalizeBoxes(raw: any): { label: string, bbox_2d: number[] }[] {
-  if (!raw) return [];
-  let boxes = [];
-  if (typeof raw === "object" && raw !== null && Array.isArray(raw.image)) {
-    boxes = raw.image;
-  } else if (Array.isArray(raw)) {
-    boxes = raw;
-  } else if (typeof raw === "object" && raw !== null) {
-    boxes = [raw];
-  }
-  return boxes
-    .map((obj: any) => {
-      if (!obj || !obj.bbox_2d) return null;
-      let bbox = obj.bbox_2d;
-      // If bbox_2d is [[x1, y1], [x2, y2]], convert to [x1, y1, x2, y2]
-      if (
-        Array.isArray(bbox) &&
-        bbox.length === 2 &&
-        Array.isArray(bbox[0]) &&
-        Array.isArray(bbox[1]) &&
-        bbox[0].length === 2 &&
-        bbox[1].length === 2
-      ) {
-        bbox = [bbox[0][0], bbox[0][1], bbox[1][0], bbox[1][1]];
-      }
-      // If bbox_2d is [x1, y1, x2, y2], use as-is
-      if (
-        Array.isArray(bbox) &&
-        bbox.length === 4 &&
-        bbox.every((v: any) => typeof v === "number")
-      ) {
-        return { ...obj, bbox_2d: bbox };
-      }
-      // Otherwise, skip
-      return null;
-    })
-    .filter((obj: any) => obj);
-}
 function isImageFile(file: File) {
   return file.type.startsWith("image/");
 }

 import { useState, useRef, useEffect } from "react";
 import { useVLMContext } from "../context/useVLMContext";
+import { drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
 const MODES = ["Webcam", "URL", "File"] as const;
 type Mode = typeof MODES[number];
 const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
 const EXAMPLE_PROMPT = "Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. Example: [{\"label\": \"bird\", \"bbox_2d\": [x1, y1, x2, y2]}]";
 function isImageFile(file: File) {
   return file.type.startsWith("image/");
 }