Upload 51 files
src/components/MultiSourceCaptioningView.tsx
CHANGED
@@ -6,7 +6,7 @@ const MODES = ["Webcam", "URL", "File"] as const;
 type Mode = typeof MODES[number];
 
 const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
-const EXAMPLE_PROMPT = "Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values.";
+const EXAMPLE_PROMPT = "Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. Example: [{\"label\": \"bird\", \"bbox_2d\": [x1, y1, x2, y2]}]";
 
 function parseFlatBoxArray(arr: any[]): { label: string, bbox_2d: number[] }[] {
   if (typeof arr[0] === "string" && Array.isArray(arr[1])) {
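The updated EXAMPLE_PROMPT appends a literal example so the model is more likely to answer with a bare JSON array of { label, bbox_2d } objects. As a rough illustration of how such a reply could be turned into box objects, here is a minimal sketch; the helper name extractDetections and the fence-stripping step are assumptions, not code from this component (its own parsers, such as parseFlatBoxArray, are only partially visible in this hunk).

// Hypothetical sketch, not part of this repo: parse a reply shaped like the
// prompt's example, e.g. '[{"label": "bird", "bbox_2d": [10, 20, 110, 220]}]'.
interface Detection {
  label: string;
  bbox_2d: number[]; // [x1, y1, x2, y2]
}

function extractDetections(reply: string): Detection[] {
  // Strip optional ```json ... ``` fences that chat models often wrap around JSON.
  const cleaned = reply.replace(/```(?:json)?/g, "").trim();
  try {
    const parsed = JSON.parse(cleaned);
    if (!Array.isArray(parsed)) return [];
    // Keep only entries that match the prompted schema.
    return parsed.filter(
      (d): d is Detection =>
        typeof d?.label === "string" &&
        Array.isArray(d?.bbox_2d) &&
        d.bbox_2d.length === 4 &&
        d.bbox_2d.every((v: unknown) => typeof v === "number")
    );
  } catch {
    return []; // Malformed JSON: report no detections.
  }
}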
@@ -62,6 +62,19 @@ function isVideoFile(file: File) {
   return file.type.startsWith("video/");
 }
 
+function denormalizeBox(box: number[], width: number, height: number) {
+  // If all values are between 0 and 1, treat as normalized
+  if (box.length === 4 && box.every(v => v >= 0 && v <= 1)) {
+    return [
+      box[0] * width,
+      box[1] * height,
+      box[2] * width,
+      box[3] * height
+    ];
+  }
+  return box;
+}
+
 export default function MultiSourceCaptioningView() {
   const [mode, setMode] = useState<Mode>("File");
   const [videoUrl, setVideoUrl] = useState<string>(EXAMPLE_VIDEO_URL);
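The new denormalizeBox helper only rescales a box when all four values fall inside [0, 1]; anything else is returned untouched. A small usage sketch under assumed dimensions (the 640x480 size is illustrative, not taken from this diff):

// Illustrative only; the width/height values are assumptions for the example.
const width = 640;
const height = 480;

// Normalized coordinates are scaled to pixels: [0.1, 0.2, 0.5, 0.75] -> [64, 96, 320, 360]
denormalizeBox([0.1, 0.2, 0.5, 0.75], width, height);

// Values outside [0, 1] are treated as pixels already and pass through unchanged.
denormalizeBox([64, 96, 320, 360], width, height);

// Note the check is purely value-based: a genuine pixel-space box that happens to
// sit inside the top-left 1x1 pixel region (e.g. [0, 0, 1, 1]) would also be scaled.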
@@ -337,7 +350,12 @@ export default function MultiSourceCaptioningView() {
       if (boxHistory.length > 0) {
         const scaleX = canvas.width / overlayVideo.videoWidth;
         const scaleY = canvas.height / overlayVideo.videoHeight;
-        drawBoundingBoxesOnCanvas(ctx, boxHistory, { color: "#FF00FF", lineWidth: 4, font: "20px Arial", scaleX, scaleY });
+        // Denormalize boxes if needed
+        const denormalizedBoxes = boxHistory.map(b => ({
+          ...b,
+          bbox_2d: denormalizeBox(b.bbox_2d, canvas.width, canvas.height)
+        }));
+        drawBoundingBoxesOnCanvas(ctx, denormalizedBoxes, { color: "#FF00FF", lineWidth: 4, font: "20px Arial", scaleX, scaleY });
       }
     };
     draw();
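In this draw path the history boxes are denormalized against the canvas size and then handed to drawBoundingBoxesOnCanvas together with scaleX/scaleY. That helper lives elsewhere in the repo and its source is not part of this diff; the sketch below is only an assumption of how such a helper commonly applies the scale factors, written out to make the coordinate spaces explicit.

// Assumed shape of a box-drawing helper; NOT the repo's actual
// drawBoundingBoxesOnCanvas, whose implementation is outside this diff.
interface DrawOptions {
  color: string;
  lineWidth: number;
  font: string;
  scaleX: number; // typically canvas.width / video.videoWidth
  scaleY: number; // typically canvas.height / video.videoHeight
}

function drawBoxesSketch(
  ctx: CanvasRenderingContext2D,
  boxes: { label: string; bbox_2d: number[] }[],
  { color, lineWidth, font, scaleX, scaleY }: DrawOptions
) {
  ctx.strokeStyle = color;
  ctx.fillStyle = color;
  ctx.lineWidth = lineWidth;
  ctx.font = font;
  for (const { label, bbox_2d: [x1, y1, x2, y2] } of boxes) {
    // A helper of this shape expects video-pixel boxes and maps them to
    // canvas space via scaleX/scaleY.
    ctx.strokeRect(x1 * scaleX, y1 * scaleY, (x2 - x1) * scaleX, (y2 - y1) * scaleY);
    ctx.fillText(label, x1 * scaleX, Math.max(10, y1 * scaleY - 4));
  }
}

Under that assumption, a box that was normalized would be scaled twice (once by denormalizeBox against the canvas size, once by scaleX/scaleY); whether the real helper applies the scale factors this way cannot be determined from this diff alone.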