Spaces:
Running
Running
Upload 51 files
Browse files
- src/components/MultiSourceCaptioningView.tsx +25 -1
- videos/1.mp4 +2 -2
src/components/MultiSourceCaptioningView.tsx
CHANGED
@@ -6,7 +6,7 @@ const MODES = ["Webcam", "URL", "File"] as const;
|
|
6 |
type Mode = typeof MODES[number];
|
7 |
|
8 |
const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
|
9 |
-
const EXAMPLE_PROMPT = "Detect all
|
10 |
|
11 |
function parseFlatBoxArray(arr: any[]): { label: string, bbox_2d: number[] }[] {
|
12 |
if (typeof arr[0] === "string" && Array.isArray(arr[1])) {
|
@@ -321,6 +321,30 @@ export default function MultiSourceCaptioningView() {
|
|
321 |
ctx.fillText("Test Box", 50, 35);
|
322 |
};
|
323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
return (
|
325 |
<div className="absolute inset-0 text-white">
|
326 |
<div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
|
|
|
6 |
type Mode = typeof MODES[number];
|
7 |
|
8 |
const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
|
9 |
+
/**
 * Default detection prompt: asks for a JSON array of `{ label, bbox_2d }` objects.
 * Written as a template literal because the text contains both single and double
 * quotes (the embedded JSON example); a double-quoted literal would terminate early.
 * The prompt wording is preserved verbatim.
 */
const EXAMPLE_PROMPT = `Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. This is only an example use your own values (Do not use these values [100, 50, 200, 300]}) Example: [{"label": "bird", "bbox_2d": [100, 50, 200, 300]}]`;
|
10 |
|
11 |
function parseFlatBoxArray(arr: any[]): { label: string, bbox_2d: number[] }[] {
|
12 |
if (typeof arr[0] === "string" && Array.isArray(arr[1])) {
|
|
|
321 |
ctx.fillText("Test Box", 50, 35);
|
322 |
};
|
323 |
|
324 |
+
useEffect(() => {
|
325 |
+
const draw = () => {
|
326 |
+
const overlayVideo = overlayVideoRef.current;
|
327 |
+
const canvas = canvasRef.current;
|
328 |
+
if (!overlayVideo || !canvas) return;
|
329 |
+
if (overlayVideo.videoWidth === 0) return;
|
330 |
+
canvas.width = overlayVideo.videoWidth;
|
331 |
+
canvas.height = overlayVideo.videoHeight;
|
332 |
+
const ctx = canvas.getContext("2d");
|
333 |
+
if (!ctx) return;
|
334 |
+
ctx.clearRect(0, 0, canvas.width, canvas.height);
|
335 |
+
const now = Date.now();
|
336 |
+
const boxHistory = boxHistoryRef.current.filter((b: any) => now - b.timestamp < 2000);
|
337 |
+
if (boxHistory.length > 0) {
|
338 |
+
const scaleX = canvas.width / overlayVideo.videoWidth;
|
339 |
+
const scaleY = canvas.height / overlayVideo.videoHeight;
|
340 |
+
drawBoundingBoxesOnCanvas(ctx, boxHistory, { color: "#FF00FF", lineWidth: 4, font: "20px Arial", scaleX, scaleY });
|
341 |
+
}
|
342 |
+
};
|
343 |
+
draw();
|
344 |
+
const interval = setInterval(draw, 100);
|
345 |
+
return () => clearInterval(interval);
|
346 |
+
}, [overlayVideoRef, canvasRef]);
|
347 |
+
|
348 |
return (
|
349 |
<div className="absolute inset-0 text-white">
|
350 |
<div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
|
videos/1.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01ea1c79a0b83ee324c292b3f0f12214f5e9d8a42437c0d26bd3be71004b57c0
|
3 |
+
size 1754021
|