Upload 50 files

Files changed:
- .gitignore +23 -23
- public/index.html +43 -43
- public/manifest.json +25 -25
- public/robots.txt +3 -3
- src/App.css +38 -38
- src/App.js +25 -25
- src/App.test.js +8 -8
- src/App.tsx +6 -2
- src/components/BoxAnnotator.ts +13 -3
- src/components/MultiSourceCaptioningView.tsx +434 -473
- src/index.js +17 -17
- src/reportWebVitals.js +13 -13
- src/setupTests.js +5 -5
.gitignore
CHANGED
@@ -1,23 +1,23 @@
(All 23 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# production
/build

# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*
public/index.html
CHANGED
@@ -1,43 +1,43 @@
(All 43 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta name="theme-color" content="#000000" />
    <meta
      name="description"
      content="Web site created using create-react-app"
    />
    <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
    <!--
      manifest.json provides metadata used when your web app is installed on a
      user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
    -->
    <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
    <!--
      Notice the use of %PUBLIC_URL% in the tags above.
      It will be replaced with the URL of the `public` folder during the build.
      Only files inside the `public` folder can be referenced from the HTML.

      Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
      work correctly both with client-side routing and a non-root public URL.
      Learn how to configure a non-root public URL by running `npm run build`.
    -->
    <title>React App</title>
  </head>
  <body>
    <noscript>You need to enable JavaScript to run this app.</noscript>
    <div id="root"></div>
    <!--
      This HTML file is a template.
      If you open it directly in the browser, you will see an empty page.

      You can add webfonts, meta tags, or analytics to this file.
      The build step will place the bundled scripts into the <body> tag.

      To begin the development, run `npm start` or `yarn start`.
      To create a production bundle, use `npm run build` or `yarn build`.
    -->
  </body>
</html>
public/manifest.json
CHANGED
@@ -1,25 +1,25 @@
(All 25 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

{
  "short_name": "React App",
  "name": "Create React App Sample",
  "icons": [
    {
      "src": "favicon.ico",
      "sizes": "64x64 32x32 24x24 16x16",
      "type": "image/x-icon"
    },
    {
      "src": "logo192.png",
      "type": "image/png",
      "sizes": "192x192"
    },
    {
      "src": "logo512.png",
      "type": "image/png",
      "sizes": "512x512"
    }
  ],
  "start_url": ".",
  "display": "standalone",
  "theme_color": "#000000",
  "background_color": "#ffffff"
}
public/robots.txt
CHANGED
@@ -1,3 +1,3 @@
(All 3 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:
src/App.css
CHANGED
@@ -1,38 +1,38 @@
(All 38 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

.App {
  text-align: center;
}

.App-logo {
  height: 40vmin;
  pointer-events: none;
}

@media (prefers-reduced-motion: no-preference) {
  .App-logo {
    animation: App-logo-spin infinite 20s linear;
  }
}

.App-header {
  background-color: #282c34;
  min-height: 100vh;
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  font-size: calc(10px + 2vmin);
  color: white;
}

.App-link {
  color: #61dafb;
}

@keyframes App-logo-spin {
  from {
    transform: rotate(0deg);
  }
  to {
    transform: rotate(360deg);
  }
}
src/App.js
CHANGED
@@ -1,25 +1,25 @@
(All 25 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

import logo from './logo.svg';
import './App.css';

function App() {
  return (
    <div className="App">
      <header className="App-header">
        <img src={logo} className="App-logo" alt="logo" />
        <p>
          Edit <code>src/App.js</code> and save to reload.
        </p>
        <a
          className="App-link"
          href="https://reactjs.org"
          target="_blank"
          rel="noopener noreferrer"
        >
          Learn React
        </a>
      </header>
    </div>
  );
}

export default App;
src/App.test.js
CHANGED
@@ -1,8 +1,8 @@
(All 8 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

import { render, screen } from '@testing-library/react';
import App from './App';

test('renders learn react link', () => {
  render(<App />);
  const linkElement = screen.getByText(/learn react/i);
  expect(linkElement).toBeInTheDocument();
});
src/App.tsx
CHANGED
@@ -11,7 +11,8 @@ export default function App() {
       await loadModel();
       setStarted(true);
     } catch (e) {
-      // error is handled by context
+      // error is handled by context, could log here if needed
+      console.error("Failed to load model:", e);
     }
   };

@@ -27,6 +28,9 @@ export default function App() {
         {isLoading ? "Loading Model..." : "Load Model"}
       </button>
       {error && <div className="text-red-400 mt-2">Model error: {error}</div>}
+      <p className="text-sm text-gray-400 mt-2">
+        Model will download on first load. This may take a moment.
+      </p>
     </div>
   );
 }

@@ -37,4 +41,4 @@ export default function App() {
       <MultiSourceCaptioningView />
     </div>
   );
-}
+}
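
A note on the App.tsx hunks above: the component leaves user-facing error display to the VLM context (the `{error && ...}` row), so its local catch only logs. Below is a minimal sketch of a context-side hook that supports this split; everything here beyond the { isLoaded, isLoading, error, loadModel } shape implied by the diff is an assumption, not code from this repo.

// Hypothetical sketch. The loader is injected so the sketch stays
// self-contained; the real context presumably wires in its own.
import { useCallback, useState } from "react";

export function useVLMState(loadVLM: () => Promise<void>) {
  const [isLoaded, setIsLoaded] = useState(false);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const loadModel = useCallback(async () => {
    setIsLoading(true);
    setError(null);
    try {
      await loadVLM();
      setIsLoaded(true);
    } catch (e) {
      // Record the failure for any consumer to render; re-throw so a caller's
      // catch (like the console.error added in App.tsx) still fires.
      setError(e instanceof Error ? e.message : String(e));
      throw e;
    } finally {
      setIsLoading(false);
    }
  }, [loadVLM]);

  return { isLoaded, isLoading, error, loadModel };
}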
src/components/BoxAnnotator.ts
CHANGED
@@ -16,6 +16,7 @@ export function extractJsonFromMarkdown(markdown: string): any[] | null {
     if (typeof parsed === "object" && parsed !== null) return [parsed]; // <-- Wrap object in array
     return null;
   } catch {
+    console.error("Failed to parse JSON from markdown:", jsonString);
     return null;
   }
 }

@@ -31,7 +32,15 @@ export function drawBoundingBoxesOnCanvas(
   boxes: { bbox_2d: number[]; label?: string }[],
   options?: { color?: string; lineWidth?: number; font?: string, scaleX?: number, scaleY?: number }
 ) {
-  if (!Array.isArray(boxes))
+  if (!Array.isArray(boxes)) {
+    console.warn("drawBoundingBoxesOnCanvas: 'boxes' is not an array or is null/undefined.", boxes);
+    return;
+  }
+  if (boxes.length === 0) {
+    // console.log("drawBoundingBoxesOnCanvas: 'boxes' array is empty, nothing to draw.");
+    return;
+  }
+
   const color = options?.color || "#00FF00";
   const lineWidth = options?.lineWidth || 2;
   const font = options?.font || "16px Arial";

@@ -54,9 +63,10 @@ export function drawBoundingBoxesOnCanvas(
     ctx.rect(sx1, sy1, sx2 - sx1, sy2 - sy1);
     ctx.stroke();
     if (obj.label) {
-
+      // Adjust text position to ensure visibility, especially if near top edge
+      ctx.fillText(obj.label, sx1 + 4, sy1 - 4 < 16 ? sy1 + 16 : sy1 - 4);
     }
   });

   ctx.restore();
-}
+}
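
Two behaviors are worth noting in the BoxAnnotator changes: non-array input to drawBoundingBoxesOnCanvas now warns and returns instead of throwing, and a label that would be clipped at the top edge (sy1 - 4 < 16) is drawn inside the box at sy1 + 16 instead. Below is a short usage sketch of the two exports together; the surrounding canvas plumbing is illustrative, not from this repo.

// Hedged usage sketch: wire raw model output to the annotator helpers.
import { extractJsonFromMarkdown, drawBoundingBoxesOnCanvas } from "./BoxAnnotator";

function renderDetections(
  canvas: HTMLCanvasElement,
  modelOutput: string,
  frameW: number, // width of the frame the model saw, in pixels
  frameH: number, // height of the frame the model saw, in pixels
) {
  const ctx = canvas.getContext("2d");
  if (!ctx) return;

  // extractJsonFromMarkdown returns null on a parse failure (and now logs the
  // offending string), so fall back to an empty list.
  const boxes = extractJsonFromMarkdown(modelOutput) ?? [];

  // scaleX/scaleY map the model's pixel coordinates onto the canvas; the new
  // guards make an empty or malformed `boxes` a no-op rather than a crash.
  drawBoundingBoxesOnCanvas(ctx, boxes, {
    color: "#FF00FF",
    lineWidth: 4,
    font: "20px Arial",
    scaleX: canvas.width / frameW,
    scaleY: canvas.height / frameH,
  });
}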
src/components/MultiSourceCaptioningView.tsx
CHANGED
@@ -1,13 +1,22 @@
-import { useState, useRef, useEffect } from "react";
+import React, { useState, useRef, useEffect, useCallback } from "react";
 import { useVLMContext } from "../context/useVLMContext";
 import { extractJsonFromMarkdown, drawBoundingBoxesOnCanvas } from "./BoxAnnotator";

 const MODES = ["Webcam", "URL", "File"] as const;
 type Mode = typeof MODES[number];

-const EXAMPLE_VIDEO_URL = "/videos/1.mp4";
+const EXAMPLE_VIDEO_URL = "/videos/1.mp4"; // Ensure this path is correct
 const EXAMPLE_PROMPT = "Detect all people in the image. For each person, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. Example: [{\"label\": \"person\", \"bbox_2d\": [100, 50, 200, 300]}]";

+// Helper functions (remain the same)
+function parseFlatBoxArray(arr: any[]): { label: string, bbox_2d: number[] }[] {
+  if (typeof arr[0] === "string" && Array.isArray(arr[1])) {
+    const label = arr[0];
+    return arr.slice(1).map(bbox => ({ label, bbox_2d: bbox }));
+  }
+  return [];
+}
+
 function normalizeBoxes(raw: any): { label: string, bbox_2d: number[] }[] {
   if (!raw) return [];
   let boxes = [];

@@ -22,7 +31,6 @@ function normalizeBoxes(raw: any): { label: string, bbox_2d: number[] }[] {
     .map((obj: any) => {
       if (!obj || !obj.bbox_2d) return null;
       let bbox = obj.bbox_2d;
-      // If bbox_2d is [[x1, y1], [x2, y2]], convert to [x1, y1, x2, y2]
       if (
         Array.isArray(bbox) &&
         bbox.length === 2 &&

@@ -33,7 +41,6 @@ function normalizeBoxes(raw: any): { label: string, bbox_2d: number[] }[] {
       ) {
         bbox = [bbox[0][0], bbox[0][1], bbox[1][0], bbox[1][1]];
       }
-      // If bbox_2d is [x1, y1, x2, y2], use as-is
       if (
         Array.isArray(bbox) &&
         bbox.length === 4 &&

@@ -41,7 +48,6 @@ function normalizeBoxes(raw: any): { label: string, bbox_2d: number[] }[] {
       ) {
         return { ...obj, bbox_2d: bbox };
       }
-      // Otherwise, skip
       return null;
     })
     .filter((obj: any) => obj);

@@ -54,329 +60,365 @@ function isVideoFile(file: File) {
Removed (most removed lines are truncated in the extracted diff): the getImageDataFromElement helper (offscreen-canvas conversion of a video or image element to ImageData); a web-worker inference path (inferenceWorkerRef, runInferenceInWorker posting { imageData, prompt }, terminated in a useWorker-keyed effect); processVideoFrame, which drew the current frame, ran inference in the worker with a main-thread runInference fallback, then drew normalized boxes in "#FF00FF"; per-source state (inputUrl, uploadedUrl, videoProcessing, imageProcessed, exampleProcessing, urlProcessing, debugOutput, canvasDims, videoDims); the videoRef/canvasRef/imageRef/webcamStreamRef refs; a setupWebcam effect with MediaStreamTrack cleanup; per-mode 1000 ms setInterval effects calling processVideoFrame; and the old handleFileChange, handleProcessImage, handleToggleVideoProcessing, handleToggleExampleProcessing, and handleToggleUrlProcessing handlers.

Added (the new implementation, unchanged context lines included; the uploadedFile state declaration is implied by its uses but not visible in the extracted diff):

export default function MultiSourceCaptioningView() {
  const [mode, setMode] = useState<Mode>("File");
  const [currentUrlInput, setCurrentUrlInput] = useState<string>(EXAMPLE_VIDEO_URL);
  const [prompt, setPrompt] = useState<string>(EXAMPLE_PROMPT);
  const [processingState, setProcessingState] = useState(false); // General processing indicator
  const [error, setError] = useState<string | null>(null);
  const [mediaStream, setMediaStream] = useState<MediaStream | null>(null); // For webcam stream
  const [latestBoxes, setLatestBoxes] = useState<any[]>([]); // State for boxes to draw
  const [uploadedFile, setUploadedFile] = useState<File | null>(null); // (implied by its uses; not visible in the extracted diff)
  const [inferenceStatus, setInferenceStatus] = useState<string>("");
  const [debugOutput, setDebugOutput] = useState<string>("");

  // Refs for the two video elements and the canvas
  const displayVideoRef = useRef<HTMLVideoElement>(null); // The visible video
  const vlmVideoRef = useRef<HTMLVideoElement>(null); // The hidden video for VLM processing
  const canvasRef = useRef<HTMLCanvasElement>(null); // The canvas overlay for drawing boxes
  const imageRef = useRef<HTMLImageElement>(null); // For image file processing

  const { isLoaded, isLoading, error: modelError, runInference } = useVLMContext();

  // --- Drawing Loop for the Visible Display ---
  // This loop runs constantly to draw the latest boxes on the display video
  const drawDisplayCanvas = useCallback(() => {
    const displayVideo = displayVideoRef.current;
    const canvas = canvasRef.current;
    const ctx = canvas?.getContext('2d');

    if (!displayVideo || !canvas || !ctx) {
      return;
    }

    // Adjust canvas size to match the display video's dimensions
    if (canvas.width !== displayVideo.videoWidth || canvas.height !== displayVideo.videoHeight) {
      canvas.width = displayVideo.videoWidth;
      canvas.height = displayVideo.videoHeight;
    }

    // Clear the canvas each frame
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    // Draw the latest bounding boxes
    const scaleX = canvas.width / (displayVideo.videoWidth || 1); // Avoid division by zero
    const scaleY = canvas.height / (displayVideo.videoHeight || 1);
    drawBoundingBoxesOnCanvas(ctx, latestBoxes, { color: "#FF00FF", lineWidth: 4, font: "20px Arial", scaleX, scaleY });

    // Only request next frame if video is playing to avoid unnecessary redraws when paused/ended
    if (!displayVideo.paused && !displayVideo.ended) {
      requestAnimationFrame(drawDisplayCanvas);
    }
  }, [latestBoxes]); // Re-create if latestBoxes changes

  // Effect to start the display drawing loop when the display video is ready
  useEffect(() => {
    const displayVideo = displayVideoRef.current;
    if (displayVideo) {
      const handleVideoReady = () => {
        // Start the requestAnimationFrame loop once the video has loaded metadata
        if (displayVideo.readyState >= 1) { // HAVE_METADATA
          requestAnimationFrame(drawDisplayCanvas);
        }
      };
      displayVideo.addEventListener('loadedmetadata', handleVideoReady);
      // Also check if video is already ready (e.g., on component re-mount)
      if (displayVideo.readyState >= 1) {
        requestAnimationFrame(drawDisplayCanvas);
      }
      return () => {
        displayVideo.removeEventListener('loadedmetadata', handleVideoReady);
      };
    }
  }, [drawDisplayCanvas]);

  // --- FastVLM Processing Loop (from hidden video/image) ---
  // This interval loop controls when FastVLM processes a frame
  useEffect(() => {
    const vlmVideo = vlmVideoRef.current;
    const isVideoMode = (mode === "Webcam" || (mode === "URL" && vlmVideo?.src) || (mode === "File" && vlmVideo?.src && isVideoFile(uploadedFile || null)));

    if (!isLoaded || !vlmVideo || !isVideoMode) {
      // If not in a video mode or VLM/video not ready, ensure processing stops
      setProcessingState(false);
      return;
    }

    let interval: ReturnType<typeof setInterval> | null = null;

    const startVLMProcessing = () => {
      if (interval) clearInterval(interval); // Clear any old interval

      interval = setInterval(async () => {
        if (!vlmVideo || vlmVideo.paused || vlmVideo.ended || vlmVideo.videoWidth === 0 || processingState) {
          return; // Skip if video not ready, paused, ended, or already processing
        }

        setProcessingState(true);
        setInferenceStatus("Running inference...");
        setError(null); // Clear previous errors

        try {
          // Create a temporary offscreen canvas to get image data from the VLM video
          const tempCanvas = document.createElement('canvas');
          tempCanvas.width = vlmVideo.videoWidth;
          tempCanvas.height = vlmVideo.videoHeight;
          const tempCtx = tempCanvas.getContext('2d', { willReadFrequently: true });

          if (tempCtx && vlmVideo.readyState >= 2) { // HAVE_CURRENT_DATA
            tempCtx.drawImage(vlmVideo, 0, 0, tempCanvas.width, tempCanvas.height);
            const imageData = tempCtx.getImageData(0, 0, tempCanvas.width, tempCanvas.height);

            const modelOutput = await runInference(imageData, prompt); // Pass ImageData
            setDebugOutput(modelOutput); // Update raw model output

            let boxes = extractJsonFromMarkdown(modelOutput) || [];
            if (boxes.length === 0 && Array.isArray(modelOutput)) { // Fallback for direct array output
              // This condition `Array.isArray(modelOutput)` is unlikely if modelOutput is string,
              // so ensure `extractJsonFromMarkdown` is robust or `runInference` returns expected string
            }
            boxes = normalizeBoxes(boxes);

            setLatestBoxes(boxes); // Update state, triggers display canvas redraw
            setInferenceStatus(boxes.length > 0 ? "Inference complete. Boxes detected." : "Inference complete. No boxes detected.");
          } else {
            setInferenceStatus("Video not ready for processing.");
          }
        } catch (e) {
          setError("Inference error: " + (e instanceof Error ? e.message : String(e)));
          setLatestBoxes([]);
          setInferenceStatus("Inference failed.");
        } finally {
          setProcessingState(false); // Processing finished
        }
      }, 200); // Inference interval (e.g., 5 frames per second)
    };

    const stopVLMProcessing = () => {
      if (interval) clearInterval(interval);
      interval = null;
      setProcessingState(false);
      setInferenceStatus("Stopped processing.");
    };

    // Start/stop processing based on video playback events
    vlmVideo.addEventListener('play', startVLMProcessing);
    vlmVideo.addEventListener('pause', stopVLMProcessing);
    vlmVideo.addEventListener('ended', stopVLMProcessing);

    // Initial check if video is already playing (e.g., after initial load/autoplay)
    if (vlmVideo.readyState >= 2 && !vlmVideo.paused && !vlmVideo.ended) {
      startVLMProcessing();
    }

    // Cleanup function for useEffect
    return () => {
      stopVLMProcessing();
      vlmVideo.removeEventListener('play', startVLMProcessing);
      vlmVideo.removeEventListener('pause', stopVLMProcessing);
      vlmVideo.removeEventListener('ended', stopVLMProcessing);
    };
  }, [mode, isLoaded, prompt, runInference, processingState, uploadedFile]); // Added uploadedFile for file mode re-trigger

  // --- Media Source Handling ---

  // Cleanup for media stream and object URLs
  const cleanupMediaSource = useCallback(() => {
    if (mediaStream) {
      mediaStream.getTracks().forEach(track => track.stop());
      setMediaStream(null);
    }
    // Revoke any created blob URLs (for file inputs)
    if (displayVideoRef.current?.src.startsWith('blob:')) {
      URL.revokeObjectURL(displayVideoRef.current.src);
      displayVideoRef.current.src = "";
    }
    if (vlmVideoRef.current?.src.startsWith('blob:')) {
      URL.revokeObjectURL(vlmVideoRef.current.src);
      vlmVideoRef.current.src = "";
    }
    setLatestBoxes([]); // Clear boxes when source changes
    setError(null);
    setInferenceStatus("");
    setDebugOutput("");
  }, [mediaStream]);

  // Handle changing the mode (Webcam, URL, File)
  useEffect(() => {
    cleanupMediaSource(); // Clean up previous source

    const displayVideo = displayVideoRef.current;
    const vlmVideo = vlmVideoRef.current;

    if (!displayVideo || !vlmVideo) return;

    // Reset srcObject/src to ensure fresh start
    displayVideo.srcObject = null;
    vlmVideo.srcObject = null;
    displayVideo.src = "";
    vlmVideo.src = "";

    setLatestBoxes([]); // Clear boxes on mode change
    setError(null);
    setInferenceStatus("");
    setDebugOutput("");

    // Special handling for initial file mode to load example video
    if (mode === "File" && !uploadedFile) {
      displayVideo.src = EXAMPLE_VIDEO_URL;
      vlmVideo.src = EXAMPLE_VIDEO_URL;
      displayVideo.load(); vlmVideo.load(); // Load the video
      displayVideo.play().catch(e => console.error("Error playing example display video:", e));
      vlmVideo.play().catch(e => console.error("Error playing example VLM video:", e));
    }
  }, [mode, uploadedFile, cleanupMediaSource]); // Added uploadedFile to ensure re-trigger for file mode

  // Handle Webcam Input
  const handleWebcamInput = useCallback(async () => {
    cleanupMediaSource(); // Clean up any active stream
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ video: true });
      setMediaStream(stream); // Store stream to manage it

      if (displayVideoRef.current && vlmVideoRef.current) {
        displayVideoRef.current.srcObject = stream;
        vlmVideoRef.current.srcObject = stream;
        // Programmatically play both videos
        displayVideoRef.current.play().catch(e => console.error("Error playing display video:", e));
        vlmVideoRef.current.play().catch(e => console.error("Error playing VLM video:", e));
      }
      setMode("Webcam");
    } catch (e) {
      setError("Could not access webcam: " + (e instanceof Error ? e.message : String(e)));
      setMediaStream(null);
      setLatestBoxes([]);
      setInferenceStatus("Webcam access denied or failed.");
    }
  }, [cleanupMediaSource]);

  // Handle URL Input (when Load button is clicked)
  const handleLoadUrl = useCallback(() => {
    cleanupMediaSource(); // Clean up any active stream

    const url = currentUrlInput;
    if (!url) {
      setError("Please enter a valid URL.");
      return;
    }

    if (displayVideoRef.current && vlmVideoRef.current) {
      displayVideoRef.current.src = url;
      vlmVideoRef.current.src = url;
      displayVideoRef.current.load(); vlmVideoRef.current.load(); // Load the video
      displayVideoRef.current.play().catch(e => console.error("Error playing display video:", e));
      vlmVideoRef.current.play().catch(e => console.error("Error playing VLM video:", e));
      setMode("URL");
    }
  }, [currentUrlInput, cleanupMediaSource]);

  // Handle File Input
  const handleFileChange = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    cleanupMediaSource(); // Clean up any active stream

    const file = e.target.files?.[0] || null;
    if (file) {
      const fileUrl = URL.createObjectURL(file); // Create blob URL for the file
      // Store the file to differentiate image/video and manage its URL
      setUploadedFile(file);

      if (isImageFile(file)) {
        // For images, we handle processing on a button click, not a continuous loop
        // The imageRef will display the image
        // The canvas will be used for processing and drawing
        setError(null);
        setMode("File");
      } else if (isVideoFile(file)) {
        if (displayVideoRef.current && vlmVideoRef.current) {
          displayVideoRef.current.src = fileUrl;
          vlmVideoRef.current.src = fileUrl;
          displayVideoRef.current.load(); vlmVideoRef.current.load();
          displayVideoRef.current.play().catch(e => console.error("Error playing display video:", e));
          vlmVideoRef.current.play().catch(e => console.error("Error playing VLM video:", e));
          setMode("File");
        }
      } else {
        setError("Unsupported file type. Please upload an image or video.");
        setUploadedFile(null);
        if (fileUrl) URL.revokeObjectURL(fileUrl); // Clean up invalid file URL
      }
    } else {
      setUploadedFile(null); // Clear file if nothing selected
      // If no file selected, revert to example video if in File mode
      if (mode === "File") {
        if (displayVideoRef.current && vlmVideoRef.current) {
          displayVideoRef.current.src = EXAMPLE_VIDEO_URL;
          vlmVideoRef.current.src = EXAMPLE_VIDEO_URL;
          displayVideoRef.current.load(); vlmVideoRef.current.load();
          displayVideoRef.current.play().catch(e => console.error("Error playing example display video:", e));
          vlmVideoRef.current.play().catch(e => console.error("Error playing example VLM video:", e));
        }
      }
    }
  }, [cleanupMediaSource, mode]);

  // Handler for processing an uploaded image file (one-time inference)
  const handleProcessImage = async () => {
    if (!isLoaded || !uploadedFile || !isImageFile(uploadedFile) || !imageRef.current || !canvasRef.current) {
      setError("Image or model not ready for processing.");
      return;
    }

    const img = imageRef.current;
    const canvas = canvasRef.current;
    const ctx = canvas.getContext("2d");
    if (!ctx) return;

    // Ensure canvas dimensions match image for processing and display
    canvas.width = img.naturalWidth;
    canvas.height = img.naturalHeight;

    setProcessingState(true);
    setError(null);
    setInferenceStatus("Running image inference...");

    try {
      // Draw image to canvas to get ImageData for inference
      ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
      const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);

      const modelOutput = await runInference(imageData, prompt);
      setDebugOutput(modelOutput);
      setInferenceStatus("Image inference complete.");

      // Clear canvas and redraw image before drawing boxes
      ctx.clearRect(0, 0, canvas.width, canvas.height);
      ctx.drawImage(img, 0, 0, canvas.width, canvas.height);

      let boxes = extractJsonFromMarkdown(modelOutput) || [];
      boxes = normalizeBoxes(boxes);
      setLatestBoxes(boxes); // Update latestBoxes for display

      if (boxes.length === 0) setInferenceStatus("Image inference complete. No boxes detected.");
    } catch (e) {
      setError("Image inference error: " + (e instanceof Error ? e.message : String(e)));
      setLatestBoxes([]);
      setInferenceStatus("Image inference failed.");
    } finally {
      setProcessingState(false);
    }
  };

  // --- Rendered UI ---
  return (
    <div className="absolute inset-0 text-white flex flex-col">
      <div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
        {isLoading ? "Loading model..." : isLoaded ? "Model loaded" : modelError ? `Model error: ${modelError}` : "Model not loaded"}
      </div>
      <div className="text-center text-sm text-blue-300 mt-10">{inferenceStatus}</div> {/* Adjusted top margin */}

      <div className="flex flex-col items-center justify-center flex-1 w-full p-4"> {/* Added padding */}
        {/* Mode Selector */}
        <div className="mb-6 mt-4"> {/* Increased margin-top for selector */}
          <div className="flex space-x-4">
            {MODES.map((m) => (
              <button

@@ -385,6 +427,7 @@ export default function MultiSourceCaptioningView() {
                 mode === m ? "bg-blue-600 text-white" : "bg-gray-700 text-gray-300 hover:bg-blue-500"
               }`}
               onClick={() => setMode(m)}
+              disabled={!isLoaded && m !== "File"} // Disable if model not loaded, except for initial file view
             >
               {m}
             </button>

@@ -392,212 +435,130 @@ export default function MultiSourceCaptioningView() {
Removed (most removed lines are truncated in the extracted diff): the old per-source JSX, a separate <video> or <img> plus its own <canvas> overlay for each of the URL video, uploaded image, uploaded video, and example video cases, each with its own Start/Stop Processing button; the "Processing frame..." indicator; the Test Draw Box debug button (handleTestDrawBox); and the Canvas/Video dimensions readout.

Added (the rest of the new JSX, unchanged context lines included):

          </div>
        </div>

        {/* Dynamic Content Area */}
        <div className="w-full max-w-4xl flex-1 flex flex-col items-center justify-center relative">
          {/* Prompt Input (Common to all modes) */}
          <div className="mb-4 w-full max-w-xl">
            <label className="block text-left mb-2 font-medium">Detection Prompt:</label>
            <textarea
              className="w-full p-2 rounded-lg text-black"
              rows={3}
              value={prompt}
              onChange={(e) => setPrompt(e.target.value)}
              disabled={processingState}
            />
          </div>

          {/* Video/Image Display and Canvas Overlay */}
          <div className="relative w-full" style={{ maxWidth: '1280px', aspectRatio: '16/9', backgroundColor: '#000', display: 'flex', justifyContent: 'center', alignItems: 'center' }}>
            {/* Conditional rendering for image vs video display */}
            {mode === "File" && uploadedFile && isImageFile(uploadedFile) ? (
              <img
                ref={imageRef}
                src={URL.createObjectURL(uploadedFile)} // Use object URL for display
                alt="Uploaded"
                className="max-w-full max-h-full block object-contain"
                style={{ position: 'absolute' }}
                onLoad={() => {
                  // This is important to ensure canvas matches image size for single image processing
                  if (imageRef.current && canvasRef.current) {
                    canvasRef.current.width = imageRef.current.naturalWidth;
                    canvasRef.current.height = imageRef.current.naturalHeight;
                  }
                }}
              />
            ) : (
              <video
                ref={displayVideoRef}
                autoPlay
                muted
                playsInline
                loop // Loop for URL and File videos
                className="max-w-full max-h-full block object-contain"
                style={{ position: 'absolute' }}
              />
            )}
            <canvas
              ref={canvasRef}
              className="absolute top-0 left-0 w-full h-full pointer-events-none"
              style={{ zIndex: 10 }}
            />
          </div>

          {/* Controls specific to each mode */}
          <div className="mt-4 flex flex-col items-center gap-2">
            {mode === "Webcam" && (
              <button
                className="px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold hover:bg-blue-700 disabled:opacity-50"
                onClick={handleWebcamInput} // This button sets up/starts webcam
                disabled={processingState || !isLoaded}
              >
                {mediaStream ? "Restart Webcam" : "Start Webcam"} 📸
              </button>
            )}

            {mode === "URL" && (
              <>
                <div className="flex w-full max-w-xl">
                  <input
                    type="text"
                    className="flex-1 px-4 py-2 rounded-l-lg text-black"
                    value={currentUrlInput}
                    onChange={(e) => setCurrentUrlInput(e.target.value)}
                    placeholder="Paste video URL here"
                    disabled={processingState}
                  />
                  <button
                    className="px-4 py-2 rounded-r-lg bg-blue-600 text-white font-semibold hover:bg-blue-700 disabled:opacity-50"
                    onClick={handleLoadUrl}
                    disabled={processingState || !isLoaded}
                  >
                    Load URL
                  </button>
                </div>
              </>
            )}

            {mode === "File" && (
              <>
                <input
                  type="file"
                  accept="image/*,video/*"
                  onChange={handleFileChange}
                  className="block w-full text-sm text-gray-300 file:mr-4 file:py-2 file:px-4 file:rounded-lg file:border-0 file:text-sm file:font-semibold file:bg-blue-600 file:text-white hover:file:bg-blue-700 disabled:opacity-50"
                  disabled={processingState}
                />
                {uploadedFile && isImageFile(uploadedFile) && (
                  <button
                    className="mt-2 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold hover:bg-blue-700 disabled:opacity-50"
                    onClick={handleProcessImage}
                    disabled={processingState || !isLoaded}
                  >
                    {processingState ? "Processing Image..." : "Process Image"}
                  </button>
                )}
              </>
            )}
          </div>

          {/* Error and Debug Output */}
          {error && <div className="text-red-400 mt-2 text-center">{error}</div>}
          <div className="mt-4 p-2 bg-gray-800 rounded text-xs w-full max-w-xl">
            <div>Raw Model Output:</div>
            <pre className="overflow-x-auto max-h-32 whitespace-pre-wrap">{debugOutput}</pre>
          </div>
        </div>
      </div>

      {/* Hidden Video for VLM processing - this must be rendered always */}
      <video
        ref={vlmVideoRef}
        autoPlay
        muted
        playsInline
        loop // Loop for URL and File videos
        style={{ display: 'none' }} // Hidden from view
      />
    </div>
  );
}
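
The heart of this rewrite is decoupling display from inference: a hidden <video> is sampled on a fixed 200 ms interval for the model, while the visible <video> gets its box overlay redrawn every animation frame from whatever results arrived last, so UI frame rate no longer waits on model latency. Below is a stripped-down, framework-free sketch of that pattern; runInference is a stand-in for the real model call and the drawing is simplified to strokeRect.

// Minimal sketch of the display/inference decoupling, assuming a browser
// environment; not the component's actual code.
type Box = { label: string; bbox_2d: number[] };

function startCaptioning(
  video: HTMLVideoElement,      // source video (can be hidden)
  overlay: HTMLCanvasElement,   // visible overlay canvas
  runInference: (frame: ImageData, prompt: string) => Promise<Box[]>, // assumed model call
  prompt: string,
) {
  let latestBoxes: Box[] = [];
  let busy = false;

  // Display loop: redraw the newest boxes every animation frame.
  const draw = () => {
    const ctx = overlay.getContext("2d");
    if (ctx && video.videoWidth) {
      overlay.width = video.videoWidth;
      overlay.height = video.videoHeight;
      ctx.clearRect(0, 0, overlay.width, overlay.height);
      ctx.strokeStyle = "#FF00FF";
      ctx.lineWidth = 4;
      for (const b of latestBoxes) {
        const [x1, y1, x2, y2] = b.bbox_2d;
        ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);
      }
    }
    requestAnimationFrame(draw);
  };
  requestAnimationFrame(draw);

  // Inference loop: sample one frame per tick, skipping ticks while a
  // previous inference is still in flight (the component's processingState
  // guard plays the same role).
  const grab = document.createElement("canvas");
  setInterval(async () => {
    if (busy || video.paused || video.ended || !video.videoWidth) return;
    busy = true;
    try {
      grab.width = video.videoWidth;
      grab.height = video.videoHeight;
      const gctx = grab.getContext("2d", { willReadFrequently: true });
      if (gctx) {
        gctx.drawImage(video, 0, 0, grab.width, grab.height);
        latestBoxes = await runInference(gctx.getImageData(0, 0, grab.width, grab.height), prompt);
      }
    } finally {
      busy = false;
    }
  }, 200); // same cadence the component uses
}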
src/index.js
CHANGED
@@ -1,17 +1,17 @@
(All 17 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

import React from 'react';
import ReactDOM from 'react-dom/client';
import './index.css';
import App from './App';
import reportWebVitals from './reportWebVitals';

const root = ReactDOM.createRoot(document.getElementById('root'));
root.render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);

// If you want to start measuring performance in your app, pass a function
// to log results (for example: reportWebVitals(console.log))
// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
reportWebVitals();
src/reportWebVitals.js
CHANGED
@@ -1,13 +1,13 @@
(All 13 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

const reportWebVitals = onPerfEntry => {
  if (onPerfEntry && onPerfEntry instanceof Function) {
    import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
      getCLS(onPerfEntry);
      getFID(onPerfEntry);
      getFCP(onPerfEntry);
      getLCP(onPerfEntry);
      getTTFB(onPerfEntry);
    });
  }
};

export default reportWebVitals;
src/setupTests.js
CHANGED
@@ -1,5 +1,5 @@
(All 5 lines were removed and re-added with identical content, i.e. a whitespace/line-ending-only change. File contents:)

// jest-dom adds custom jest matchers for asserting on DOM nodes.
// allows you to do things like:
// expect(element).toHaveTextContent(/react/i)
// learn more: https://github.com/testing-library/jest-dom
import '@testing-library/jest-dom';