Spaces:
Running
Running
Upload 36 files
Browse files
src/components/MultiSourceCaptioningView.tsx
CHANGED
@@ -33,12 +33,13 @@ export default function MultiSourceCaptioningView() {
|
|
33 |
const [debugOutput, setDebugOutput] = useState<string>("");
|
34 |
const [canvasDims, setCanvasDims] = useState<{w:number,h:number}|null>(null);
|
35 |
const [videoDims, setVideoDims] = useState<{w:number,h:number}|null>(null);
|
|
|
36 |
|
37 |
const videoRef = useRef<HTMLVideoElement | null>(null);
|
38 |
const canvasRef = useRef<HTMLCanvasElement | null>(null);
|
39 |
const imageRef = useRef<HTMLImageElement | null>(null);
|
40 |
const webcamStreamRef = useRef<MediaStream | null>(null);
|
41 |
-
const { isLoaded, runInference } = useVLMContext();
|
42 |
|
43 |
// Webcam setup and teardown (unchanged)
|
44 |
useEffect(() => {
|
@@ -91,6 +92,7 @@ export default function MultiSourceCaptioningView() {
|
|
91 |
try {
|
92 |
setProcessing(true);
|
93 |
setError(null);
|
|
|
94 |
const fakeVideo = {
|
95 |
videoWidth: canvas.width,
|
96 |
videoHeight: canvas.height,
|
@@ -98,11 +100,14 @@ export default function MultiSourceCaptioningView() {
|
|
98 |
} as unknown as HTMLVideoElement;
|
99 |
const result = await runInference(fakeVideo, prompt);
|
100 |
setDebugOutput(result);
|
|
|
101 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
102 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
103 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
104 |
} catch (e) {
|
105 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
106 |
} finally {
|
107 |
setProcessing(false);
|
108 |
}
|
@@ -132,6 +137,7 @@ export default function MultiSourceCaptioningView() {
|
|
132 |
try {
|
133 |
setProcessing(true);
|
134 |
setError(null);
|
|
|
135 |
const fakeVideo = {
|
136 |
videoWidth: canvas.width,
|
137 |
videoHeight: canvas.height,
|
@@ -139,11 +145,14 @@ export default function MultiSourceCaptioningView() {
|
|
139 |
} as unknown as HTMLVideoElement;
|
140 |
const result = await runInference(fakeVideo, prompt);
|
141 |
setDebugOutput(result);
|
|
|
142 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
143 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
144 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
145 |
} catch (e) {
|
146 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
147 |
} finally {
|
148 |
setProcessing(false);
|
149 |
}
|
@@ -171,6 +180,7 @@ export default function MultiSourceCaptioningView() {
|
|
171 |
try {
|
172 |
setProcessing(true);
|
173 |
setError(null);
|
|
|
174 |
const fakeVideo = {
|
175 |
videoWidth: canvas.width,
|
176 |
videoHeight: canvas.height,
|
@@ -178,12 +188,15 @@ export default function MultiSourceCaptioningView() {
|
|
178 |
} as unknown as HTMLVideoElement;
|
179 |
const result = await runInference(fakeVideo, prompt);
|
180 |
setDebugOutput(result);
|
|
|
181 |
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
|
182 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
183 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
184 |
setImageProcessed(true);
|
185 |
} catch (e) {
|
186 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
187 |
} finally {
|
188 |
setProcessing(false);
|
189 |
}
|
@@ -206,6 +219,7 @@ export default function MultiSourceCaptioningView() {
|
|
206 |
try {
|
207 |
setProcessing(true);
|
208 |
setError(null);
|
|
|
209 |
const fakeVideo = {
|
210 |
videoWidth: canvas.width,
|
211 |
videoHeight: canvas.height,
|
@@ -213,11 +227,14 @@ export default function MultiSourceCaptioningView() {
|
|
213 |
} as unknown as HTMLVideoElement;
|
214 |
const result = await runInference(fakeVideo, prompt);
|
215 |
setDebugOutput(result);
|
|
|
216 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
217 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
218 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
219 |
} catch (e) {
|
220 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
221 |
} finally {
|
222 |
setProcessing(false);
|
223 |
}
|
@@ -247,6 +264,7 @@ export default function MultiSourceCaptioningView() {
|
|
247 |
try {
|
248 |
setProcessing(true);
|
249 |
setError(null);
|
|
|
250 |
const fakeVideo = {
|
251 |
videoWidth: canvas.width,
|
252 |
videoHeight: canvas.height,
|
@@ -254,11 +272,14 @@ export default function MultiSourceCaptioningView() {
|
|
254 |
} as unknown as HTMLVideoElement;
|
255 |
const result = await runInference(fakeVideo, prompt);
|
256 |
setDebugOutput(result);
|
|
|
257 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
258 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
259 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
260 |
} catch (e) {
|
261 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
262 |
} finally {
|
263 |
setProcessing(false);
|
264 |
}
|
@@ -314,6 +335,10 @@ export default function MultiSourceCaptioningView() {
|
|
314 |
|
315 |
return (
|
316 |
<div className="absolute inset-0 text-white">
|
|
|
|
|
|
|
|
|
317 |
<div className="flex flex-col items-center justify-center h-full w-full">
|
318 |
{/* Mode Selector */}
|
319 |
<div className="mb-6">
|
|
|
33 |
const [debugOutput, setDebugOutput] = useState<string>("");
|
34 |
const [canvasDims, setCanvasDims] = useState<{w:number,h:number}|null>(null);
|
35 |
const [videoDims, setVideoDims] = useState<{w:number,h:number}|null>(null);
|
36 |
+
const [inferenceStatus, setInferenceStatus] = useState<string>("");
|
37 |
|
38 |
const videoRef = useRef<HTMLVideoElement | null>(null);
|
39 |
const canvasRef = useRef<HTMLCanvasElement | null>(null);
|
40 |
const imageRef = useRef<HTMLImageElement | null>(null);
|
41 |
const webcamStreamRef = useRef<MediaStream | null>(null);
|
42 |
+
const { isLoaded, isLoading, error: modelError, runInference } = useVLMContext();
|
43 |
|
44 |
// Webcam setup and teardown (unchanged)
|
45 |
useEffect(() => {
|
|
|
92 |
try {
|
93 |
setProcessing(true);
|
94 |
setError(null);
|
95 |
+
setInferenceStatus("Running inference...");
|
96 |
const fakeVideo = {
|
97 |
videoWidth: canvas.width,
|
98 |
videoHeight: canvas.height,
|
|
|
100 |
} as unknown as HTMLVideoElement;
|
101 |
const result = await runInference(fakeVideo, prompt);
|
102 |
setDebugOutput(result);
|
103 |
+
setInferenceStatus("Inference complete.");
|
104 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
105 |
const boxes = extractJsonFromMarkdown(result) || [];
|
106 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
107 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
108 |
} catch (e) {
|
109 |
setError(e instanceof Error ? e.message : String(e));
|
110 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
111 |
} finally {
|
112 |
setProcessing(false);
|
113 |
}
|
|
|
137 |
try {
|
138 |
setProcessing(true);
|
139 |
setError(null);
|
140 |
+
setInferenceStatus("Running inference...");
|
141 |
const fakeVideo = {
|
142 |
videoWidth: canvas.width,
|
143 |
videoHeight: canvas.height,
|
|
|
145 |
} as unknown as HTMLVideoElement;
|
146 |
const result = await runInference(fakeVideo, prompt);
|
147 |
setDebugOutput(result);
|
148 |
+
setInferenceStatus("Inference complete.");
|
149 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
150 |
const boxes = extractJsonFromMarkdown(result) || [];
|
151 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
152 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
153 |
} catch (e) {
|
154 |
setError(e instanceof Error ? e.message : String(e));
|
155 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
156 |
} finally {
|
157 |
setProcessing(false);
|
158 |
}
|
|
|
180 |
try {
|
181 |
setProcessing(true);
|
182 |
setError(null);
|
183 |
+
setInferenceStatus("Running inference...");
|
184 |
const fakeVideo = {
|
185 |
videoWidth: canvas.width,
|
186 |
videoHeight: canvas.height,
|
|
|
188 |
} as unknown as HTMLVideoElement;
|
189 |
const result = await runInference(fakeVideo, prompt);
|
190 |
setDebugOutput(result);
|
191 |
+
setInferenceStatus("Inference complete.");
|
192 |
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
|
193 |
const boxes = extractJsonFromMarkdown(result) || [];
|
194 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
195 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
196 |
setImageProcessed(true);
|
197 |
} catch (e) {
|
198 |
setError(e instanceof Error ? e.message : String(e));
|
199 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
200 |
} finally {
|
201 |
setProcessing(false);
|
202 |
}
|
|
|
219 |
try {
|
220 |
setProcessing(true);
|
221 |
setError(null);
|
222 |
+
setInferenceStatus("Running inference...");
|
223 |
const fakeVideo = {
|
224 |
videoWidth: canvas.width,
|
225 |
videoHeight: canvas.height,
|
|
|
227 |
} as unknown as HTMLVideoElement;
|
228 |
const result = await runInference(fakeVideo, prompt);
|
229 |
setDebugOutput(result);
|
230 |
+
setInferenceStatus("Inference complete.");
|
231 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
232 |
const boxes = extractJsonFromMarkdown(result) || [];
|
233 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
234 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
235 |
} catch (e) {
|
236 |
setError(e instanceof Error ? e.message : String(e));
|
237 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
238 |
} finally {
|
239 |
setProcessing(false);
|
240 |
}
|
|
|
264 |
try {
|
265 |
setProcessing(true);
|
266 |
setError(null);
|
267 |
+
setInferenceStatus("Running inference...");
|
268 |
const fakeVideo = {
|
269 |
videoWidth: canvas.width,
|
270 |
videoHeight: canvas.height,
|
|
|
272 |
} as unknown as HTMLVideoElement;
|
273 |
const result = await runInference(fakeVideo, prompt);
|
274 |
setDebugOutput(result);
|
275 |
+
setInferenceStatus("Inference complete.");
|
276 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
277 |
const boxes = extractJsonFromMarkdown(result) || [];
|
278 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
279 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
280 |
} catch (e) {
|
281 |
setError(e instanceof Error ? e.message : String(e));
|
282 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
283 |
} finally {
|
284 |
setProcessing(false);
|
285 |
}
|
|
|
335 |
|
336 |
return (
|
337 |
<div className="absolute inset-0 text-white">
|
338 |
+
<div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
|
339 |
+
{isLoading ? "Loading model..." : isLoaded ? "Model loaded" : modelError ? `Model error: ${modelError}` : "Model not loaded"}
|
340 |
+
</div>
|
341 |
+
<div className="text-center text-sm text-blue-300 mt-2">{inferenceStatus}</div>
|
342 |
<div className="flex flex-col items-center justify-center h-full w-full">
|
343 |
{/* Mode Selector */}
|
344 |
<div className="mb-6">
|