Quazim0t0 committed on
Commit
c5dbb8d
·
verified ·
1 Parent(s): 26adbba

Upload 36 files

Browse files
src/components/MultiSourceCaptioningView.tsx CHANGED
@@ -33,12 +33,13 @@ export default function MultiSourceCaptioningView() {
33
  const [debugOutput, setDebugOutput] = useState<string>("");
34
  const [canvasDims, setCanvasDims] = useState<{w:number,h:number}|null>(null);
35
  const [videoDims, setVideoDims] = useState<{w:number,h:number}|null>(null);
 
36
 
37
  const videoRef = useRef<HTMLVideoElement | null>(null);
38
  const canvasRef = useRef<HTMLCanvasElement | null>(null);
39
  const imageRef = useRef<HTMLImageElement | null>(null);
40
  const webcamStreamRef = useRef<MediaStream | null>(null);
41
- const { isLoaded, runInference } = useVLMContext();
42
 
43
  // Webcam setup and teardown (unchanged)
44
  useEffect(() => {
@@ -91,6 +92,7 @@ export default function MultiSourceCaptioningView() {
91
  try {
92
  setProcessing(true);
93
  setError(null);
 
94
  const fakeVideo = {
95
  videoWidth: canvas.width,
96
  videoHeight: canvas.height,
@@ -98,11 +100,14 @@ export default function MultiSourceCaptioningView() {
98
  } as unknown as HTMLVideoElement;
99
  const result = await runInference(fakeVideo, prompt);
100
  setDebugOutput(result);
 
101
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
102
  const boxes = extractJsonFromMarkdown(result) || [];
 
103
  drawBoundingBoxesOnCanvas(ctx, boxes);
104
  } catch (e) {
105
  setError(e instanceof Error ? e.message : String(e));
 
106
  } finally {
107
  setProcessing(false);
108
  }
@@ -132,6 +137,7 @@ export default function MultiSourceCaptioningView() {
132
  try {
133
  setProcessing(true);
134
  setError(null);
 
135
  const fakeVideo = {
136
  videoWidth: canvas.width,
137
  videoHeight: canvas.height,
@@ -139,11 +145,14 @@ export default function MultiSourceCaptioningView() {
139
  } as unknown as HTMLVideoElement;
140
  const result = await runInference(fakeVideo, prompt);
141
  setDebugOutput(result);
 
142
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
143
  const boxes = extractJsonFromMarkdown(result) || [];
 
144
  drawBoundingBoxesOnCanvas(ctx, boxes);
145
  } catch (e) {
146
  setError(e instanceof Error ? e.message : String(e));
 
147
  } finally {
148
  setProcessing(false);
149
  }
@@ -171,6 +180,7 @@ export default function MultiSourceCaptioningView() {
171
  try {
172
  setProcessing(true);
173
  setError(null);
 
174
  const fakeVideo = {
175
  videoWidth: canvas.width,
176
  videoHeight: canvas.height,
@@ -178,12 +188,15 @@ export default function MultiSourceCaptioningView() {
178
  } as unknown as HTMLVideoElement;
179
  const result = await runInference(fakeVideo, prompt);
180
  setDebugOutput(result);
 
181
  ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
182
  const boxes = extractJsonFromMarkdown(result) || [];
 
183
  drawBoundingBoxesOnCanvas(ctx, boxes);
184
  setImageProcessed(true);
185
  } catch (e) {
186
  setError(e instanceof Error ? e.message : String(e));
 
187
  } finally {
188
  setProcessing(false);
189
  }
@@ -206,6 +219,7 @@ export default function MultiSourceCaptioningView() {
206
  try {
207
  setProcessing(true);
208
  setError(null);
 
209
  const fakeVideo = {
210
  videoWidth: canvas.width,
211
  videoHeight: canvas.height,
@@ -213,11 +227,14 @@ export default function MultiSourceCaptioningView() {
213
  } as unknown as HTMLVideoElement;
214
  const result = await runInference(fakeVideo, prompt);
215
  setDebugOutput(result);
 
216
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
217
  const boxes = extractJsonFromMarkdown(result) || [];
 
218
  drawBoundingBoxesOnCanvas(ctx, boxes);
219
  } catch (e) {
220
  setError(e instanceof Error ? e.message : String(e));
 
221
  } finally {
222
  setProcessing(false);
223
  }
@@ -247,6 +264,7 @@ export default function MultiSourceCaptioningView() {
247
  try {
248
  setProcessing(true);
249
  setError(null);
 
250
  const fakeVideo = {
251
  videoWidth: canvas.width,
252
  videoHeight: canvas.height,
@@ -254,11 +272,14 @@ export default function MultiSourceCaptioningView() {
254
  } as unknown as HTMLVideoElement;
255
  const result = await runInference(fakeVideo, prompt);
256
  setDebugOutput(result);
 
257
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
258
  const boxes = extractJsonFromMarkdown(result) || [];
 
259
  drawBoundingBoxesOnCanvas(ctx, boxes);
260
  } catch (e) {
261
  setError(e instanceof Error ? e.message : String(e));
 
262
  } finally {
263
  setProcessing(false);
264
  }
@@ -314,6 +335,10 @@ export default function MultiSourceCaptioningView() {
314
 
315
  return (
316
  <div className="absolute inset-0 text-white">
 
 
 
 
317
  <div className="flex flex-col items-center justify-center h-full w-full">
318
  {/* Mode Selector */}
319
  <div className="mb-6">
 
33
  const [debugOutput, setDebugOutput] = useState<string>("");
34
  const [canvasDims, setCanvasDims] = useState<{w:number,h:number}|null>(null);
35
  const [videoDims, setVideoDims] = useState<{w:number,h:number}|null>(null);
36
+ const [inferenceStatus, setInferenceStatus] = useState<string>("");
37
 
38
  const videoRef = useRef<HTMLVideoElement | null>(null);
39
  const canvasRef = useRef<HTMLCanvasElement | null>(null);
40
  const imageRef = useRef<HTMLImageElement | null>(null);
41
  const webcamStreamRef = useRef<MediaStream | null>(null);
42
+ const { isLoaded, isLoading, error: modelError, runInference } = useVLMContext();
43
 
44
  // Webcam setup and teardown (unchanged)
45
  useEffect(() => {
 
92
  try {
93
  setProcessing(true);
94
  setError(null);
95
+ setInferenceStatus("Running inference...");
96
  const fakeVideo = {
97
  videoWidth: canvas.width,
98
  videoHeight: canvas.height,
 
100
  } as unknown as HTMLVideoElement;
101
  const result = await runInference(fakeVideo, prompt);
102
  setDebugOutput(result);
103
+ setInferenceStatus("Inference complete.");
104
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
105
  const boxes = extractJsonFromMarkdown(result) || [];
106
+ if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
107
  drawBoundingBoxesOnCanvas(ctx, boxes);
108
  } catch (e) {
109
  setError(e instanceof Error ? e.message : String(e));
110
+ setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
111
  } finally {
112
  setProcessing(false);
113
  }
 
137
  try {
138
  setProcessing(true);
139
  setError(null);
140
+ setInferenceStatus("Running inference...");
141
  const fakeVideo = {
142
  videoWidth: canvas.width,
143
  videoHeight: canvas.height,
 
145
  } as unknown as HTMLVideoElement;
146
  const result = await runInference(fakeVideo, prompt);
147
  setDebugOutput(result);
148
+ setInferenceStatus("Inference complete.");
149
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
150
  const boxes = extractJsonFromMarkdown(result) || [];
151
+ if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
152
  drawBoundingBoxesOnCanvas(ctx, boxes);
153
  } catch (e) {
154
  setError(e instanceof Error ? e.message : String(e));
155
+ setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
156
  } finally {
157
  setProcessing(false);
158
  }
 
180
  try {
181
  setProcessing(true);
182
  setError(null);
183
+ setInferenceStatus("Running inference...");
184
  const fakeVideo = {
185
  videoWidth: canvas.width,
186
  videoHeight: canvas.height,
 
188
  } as unknown as HTMLVideoElement;
189
  const result = await runInference(fakeVideo, prompt);
190
  setDebugOutput(result);
191
+ setInferenceStatus("Inference complete.");
192
  ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
193
  const boxes = extractJsonFromMarkdown(result) || [];
194
+ if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
195
  drawBoundingBoxesOnCanvas(ctx, boxes);
196
  setImageProcessed(true);
197
  } catch (e) {
198
  setError(e instanceof Error ? e.message : String(e));
199
+ setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
200
  } finally {
201
  setProcessing(false);
202
  }
 
219
  try {
220
  setProcessing(true);
221
  setError(null);
222
+ setInferenceStatus("Running inference...");
223
  const fakeVideo = {
224
  videoWidth: canvas.width,
225
  videoHeight: canvas.height,
 
227
  } as unknown as HTMLVideoElement;
228
  const result = await runInference(fakeVideo, prompt);
229
  setDebugOutput(result);
230
+ setInferenceStatus("Inference complete.");
231
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
232
  const boxes = extractJsonFromMarkdown(result) || [];
233
+ if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
234
  drawBoundingBoxesOnCanvas(ctx, boxes);
235
  } catch (e) {
236
  setError(e instanceof Error ? e.message : String(e));
237
+ setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
238
  } finally {
239
  setProcessing(false);
240
  }
 
264
  try {
265
  setProcessing(true);
266
  setError(null);
267
+ setInferenceStatus("Running inference...");
268
  const fakeVideo = {
269
  videoWidth: canvas.width,
270
  videoHeight: canvas.height,
 
272
  } as unknown as HTMLVideoElement;
273
  const result = await runInference(fakeVideo, prompt);
274
  setDebugOutput(result);
275
+ setInferenceStatus("Inference complete.");
276
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
277
  const boxes = extractJsonFromMarkdown(result) || [];
278
+ if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
279
  drawBoundingBoxesOnCanvas(ctx, boxes);
280
  } catch (e) {
281
  setError(e instanceof Error ? e.message : String(e));
282
+ setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
283
  } finally {
284
  setProcessing(false);
285
  }
 
335
 
336
  return (
337
  <div className="absolute inset-0 text-white">
338
+ <div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
339
+ {isLoading ? "Loading model..." : isLoaded ? "Model loaded" : modelError ? `Model error: ${modelError}` : "Model not loaded"}
340
+ </div>
341
+ <div className="text-center text-sm text-blue-300 mt-2">{inferenceStatus}</div>
342
  <div className="flex flex-col items-center justify-center h-full w-full">
343
  {/* Mode Selector */}
344
  <div className="mb-6">