Spaces:
Running
Running
Upload 51 files
Browse files
src/components/MultiSourceCaptioningView.tsx
CHANGED
@@ -2,11 +2,11 @@ import { useState, useRef, useEffect } from "react";
|
|
2 |
import { useVLMContext } from "../context/useVLMContext";
|
3 |
import { drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
|
4 |
|
5 |
-
const MODES = ["Webcam", "
|
6 |
type Mode = typeof MODES[number];
|
7 |
|
8 |
const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
|
9 |
-
const EXAMPLE_PROMPT = "Detect
|
10 |
|
11 |
function isImageFile(file: File) {
|
12 |
return file.type.startsWith("image/");
|
@@ -435,84 +435,6 @@ export default function MultiSourceCaptioningView() {
|
|
435 |
{error && <div className="text-red-400 mt-2">Error: {error}</div>}
|
436 |
</div>
|
437 |
)}
|
438 |
-
{mode === "URL" && (
|
439 |
-
<div className="w-full text-center flex flex-col items-center">
|
440 |
-
<p className="mb-4">Enter a video stream URL (e.g., HTTP MP4, MJPEG, HLS, etc.):</p>
|
441 |
-
<div className="flex w-full max-w-xl mb-4">
|
442 |
-
<input
|
443 |
-
type="text"
|
444 |
-
className="flex-1 px-4 py-2 rounded-l-lg text-black"
|
445 |
-
value={inputUrl}
|
446 |
-
onChange={(e) => setInputUrl(e.target.value)}
|
447 |
-
placeholder="Paste video URL here"
|
448 |
-
/>
|
449 |
-
<button
|
450 |
-
className="px-4 py-2 rounded-r-lg bg-blue-600 text-white font-semibold"
|
451 |
-
onClick={() => setVideoUrl(inputUrl)}
|
452 |
-
>
|
453 |
-
Load
|
454 |
-
</button>
|
455 |
-
</div>
|
456 |
-
<div className="mb-4 w-full max-w-xl">
|
457 |
-
<label className="block text-left mb-2 font-medium">Detection Prompt:</label>
|
458 |
-
<textarea
|
459 |
-
className="w-full p-2 rounded-lg text-black"
|
460 |
-
rows={3}
|
461 |
-
value={prompt}
|
462 |
-
onChange={(e) => setPrompt(e.target.value)}
|
463 |
-
/>
|
464 |
-
</div>
|
465 |
-
<div className="relative w-full max-w-xl">
|
466 |
-
{/* Visible overlay video for user */}
|
467 |
-
<video
|
468 |
-
ref={overlayVideoRef}
|
469 |
-
src={videoUrl}
|
470 |
-
controls
|
471 |
-
autoPlay
|
472 |
-
loop
|
473 |
-
muted
|
474 |
-
playsInline
|
475 |
-
className="w-full rounded-lg shadow-lg mb-2"
|
476 |
-
style={{ background: "#222" }}
|
477 |
-
/>
|
478 |
-
{/* Hidden processing video for FastVLM/canvas */}
|
479 |
-
<video
|
480 |
-
ref={processingVideoRef}
|
481 |
-
src={videoUrl}
|
482 |
-
autoPlay
|
483 |
-
loop
|
484 |
-
muted
|
485 |
-
playsInline
|
486 |
-
style={{ display: "none" }}
|
487 |
-
onLoadedData={e => { e.currentTarget.play().catch(() => {}); }}
|
488 |
-
/>
|
489 |
-
<canvas
|
490 |
-
ref={canvasRef}
|
491 |
-
className="absolute top-0 left-0 w-full h-full pointer-events-none"
|
492 |
-
style={{ zIndex: 20, pointerEvents: "none" }}
|
493 |
-
/>
|
494 |
-
<button
|
495 |
-
className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold"
|
496 |
-
onClick={handleToggleUrlProcessing}
|
497 |
-
>
|
498 |
-
{urlProcessing ? "Stop Processing" : "Start Processing"}
|
499 |
-
</button>
|
500 |
-
</div>
|
501 |
-
{processing && <div className="text-blue-400 mt-2">Processing frame...</div>}
|
502 |
-
{error && <div className="text-red-400 mt-2">Error: {error}</div>}
|
503 |
-
<button
|
504 |
-
className="mt-4 px-6 py-2 rounded-lg bg-gray-600 text-white font-semibold"
|
505 |
-
onClick={handleTestDrawBox}
|
506 |
-
>
|
507 |
-
Test Draw Box
|
508 |
-
</button>
|
509 |
-
<div className="mt-2 p-2 bg-gray-800 rounded text-xs">
|
510 |
-
<div>Canvas: {canvasDims ? `${canvasDims.w}x${canvasDims.h}` : "-"} | Video: {videoDims ? `${videoDims.w}x${videoDims.h}` : "-"}</div>
|
511 |
-
<div>Raw Model Output:</div>
|
512 |
-
<pre className="overflow-x-auto max-h-32 whitespace-pre-wrap">{debugOutput}</pre>
|
513 |
-
</div>
|
514 |
-
</div>
|
515 |
-
)}
|
516 |
{mode === "File" && (
|
517 |
<div className="w-full text-center flex flex-col items-center">
|
518 |
<div className="mb-4 w-full max-w-xl">
|
|
|
2 |
import { useVLMContext } from "../context/useVLMContext";
|
3 |
import { drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
|
4 |
|
5 |
+
const MODES = ["Webcam", "File"] as const;
|
6 |
type Mode = typeof MODES[number];
|
7 |
|
8 |
const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
|
9 |
+
const EXAMPLE_PROMPT = "Detect each individual bird in the image. The birds are moving. For each object, output a JSON array of objects with fields. Each bird should have its own ([x1, y1, x2, y2]) where coordinates are in pixel values. This should be used to draw a box using the points around the bird. Follow the format of this Example: [x1, y1, x2, y2], [x1, y1, x2, y2]";
|
10 |
|
11 |
function isImageFile(file: File) {
|
12 |
return file.type.startsWith("image/");
|
|
|
435 |
{error && <div className="text-red-400 mt-2">Error: {error}</div>}
|
436 |
</div>
|
437 |
)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
{mode === "File" && (
|
439 |
<div className="w-full text-center flex flex-col items-center">
|
440 |
<div className="mb-4 w-full max-w-xl">
|