Quazim0t0 committed on
Commit
d08dbeb
·
verified ·
1 Parent(s): 5e2eff9

Upload 51 files

Browse files
src/components/MultiSourceCaptioningView.tsx CHANGED
@@ -2,11 +2,11 @@ import { useState, useRef, useEffect } from "react";
2
  import { useVLMContext } from "../context/useVLMContext";
3
  import { drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
4
 
5
- const MODES = ["Webcam", "URL", "File"] as const;
6
  type Mode = typeof MODES[number];
7
 
8
  const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
9
- const EXAMPLE_PROMPT = "Detect all birds in the image. For each bird, output a JSON array of objects with fields: 'label' (string) and 'bbox_2d' ([x1, y1, x2, y2]) where coordinates are in pixel values. Example: [{\"label\": \"bird\", \"bbox_2d\": [x1, y1, x2, y2]}]";
10
 
11
  function isImageFile(file: File) {
12
  return file.type.startsWith("image/");
@@ -435,84 +435,6 @@ export default function MultiSourceCaptioningView() {
435
  {error && <div className="text-red-400 mt-2">Error: {error}</div>}
436
  </div>
437
  )}
438
- {mode === "URL" && (
439
- <div className="w-full text-center flex flex-col items-center">
440
- <p className="mb-4">Enter a video stream URL (e.g., HTTP MP4, MJPEG, HLS, etc.):</p>
441
- <div className="flex w-full max-w-xl mb-4">
442
- <input
443
- type="text"
444
- className="flex-1 px-4 py-2 rounded-l-lg text-black"
445
- value={inputUrl}
446
- onChange={(e) => setInputUrl(e.target.value)}
447
- placeholder="Paste video URL here"
448
- />
449
- <button
450
- className="px-4 py-2 rounded-r-lg bg-blue-600 text-white font-semibold"
451
- onClick={() => setVideoUrl(inputUrl)}
452
- >
453
- Load
454
- </button>
455
- </div>
456
- <div className="mb-4 w-full max-w-xl">
457
- <label className="block text-left mb-2 font-medium">Detection Prompt:</label>
458
- <textarea
459
- className="w-full p-2 rounded-lg text-black"
460
- rows={3}
461
- value={prompt}
462
- onChange={(e) => setPrompt(e.target.value)}
463
- />
464
- </div>
465
- <div className="relative w-full max-w-xl">
466
- {/* Visible overlay video for user */}
467
- <video
468
- ref={overlayVideoRef}
469
- src={videoUrl}
470
- controls
471
- autoPlay
472
- loop
473
- muted
474
- playsInline
475
- className="w-full rounded-lg shadow-lg mb-2"
476
- style={{ background: "#222" }}
477
- />
478
- {/* Hidden processing video for FastVLM/canvas */}
479
- <video
480
- ref={processingVideoRef}
481
- src={videoUrl}
482
- autoPlay
483
- loop
484
- muted
485
- playsInline
486
- style={{ display: "none" }}
487
- onLoadedData={e => { e.currentTarget.play().catch(() => {}); }}
488
- />
489
- <canvas
490
- ref={canvasRef}
491
- className="absolute top-0 left-0 w-full h-full pointer-events-none"
492
- style={{ zIndex: 20, pointerEvents: "none" }}
493
- />
494
- <button
495
- className="mt-4 px-6 py-2 rounded-lg bg-blue-600 text-white font-semibold"
496
- onClick={handleToggleUrlProcessing}
497
- >
498
- {urlProcessing ? "Stop Processing" : "Start Processing"}
499
- </button>
500
- </div>
501
- {processing && <div className="text-blue-400 mt-2">Processing frame...</div>}
502
- {error && <div className="text-red-400 mt-2">Error: {error}</div>}
503
- <button
504
- className="mt-4 px-6 py-2 rounded-lg bg-gray-600 text-white font-semibold"
505
- onClick={handleTestDrawBox}
506
- >
507
- Test Draw Box
508
- </button>
509
- <div className="mt-2 p-2 bg-gray-800 rounded text-xs">
510
- <div>Canvas: {canvasDims ? `${canvasDims.w}x${canvasDims.h}` : "-"} | Video: {videoDims ? `${videoDims.w}x${videoDims.h}` : "-"}</div>
511
- <div>Raw Model Output:</div>
512
- <pre className="overflow-x-auto max-h-32 whitespace-pre-wrap">{debugOutput}</pre>
513
- </div>
514
- </div>
515
- )}
516
  {mode === "File" && (
517
  <div className="w-full text-center flex flex-col items-center">
518
  <div className="mb-4 w-full max-w-xl">
 
2
  import { useVLMContext } from "../context/useVLMContext";
3
  import { drawBoundingBoxesOnCanvas } from "./BoxAnnotator";
4
 
5
+ const MODES = ["Webcam", "File"] as const;
6
  type Mode = typeof MODES[number];
7
 
8
  const EXAMPLE_VIDEO_URL = "/space/videos/1.mp4";
9
+ const EXAMPLE_PROMPT = "Detect each individual bird in the image. The birds are moving. For each object, output a JSON array of objects with fields. Each bird should have its own ([x1, y1, x2, y2]) where coordinates are in pixel values. This should be used to draw a box using the points around the bird. Follow the format of this Example: [x1, y1, x2, y2], [x1, y1, x2, y2]";
10
 
11
  function isImageFile(file: File) {
12
  return file.type.startsWith("image/");
 
435
  {error && <div className="text-red-400 mt-2">Error: {error}</div>}
436
  </div>
437
  )}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  {mode === "File" && (
439
  <div className="w-full text-center flex flex-col items-center">
440
  <div className="mb-4 w-full max-w-xl">