Spaces:

Quazim0t0
/

FastVLMBoxes

Running

Quazim0t0 committed 1 day ago

Commit

af73025

verified ·

1 Parent(s): 988447b

Upload 36 files

Files changed (2) hide show

src/context/VLMContext.tsx CHANGED Viewed

@@ -67,7 +67,7 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
   );
   const runInference = useCallback(
-    async (video: HTMLVideoElement, instruction: string, onTextUpdate?: (text: string) => void): Promise<string> => {
       if (inferenceLock.current) {
         console.log("Inference already running, skipping frame");
         return ""; // Return empty string to signal a skip
@@ -83,13 +83,25 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
       }
       const canvas = canvasRef.current;
-      canvas.width = video.videoWidth;
-      canvas.height = video.videoHeight;
       const ctx = canvas.getContext("2d", { willReadFrequently: true });
       if (!ctx) throw new Error("Could not get canvas context");
-      ctx.drawImage(video, 0, 0);
       const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
       const rawImg = new RawImage(frame.data, frame.width, frame.height, 4);

   );
   const runInference = useCallback(
+    async (media: HTMLVideoElement | HTMLImageElement, instruction: string, onTextUpdate?: (text: string) => void): Promise<string> => {
       if (inferenceLock.current) {
         console.log("Inference already running, skipping frame");
         return ""; // Return empty string to signal a skip
       }
       const canvas = canvasRef.current;
+      // Support both video and image
+      let width = 0;
+      let height = 0;
+      if (media instanceof HTMLVideoElement) {
+        width = media.videoWidth;
+        height = media.videoHeight;
+      } else if (media instanceof HTMLImageElement) {
+        width = media.naturalWidth;
+        height = media.naturalHeight;
+      } else {
+        throw new Error("Unsupported media type");
+      }
+      canvas.width = width;
+      canvas.height = height;
       const ctx = canvas.getContext("2d", { willReadFrequently: true });
       if (!ctx) throw new Error("Could not get canvas context");
+      ctx.drawImage(media, 0, 0, width, height);
       const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
       const rawImg = new RawImage(frame.data, frame.width, frame.height, 4);

src/types/vlm.ts CHANGED Viewed

@@ -4,7 +4,7 @@ export type VLMContextValue = {
   error: string | null;
   loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
   runInference: (
-    video: HTMLVideoElement,
     instruction: string,
     onTextUpdate?: (text: string) => void,
   ) => Promise<string>;

   error: string | null;
   loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
   runInference: (
+    media: HTMLVideoElement | HTMLImageElement,
     instruction: string,
     onTextUpdate?: (text: string) => void,
   ) => Promise<string>;