Quazim0t0 committed on
Commit
af73025
·
verified ·
1 Parent(s): 988447b

Upload 36 files

Browse files
Files changed (2) hide show
  1. src/context/VLMContext.tsx +16 -4
  2. src/types/vlm.ts +1 -1
src/context/VLMContext.tsx CHANGED
@@ -67,7 +67,7 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
67
  );
68
 
69
  const runInference = useCallback(
70
- async (video: HTMLVideoElement, instruction: string, onTextUpdate?: (text: string) => void): Promise<string> => {
71
  if (inferenceLock.current) {
72
  console.log("Inference already running, skipping frame");
73
  return ""; // Return empty string to signal a skip
@@ -83,13 +83,25 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
83
  }
84
  const canvas = canvasRef.current;
85
 
86
- canvas.width = video.videoWidth;
87
- canvas.height = video.videoHeight;
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  const ctx = canvas.getContext("2d", { willReadFrequently: true });
90
  if (!ctx) throw new Error("Could not get canvas context");
91
 
92
- ctx.drawImage(video, 0, 0);
93
 
94
  const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
95
  const rawImg = new RawImage(frame.data, frame.width, frame.height, 4);
 
67
  );
68
 
69
  const runInference = useCallback(
70
+ async (media: HTMLVideoElement | HTMLImageElement, instruction: string, onTextUpdate?: (text: string) => void): Promise<string> => {
71
  if (inferenceLock.current) {
72
  console.log("Inference already running, skipping frame");
73
  return ""; // Return empty string to signal a skip
 
83
  }
84
  const canvas = canvasRef.current;
85
 
86
+ // Support both video and image
87
+ let width = 0;
88
+ let height = 0;
89
+ if (media instanceof HTMLVideoElement) {
90
+ width = media.videoWidth;
91
+ height = media.videoHeight;
92
+ } else if (media instanceof HTMLImageElement) {
93
+ width = media.naturalWidth;
94
+ height = media.naturalHeight;
95
+ } else {
96
+ throw new Error("Unsupported media type");
97
+ }
98
+ canvas.width = width;
99
+ canvas.height = height;
100
 
101
  const ctx = canvas.getContext("2d", { willReadFrequently: true });
102
  if (!ctx) throw new Error("Could not get canvas context");
103
 
104
+ ctx.drawImage(media, 0, 0, width, height);
105
 
106
  const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
107
  const rawImg = new RawImage(frame.data, frame.width, frame.height, 4);
src/types/vlm.ts CHANGED
@@ -4,7 +4,7 @@ export type VLMContextValue = {
4
  error: string | null;
5
  loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
6
  runInference: (
7
- video: HTMLVideoElement,
8
  instruction: string,
9
  onTextUpdate?: (text: string) => void,
10
  ) => Promise<string>;
 
4
  error: string | null;
5
  loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
6
  runInference: (
7
+ media: HTMLVideoElement | HTMLImageElement,
8
  instruction: string,
9
  onTextUpdate?: (text: string) => void,
10
  ) => Promise<string>;