Spaces:
Running
Running
Upload 36 files
Browse files
- src/context/VLMContext.tsx +16 -4
- src/types/vlm.ts +1 -1
src/context/VLMContext.tsx
CHANGED
@@ -67,7 +67,7 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
|
|
67 |
);
|
68 |
|
69 |
const runInference = useCallback(
|
70 |
-
async (
|
71 |
if (inferenceLock.current) {
|
72 |
console.log("Inference already running, skipping frame");
|
73 |
return ""; // Return empty string to signal a skip
|
@@ -83,13 +83,25 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
|
|
83 |
}
|
84 |
const canvas = canvasRef.current;
|
85 |
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
const ctx = canvas.getContext("2d", { willReadFrequently: true });
|
90 |
if (!ctx) throw new Error("Could not get canvas context");
|
91 |
|
92 |
-
ctx.drawImage(
|
93 |
|
94 |
const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
95 |
const rawImg = new RawImage(frame.data, frame.width, frame.height, 4);
|
|
|
67 |
);
|
68 |
|
69 |
const runInference = useCallback(
|
70 |
+
async (media: HTMLVideoElement | HTMLImageElement, instruction: string, onTextUpdate?: (text: string) => void): Promise<string> => {
|
71 |
if (inferenceLock.current) {
|
72 |
console.log("Inference already running, skipping frame");
|
73 |
return ""; // Return empty string to signal a skip
|
|
|
83 |
}
|
84 |
const canvas = canvasRef.current;
|
85 |
|
86 |
+
// Support both video and image
|
87 |
+
let width = 0;
|
88 |
+
let height = 0;
|
89 |
+
if (media instanceof HTMLVideoElement) {
|
90 |
+
width = media.videoWidth;
|
91 |
+
height = media.videoHeight;
|
92 |
+
} else if (media instanceof HTMLImageElement) {
|
93 |
+
width = media.naturalWidth;
|
94 |
+
height = media.naturalHeight;
|
95 |
+
} else {
|
96 |
+
throw new Error("Unsupported media type");
|
97 |
+
}
|
98 |
+
canvas.width = width;
|
99 |
+
canvas.height = height;
|
100 |
|
101 |
const ctx = canvas.getContext("2d", { willReadFrequently: true });
|
102 |
if (!ctx) throw new Error("Could not get canvas context");
|
103 |
|
104 |
+
ctx.drawImage(media, 0, 0, width, height);
|
105 |
|
106 |
const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
107 |
const rawImg = new RawImage(frame.data, frame.width, frame.height, 4);
|
src/types/vlm.ts
CHANGED
@@ -4,7 +4,7 @@ export type VLMContextValue = {
|
|
4 |
error: string | null;
|
5 |
loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
|
6 |
runInference: (
|
7 |
-
|
8 |
instruction: string,
|
9 |
onTextUpdate?: (text: string) => void,
|
10 |
) => Promise<string>;
|
|
|
4 |
error: string | null;
|
5 |
loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
|
6 |
runInference: (
|
7 |
+
media: HTMLVideoElement | HTMLImageElement,
|
8 |
instruction: string,
|
9 |
onTextUpdate?: (text: string) => void,
|
10 |
) => Promise<string>;
|