goupilew committed on
feat: add support for multimodal in Vertex (#1338)
* feat: add support for multimodal in Vertex
* Nit changes and remove tools if multimodal
* revert model name change
* Fix tools/multimodal condition
* chores(lint): fix formatting
---------
Co-authored-by: Thomas <[email protected]>
Co-authored-by: Nathan Sarrazin <[email protected]>
- README.md +11 -3
- src/lib/server/endpoints/google/endpointVertex.ts +48 -12
README.md CHANGED

@@ -775,21 +775,29 @@ MODELS=`[
     {
       "name": "gemini-1.5-pro",
       "displayName": "Vertex Gemini Pro 1.5",
+      "multimodal": true,
       "endpoints" : [{
         "type": "vertex",
         "project": "abc-xyz",
         "location": "europe-west3",
         "model": "gemini-1.5-pro-preview-0409", // model-name
-
         // Optional
         "safetyThreshold": "BLOCK_MEDIUM_AND_ABOVE",
         "apiEndpoint": "", // alternative api endpoint url,
-        // Optional
         "tools": [{
           "googleSearchRetrieval": {
             "disableAttribution": true
           }
-        }]
+        }],
+        "multimodal": {
+          "image": {
+            "supportedMimeTypes": ["image/png", "image/jpeg", "image/webp"],
+            "preferredMimeType": "image/png",
+            "maxSizeInMB": 5,
+            "maxWidth": 2000,
+            "maxHeight": 1000
+          }
+        }
       }]
     },
   ]`
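For reference, the endpoint-level `multimodal` block shown above is optional: per the schema added in `endpointVertex.ts` below, omitting it falls back to the defaults (`preferredMimeType: "image/webp"`, `maxSizeInMB: Infinity`, `maxWidth`/`maxHeight` of 4096, with `avif`/`tiff`/`gif` also accepted). A minimal entry relying on those defaults might look like this sketch, reusing the same placeholder `project`/`location`/`model` values as the example above:

```
MODELS=`[
  {
    "name": "gemini-1.5-pro",
    "displayName": "Vertex Gemini Pro 1.5",
    "multimodal": true,
    "endpoints": [{
      "type": "vertex",
      "project": "abc-xyz",
      "location": "europe-west3",
      "model": "gemini-1.5-pro-preview-0409"
    }]
  }
]`
```

The top-level `"multimodal": true` flag is what enables file uploads for the model in the UI; the endpoint-level block only tunes how uploaded images are preprocessed before being sent to Vertex.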
src/lib/server/endpoints/google/endpointVertex.ts CHANGED

@@ -9,6 +9,7 @@ import type { Endpoint } from "../endpoints";
 import { z } from "zod";
 import type { Message } from "$lib/types/Message";
 import type { TextGenerationStreamOutput } from "@huggingface/inference";
+import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";

 export const endpointVertexParametersSchema = z.object({
 	weight: z.number().int().positive().default(1),
@@ -27,10 +28,28 @@ export const endpointVertexParametersSchema = z.object({
 		])
 		.optional(),
 	tools: z.array(z.any()).optional(),
+	multimodal: z
+		.object({
+			image: createImageProcessorOptionsValidator({
+				supportedMimeTypes: [
+					"image/png",
+					"image/jpeg",
+					"image/webp",
+					"image/avif",
+					"image/tiff",
+					"image/gif",
+				],
+				preferredMimeType: "image/webp",
+				maxSizeInMB: Infinity,
+				maxWidth: 4096,
+				maxHeight: 4096,
+			}),
+		})
+		.default({}),
 });

 export function endpointVertex(input: z.input<typeof endpointVertexParametersSchema>): Endpoint {
-	const { project, location, model, apiEndpoint, safetyThreshold, tools } =
+	const { project, location, model, apiEndpoint, safetyThreshold, tools, multimodal } =
 		endpointVertexParametersSchema.parse(input);

 	const vertex_ai = new VertexAI({
@@ -42,6 +61,8 @@ export function endpointVertex(input: z.input<typeof endpointVertexParametersSchema>): Endpoint {
 	return async ({ messages, preprompt, generateSettings }) => {
 		const parameters = { ...model.parameters, ...generateSettings };

+		const hasFiles = messages.some((message) => message.files && message.files.length > 0);
+
 		const generativeModel = vertex_ai.getGenerativeModel({
 			model: model.id ?? model.name,
 			safetySettings: safetyThreshold
@@ -73,7 +94,8 @@ export function endpointVertex(input: z.input<typeof endpointVertexParametersSchema>): Endpoint {
 				stopSequences: parameters?.stop,
 				temperature: parameters?.temperature ?? 1,
 			},
-			tools,
+			// tools and multimodal are mutually exclusive
+			tools: !hasFiles ? tools : undefined,
 		});

 		// Preprompt is the same as the first system message.
@@ -83,16 +105,30 @@ export function endpointVertex(input: z.input<typeof endpointVertexParametersSchema>): Endpoint {
 			messages.shift();
 		}

-		const vertexMessages = messages.map(({ from, content }: Omit<Message, "id">): Content => {
-			return {
-				role: from === "user" ? "user" : "model",
-				parts: [
-					{
-						text: content,
-					},
-				],
-			};
-		});
+		const vertexMessages = await Promise.all(
+			messages.map(async ({ from, content, files }: Omit<Message, "id">): Promise<Content> => {
+				const imageProcessor = makeImageProcessor(multimodal.image);
+				const processedFiles =
+					files && files.length > 0
+						? await Promise.all(files.map(async (file) => imageProcessor(file)))
+						: [];
+
+				return {
+					role: from === "user" ? "user" : "model",
+					parts: [
+						...processedFiles.map((processedFile) => ({
+							inlineData: {
+								data: processedFile.image.toString("base64"),
+								mimeType: processedFile.mime,
+							},
+						})),
+						{
+							text: content,
+						},
+					],
+				};
+			})
+		);

 		const result = await generativeModel.generateContentStream({
 			contents: vertexMessages,
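The diff leans on two helpers from `../images` that are not shown here: `createImageProcessorOptionsValidator` (the zod validator supplying the defaults in the schema) and `makeImageProcessor`. From the call sites above one can infer roughly the contract below; this is a sketch of the assumed shapes, not the actual implementation in `src/lib/server/endpoints/images.ts`, and all the type names here are illustrative:

```ts
// Sketch only: the contract endpointVertex.ts appears to rely on,
// inferred from how the helpers are called in the diff above.
interface ImageProcessorOptions {
	supportedMimeTypes: string[]; // mime types passed through unchanged
	preferredMimeType: string; // conversion target for unsupported inputs
	maxSizeInMB: number; // size budget for the re-encoded image
	maxWidth: number; // dimension clamps applied before encoding
	maxHeight: number;
}

// What the endpoint actually consumes: raw bytes plus the final mime type,
// which it base64-encodes into a Vertex `inlineData` part.
interface ProcessedImage {
	image: Buffer;
	mime: string;
}

// makeImageProcessor(options) returns an async per-file processor; the
// file argument is chat-ui's uploaded-file representation.
type ImageProcessor = (file: unknown) => Promise<ProcessedImage>;
```

Note the interaction with tools: because the endpoint passes `tools: !hasFiles ? tools : undefined`, a request that carries image files silently drops `googleSearchRetrieval` and any other configured tools, which is what the commit note "remove tools if multimodal" refers to.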