File size: 2,966 Bytes
564e576
 
 
719022a
564e576
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd7f926
564e576
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719022a
564e576
 
 
 
 
 
aa0485a
 
564e576
aa0485a
564e576
 
 
 
 
 
 
719022a
 
564e576
 
 
 
 
 
 
 
 
719022a
 
564e576
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6440ab
564e576
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import type { BackendTool } from "..";
import { uploadFile } from "../../files/uploadFile";
import { MessageUpdateType } from "$lib/types/MessageUpdate";
import { callSpace, getIpToken, type GradioImage } from "../utils";
import { downloadFile } from "$lib/server/files/downloadFile";

/**
 * Positional arguments passed to the image editing endpoint, in the order
 * the endpoint receives them.
 */
type ImageEditingInput = [
	image: Blob,
	prompt: string,
	negativePrompt: string,
	guidanceScale: number,
	steps: number
];

/** Result tuple of the image editing endpoint: a single image descriptor. */
type ImageEditingOutput = [GradioImage];

/**
 * Backend tool that edits an image attached to the conversation using a text prompt.
 *
 * The source image is looked up at `messages[fileMessageIndex].files[fileIndex]`,
 * sent to the `run_edit` endpoint of the `multimodalart/cosxl` space, and the
 * resulting image is uploaded to the conversation and emitted as a file update.
 */
const imageEditing: BackendTool = {
	name: "image_editing",
	displayName: "Image Editing",
	description: "Use this tool to edit an image from a prompt.",
	isOnByDefault: true,
	mimeTypes: ["image/*"],
	parameterDefinitions: {
		prompt: {
			description:
				"A prompt to generate an image from. Describe the image visually in simple terms, separate terms with a comma.",
			type: "string",
			required: true,
		},
		fileMessageIndex: {
			description: "Index of the message containing the file to edit",
			type: "number",
			required: true,
		},
		fileIndex: {
			description: "Index of the file to edit",
			type: "number",
			required: true,
		},
	},
	/**
	 * Runs the edit and streams the result.
	 *
	 * Yields one `MessageUpdateType.File` update describing the uploaded output
	 * image, then returns a hidden (`display: false`) text output for the model.
	 *
	 * @throws Error when the selected message has no files, when `fileIndex` is not
	 *   a valid index into those files, when the selected file is not an image, or
	 *   when the edited image cannot be downloaded.
	 */
	async *call({ prompt, fileMessageIndex, fileIndex }, { conv, messages, ip, username }) {
		// Parameters come from the model, so coerce them before use.
		const promptText = String(prompt);
		const messageIdx = Number(fileMessageIndex);
		const fileIdx = Number(fileIndex);

		// An out-of-range or NaN message index yields `undefined`, which falls back to no files.
		const images = messages[messageIdx]?.files ?? [];
		if (images.length === 0) throw new Error("User did not provide an image to edit");

		// Reject NaN, fractional, negative and too-large indices up front; otherwise
		// the lookup below would yield `undefined` and fail with an opaque TypeError.
		if (!Number.isInteger(fileIdx) || fileIdx < 0 || fileIdx >= images.length) {
			throw new Error("Model provided an invalid file index");
		}

		const source = images[fileIdx];
		if (!source.mime.startsWith("image/")) {
			throw new Error("Model provided a file index which is not an image");
		}

		// todo: should handle multiple images
		// The stored file value is base64; round-trip through a data URL to get a Blob.
		const stored = await downloadFile(source.value, conv._id);
		const dataUrlResponse = await fetch(`data:${stored.mime};base64,${stored.value}`);
		const image = await dataUrlResponse.blob();

		const ipToken = await getIpToken(ip, username);

		const outputs = await callSpace<ImageEditingInput, ImageEditingOutput>(
			"multimodalart/cosxl",
			"run_edit",
			[
				image,
				promptText,
				"", // negative prompt
				7, // guidance scale
				20, // steps
			],
			ipToken
		);

		// Fail loudly on an HTTP error instead of uploading the error body as an image.
		const editedResponse = await fetch(outputs[0].url);
		if (!editedResponse.ok) {
			throw new Error(`Failed to download the edited image (HTTP ${editedResponse.status})`);
		}
		const editedBlob = await editedResponse.blob();
		const outputImage = await uploadFile(
			new File([editedBlob], outputs[0].orig_name, { type: editedBlob.type }),
			conv
		);

		yield {
			type: MessageUpdateType.File,
			name: outputImage.name,
			sha: outputImage.value,
			mime: outputImage.mime,
		};

		return {
			outputs: [
				{
					imageEditing: `An image has been generated for the following prompt: "${promptText}". Answer as if the user can already see the image. Do not try to insert the image or to add space for it. The user can already see the image. Do not try to describe the image as you the model cannot see it. Be concise.`,
				},
			],
			display: false,
		};
	},
};

export default imageEditing;