Spaces:
Sleeping
Sleeping
File size: 5,984 Bytes
f24ad59 d4a1dc1 8919651 f24ad59 8919651 f24ad59 8919651 db70195 f24ad59 8919651 db70195 f24ad59 8919651 a54215e 8919651 db70195 8919651 d4a1dc1 a54215e 8919651 a54215e 8919651 58b1ffb 8919651 a54215e 8919651 58b1ffb 8919651 f24ad59 8919651 f24ad59 db70195 f24ad59 8919651 f24ad59 58b1ffb f24ad59 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import { ClapProject, ClapSegmentCategory, getClapAssetSourceType, getValidNumber, newEntity } from "@aitube/clap"
import { ClapCompletionMode, ClapEntityPrompt } from "@aitube/client"
import { generateImageID } from "./generateImageID"
import { generateAudioID } from "./generateAudioID"
import { generateEntityPrompts } from "./generateEntityPrompts"
import { clapToLatentStory } from "./clapToLatentStory"
/**
 * Adds, repairs and completes the entities of a clap project.
 *
 * The work happens in three steps:
 * 1. If `existingClap` has no entities and `entityPrompts` is empty, entity prompts are
 *    generated from the project description and its latent story. Each generated entity
 *    is added to `existingClap` and assigned to the camera shots listed for it.
 * 2. One entity is created and added to `existingClap` for every item of `entityPrompts`.
 * 3. Every entity of `existingClap` gets its missing image prompt, image id, audio prompt
 *    and audio id filled in.
 *
 * `existingClap` is mutated in place (its entities, and the `entityId` of its camera segments).
 *
 * Only FULL and PARTIAL are really handled: other modes such as MERGE or REPLACE are
 * client-side modes, so from a server point of view they are treated like PARTIAL.
 * The use of FULL should be discouraged.
 *
 * @param existingClap - project whose entities are created and completed
 * @param newerClap - project that is returned; in any mode other than FULL it receives
 *   every entity modified by step 3 that is not already present in its `entityIndex`.
 *   In FULL mode nothing is copied into it (presumably the caller then passes the same
 *   project as `existingClap` - TODO confirm against the caller)
 * @param entityPrompts - descriptions of the entities to add (defaults to none)
 * @param mode - completion mode (defaults to PARTIAL)
 * @param turbo - forwarded to the entity prompt generator and to the image generator
 * @returns `newerClap`
 * @throws Error when there is still no entity at all after steps 1 and 2
 */
export async function editEntities({
  existingClap,
  newerClap,
  entityPrompts = [],
  mode = ClapCompletionMode.PARTIAL,
  turbo = false,
}: {
  existingClap: ClapProject
  newerClap: ClapProject
  entityPrompts?: ClapEntityPrompt[]
  mode?: ClapCompletionMode
  turbo?: boolean
}): Promise<ClapProject> {
  // anything that is not FULL is handled as a partial update
  const isPartial = mode !== ClapCompletionMode.FULL

  // placeholder used until real image / audio prompt generation is implemented
  const placeholderPrompt = "a man with a beard"

  // if we don't have existing entities, and user passed none,
  // then we need to hallucinate them
  if (existingClap.entities.length === 0 && entityPrompts.length === 0) {
    const entityPromptsWithShots = await generateEntityPrompts({
      prompt: existingClap.meta.description,
      latentStory: await clapToLatentStory(existingClap),
      turbo,
    })

    // shot ids returned by the generator are used as indices into this list
    const allShots = existingClap.segments.filter(s => s.category === ClapSegmentCategory.CAMERA)

    for (const { entityPrompt, shots: entityShots } of entityPromptsWithShots) {
      const newEnt = createEntityFromPrompt(entityPrompt)
      existingClap.entities.push(newEnt)

      // now let's assign our entity to shots!
      //
      // warning: the shot assignment is the responsibility of the LLM.
      // if the LLM hallucinates non-existing shot ids, it will cause trouble!
      for (const shotId of entityShots) {
        const shot = allShots[shotId]
        if (shot) {
          shot.entityId = newEnt.id
        } else {
          console.log(`[api/v1/edit/entities] warning: the LLM generated a non-existing shot (shot "${shotId}", but we only have ${allShots.length} shots)`)
        }
      }
    }
  }

  // otherwise try to add what's new
  for (const entityPrompt of entityPrompts) {
    existingClap.entities.push(createEntityFromPrompt(entityPrompt))
  }

  if (!existingClap.entities.length) { throw new Error(`please provide at least one entity`) }

  // then we try to automatically repair, edit, complete.. all the existing entities
  // (done sequentially: the image and audio ids depend on the prompts set just before)
  for (const entity of existingClap.entities) {
    let entityHasBeenModified = false

    // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
    if (!entity.imagePrompt) {
      entity.imagePrompt = placeholderPrompt
      entityHasBeenModified = true
    }

    // TASK 2: GENERATE THE IMAGE ID IF MISSING
    if (!entity.imageId) {
      entity.imageId = await generateImageID({
        prompt: entity.imagePrompt,
        seed: entity.seed,
        turbo,
      })
      entity.imageSourceType = getClapAssetSourceType(entity.imageId)
      entityHasBeenModified = true
    }

    // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
    if (!entity.audioPrompt) {
      entity.audioPrompt = placeholderPrompt
      entityHasBeenModified = true
    }

    // TASK 4: GENERATE THE AUDIO ID IF MISSING
    // TODO here: call Parler-TTS or a generic audio generator
    if (!entity.audioId) {
      entity.audioId = await generateAudioID({
        prompt: entity.audioPrompt,
        seed: entity.seed
      })
      entity.audioSourceType = getClapAssetSourceType(entity.audioId)
      entityHasBeenModified = true
    }

    // in case we are doing a partial update:
    // only report entities that changed and that the newer clap doesn't know yet
    if (isPartial && entityHasBeenModified && !newerClap.entityIndex[entity.id]) {
      newerClap.entities.push(entity)
      newerClap.entityIndex[entity.id] = entity
    }
  }

  // console.log(`api/edit/entities(): returning the newerClap`)
  return newerClap
}

/**
 * Builds a new entity from an entity prompt.
 *
 * The name is used as trigger name, label and description. The image and audio prompts
 * are left empty, so that `editEntities` fills them in during its completion step.
 */
function createEntityFromPrompt({
  name,
  category,
  age,
  variant,
  region,
  identityImage,
  identityVoice,
}: ClapEntityPrompt) {
  return newEntity({
    category,
    triggerName: name,
    label: name,
    description: name,
    author: "auto",
    thumbnailUrl: "",

    imagePrompt: "",
    imageSourceType: getClapAssetSourceType(identityImage),
    imageEngine: "SD Lightning",
    imageId: identityImage,

    audioPrompt: "",
    audioSourceType: getClapAssetSourceType(identityVoice),
    audioEngine: "Parler-TTS", // <- TODO: use OpenVoice 2, that way it can be personalized
    audioId: identityVoice,

    // note: using a numeric age should be deprecated,
    // instead we should be able to specify things using text,
    // eg. "8 months", "25 years old", "12th century"
    age: getValidNumber(age, 0, 120, 25),

    // TODO: delete gender and appearance, replace by a single concept of "variant"
    gender: "",
    appearance: variant,
    region: region,
  })
}
|