File size: 5,984 Bytes
f24ad59
d4a1dc1
8919651
f24ad59
 
 
8919651
 
f24ad59
 
 
 
8919651
db70195
 
f24ad59
 
 
8919651
 
db70195
f24ad59
 
8919651
 
 
 
 
 
 
 
 
 
 
a54215e
8919651
 
db70195
 
8919651
 
d4a1dc1
a54215e
8919651
 
a54215e
8919651
 
 
 
 
 
 
 
 
 
 
58b1ffb
8919651
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a54215e
 
 
 
 
 
 
 
 
 
 
 
8919651
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58b1ffb
8919651
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f24ad59
 
8919651
 
f24ad59
 
 
 
 
 
 
 
 
 
 
 
 
 
db70195
 
f24ad59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8919651
f24ad59
 
 
 
 
58b1ffb
f24ad59
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

import { ClapProject, ClapSegmentCategory, getClapAssetSourceType, getValidNumber, newEntity } from "@aitube/clap"
import { ClapCompletionMode, ClapEntityPrompt } from "@aitube/client"

import { generateImageID } from "./generateImageID"
import { generateAudioID } from "./generateAudioID"
import { generateEntityPrompts } from "./generateEntityPrompts"
import { clapToLatentStory } from "./clapToLatentStory"

/**
 * Builds a new entity from the fields of an entity prompt.
 *
 * The prompt's `name` is reused as trigger name, label and description, and
 * the image / audio prompts are left empty.
 */
function entityFromPrompt({
  name,
  category,
  age,
  variant,
  region,
  identityImage,
  identityVoice,
}: Pick<
  ClapEntityPrompt,
  "name" | "category" | "age" | "variant" | "region" | "identityImage" | "identityVoice"
>) {
  return newEntity({
    category,
    triggerName: name,
    label: name,
    description: name,
    author: "auto",
    thumbnailUrl: "",

    imagePrompt: "",
    imageSourceType: getClapAssetSourceType(identityImage),
    imageEngine: "SD Lightning",
    imageId: identityImage,
    audioPrompt: "",
    audioSourceType: getClapAssetSourceType(identityVoice),
    audioEngine: "Parler-TTS", // <- TODO: use OpenVoice 2, that way it can be personalized
    audioId: identityVoice,

    // note: using a numeric age should be deprecated,
    // instead we should be able to specify things using text,
    // eg. "8 months", "25 years old", "12th century"
    age: getValidNumber(age, 0, 120, 25),

    // TODO: delete gender and appearance, replace by a single concept of "variant"
    gender: "",
    appearance: variant,
    region,
  })
}

/**
 * Adds, generates and completes the entities of a clap project.
 *
 * Steps, in order:
 *  1. If `existingClap` has no entities and `entityPrompts` is empty, entity
 *     prompts are generated from the project description and latent story,
 *     turned into entities, and assigned to the camera segments ("shots")
 *     listed for each of them.
 *  2. Every item of `entityPrompts` is turned into a new entity.
 *  3. Every entity gets its missing image prompt, image id, audio prompt and
 *     audio id filled in.
 *
 * Note that `existingClap` is mutated: entities are pushed into it and the
 * `entityId` of matching camera segments is overwritten.
 *
 * @param existingClap - project to read from and to complete in place
 * @param newerClap - project receiving the modified entities (partial mode only)
 * @param entityPrompts - entities explicitly requested by the caller
 * @param mode - only FULL is treated specially; any other mode is handled as PARTIAL
 * @param turbo - forwarded to the entity prompt generator and to the image generator
 * @returns `newerClap`; in any mode other than FULL, the entities modified in
 *   step 3 that were not yet in `newerClap.entityIndex` have been added to it
 * @throws Error if no entity exists once steps 1 and 2 are done
 */
export async function editEntities({
  existingClap,
  newerClap,
  entityPrompts = [],
  mode = ClapCompletionMode.PARTIAL,
  turbo = false,
}: {
  existingClap: ClapProject
  newerClap: ClapProject
  entityPrompts?: ClapEntityPrompt[]
  mode?: ClapCompletionMode
  turbo?: boolean
}): Promise<ClapProject> {

  // note that we can only handle either FULL or PARTIAL
  // other modes such as MERGE, REPLACE.. are irrelevant since those are client-side modes
  // so from a server point of view those correspond to PARTIAL
  //
  // it is also worth noting that the use of FULL should be discouraged
  const isPartial = mode !== ClapCompletionMode.FULL

  // if we don't have existing entities, and user passed none,
  // then we need to hallucinate them
  if (existingClap.entities.length === 0 && entityPrompts.length === 0) {

    const entityPromptsWithShots = await generateEntityPrompts({
      prompt: existingClap.meta.description,
      latentStory: await clapToLatentStory(existingClap),
      turbo,
    })

    // shot ids returned by the generator are used as positions in this list
    const allShots = existingClap.segments.filter(s => s.category === ClapSegmentCategory.CAMERA)

    for (const { entityPrompt, shots: entityShots } of entityPromptsWithShots) {
      const newEnt = entityFromPrompt(entityPrompt)

      existingClap.entities.push(newEnt)

      // now let's assign our entity to shots!
      //
      // warning: the shot assignment is the responsibility of the LLM.
      // if the LLM hallucinates non-existing shot ids, it will cause trouble!
      for (const shotId of entityShots) {
        const shot = allShots[shotId]
        if (shot) {
          shot.entityId = newEnt.id
        } else {
          console.log(`[api/v1/edit/entities] warning: the LLM generated a non-existing shot (shot "${shotId}", but we only have ${allShots.length} shots)`)
        }
      }
    }
  }

  // otherwise try to add what's new
  // (this is a no-op when the entities have just been generated above,
  // since that branch requires an empty entityPrompts)
  for (const entityPrompt of entityPrompts) {
    existingClap.entities.push(entityFromPrompt(entityPrompt))
  }

  if (!existingClap.entities.length) { throw new Error(`please provide at least one entity`) }

  // then we try to automatically repair, edit, complete.. all the existing entities

  for (const entity of existingClap.entities) {

    let entityHasBeenModified = false

    // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
    // NOTE(review): this is a hard-coded placeholder, not a generated prompt
    if (!entity.imagePrompt) {
      entity.imagePrompt = "a man with a beard"
      entityHasBeenModified = true
    }

    // TASK 2: GENERATE THE IMAGE ID IF MISSING
    if (!entity.imageId) {
      entity.imageId = await generateImageID({
        prompt: entity.imagePrompt,
        seed: entity.seed,
        turbo,
      })
      entity.imageSourceType = getClapAssetSourceType(entity.imageId)
      entityHasBeenModified = true
    }

    // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
    // NOTE(review): same hard-coded placeholder as for the image prompt
    if (!entity.audioPrompt) {
      entity.audioPrompt = "a man with a beard"
      entityHasBeenModified = true
    }

    // TASK 4: GENERATE THE AUDIO ID IF MISSING

    // TODO here: call Parler-TTS or a generic audio generator
    if (!entity.audioId) {
      entity.audioId = await generateAudioID({
        prompt: entity.audioPrompt,
        seed: entity.seed
      })
      entity.audioSourceType = getClapAssetSourceType(entity.audioId)
      entityHasBeenModified = true
    }

    // in case we are doing a partial update, only report the entities that
    // changed and that the newer clap doesn't already know about
    if (isPartial && entityHasBeenModified && !newerClap.entityIndex[entity.id]) {
      newerClap.entities.push(entity)
      newerClap.entityIndex[entity.id] = entity
    }
  }

  // console.log(`api/edit/entities(): returning the newerClap`)

  return newerClap
}