Spaces:
Sleeping
Sleeping
File size: 4,084 Bytes
8919651 6d66622 8919651 d4a1dc1 8919651 d4a1dc1 8919651 d4a1dc1 8919651 d4a1dc1 8919651 58b1ffb 8919651 d4a1dc1 8919651 58b1ffb 8919651 6d66622 8919651 d4a1dc1 8919651 58b1ffb 8919651 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
"use server"
import YAML from "yaml"
import { ClapSegmentCategory, generateSeed } from "@aitube/clap"
import { ClapEntityPrompt } from "@aitube/client"
import { sleep } from "@/lib/utils/sleep"
import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
import { LatentEntity, LatentStory } from "@/app/api/v1/types"
import { systemPrompt } from "./systemPrompt"
import { generateImageID } from "./generateImageID"
/**
 * Pairs a generated entity prompt with the shots reported for that entity.
 */
export type EntityPromptResult = {
  /** Prompt describing the entity (name, category, identity image, ...). */
  entityPrompt: ClapEntityPrompt

  /**
   * Shot identifiers attached to the entity.
   * NOTE(review): presumably indices into the story timeline — confirm against the system prompt.
   */
  shots: Array<number>
}
/**
 * Generate entity prompts (with identity images) for the characters of a story.
 *
 * The LLM is asked (through `predict`) to list the entities found in the story
 * timeline. Its YAML answer is parsed, only the character entities are kept,
 * and an identity image is generated for each of them.
 *
 * This is mostly used by external apps such as the Stories Factory.
 *
 * @param prompt - what the story is about (must not be empty)
 * @param latentStory - the story timeline, one item per shot (must not be empty)
 * @param turbo - forwarded as-is to `predict` and `generateImageID`
 * @returns one result per kept entity: its prompt plus the `shots` value returned by the LLM
 * @throws Error when `prompt` or `latentStory` is empty, or when no entity list
 *         could be parsed from the LLM output after one retry
 */
export async function generateEntityPrompts({
  prompt = "",
  latentStory = [],
  turbo = false,
}: {
  prompt?: string
  latentStory?: LatentStory[]
  turbo?: boolean
} = {}): Promise<EntityPromptResult[]> {

  if (!prompt.length) { throw new Error(`please provide a prompt`) }
  console.log("generateEntityPrompts(): prompt:", prompt)

  if (!latentStory.length) { throw new Error(`please provide a story`) }
  console.log("generateEntityPrompts(): latentStory:", latentStory)

  const userPrompt = `The input story is about: ${prompt}.
The input story timeline is:
\`\`\`yaml
${YAML.stringify(
  // we need to help the LLM by marking the shots with a simple numeric ID
  latentStory.map((shot, i) => ({
    shot: i,
    ...shot,
  }))
)}
\`\`\`
Now please generate the output entities:`

  const prefix = "```yaml\n"
  const nbMaxNewTokens = 1400

  // TODO use streaming for the Hugging Face prediction
  //
  // note that a Clap file is actually a YAML stream of documents
  // so technically we could stream everything from end-to-end
  // (but I haven't coded the helpers to do this yet)
  let rawString = await predict({
    systemPrompt,
    userPrompt,
    nbMaxNewTokens,
    prefix,
    turbo,
  })

  // console.log("generateEntityPrompts(): rawString: ", rawString)

  let maybeEntities = parseRawStringToYAML<LatentEntity[]>(rawString, [])

  if (!Array.isArray(maybeEntities) || maybeEntities.length === 0) {
    console.log(`generateEntityPrompts(): failed to generate entities.. trying again`)

    await sleep(2000)

    rawString = await predict({
      systemPrompt,
      userPrompt: userPrompt + ".", // we trick the Hugging Face cache
      nbMaxNewTokens,
      prefix,
      turbo,
    })

    // console.log("generateEntityPrompts(): rawString: ", rawString)

    maybeEntities = parseRawStringToYAML<LatentEntity[]>(rawString, [])

    if (!Array.isArray(maybeEntities) || maybeEntities.length === 0) {
      console.log(`generateEntityPrompts(): failed to generate entities for the second time, which indicates an issue with the Hugging Face API`)
    }
  }

  // The parser may hand back something that is not an array (e.g. a scalar),
  // so we re-check the shape here instead of trusting `.length` alone:
  // anything that is not a non-empty array is reported as an API failure.
  if (!Array.isArray(maybeEntities) || maybeEntities.length === 0) {
    throw new Error(`Hugging Face Inference API failure (the model failed to generate the entities)`)
  }

  const results: EntityPromptResult[] = await Promise.all(
    maybeEntities
      // the LLM generates unrelated categories unfortunately,
      // that we would still turn into images.. so we fix that by
      // keeping only the character entities
      .filter(({ category }) => category === ClapSegmentCategory.CHARACTER)
      .map(async ({ name, category, image, shots }) => {
        const entityPrompt: ClapEntityPrompt = {
          name,
          category,
          age: "",
          variant: image,
          region: "",

          // the image description returned by the LLM is used as the image prompt
          identityImage: await generateImageID({
            prompt: image,
            seed: generateSeed(),
            turbo,
          }),

          // TODO later
          identityVoice: "" // await generateAudioID({ prompt: e.audio, seed: generateSeed() })
        }

        const result: EntityPromptResult = {
          entityPrompt,
          shots
        }

        return result
      })
  )

  // console.log(`generateEntityPrompts(): generated ${results.length} entities with their images and voice ids`)

  return results
}
|