Spaces:
Sleeping
Sleeping
File size: 2,952 Bytes
8e3d721 8919651 58b1ffb 8919651 8e3d721 8c7d08c 8e3d721 f24ad59 db70195 8e3d721 f24ad59 db70195 8e3d721 6419aeb 58b1ffb 6419aeb f24ad59 8e3d721 6419aeb 8e3d721 58b1ffb 8e3d721 6419aeb 8e3d721 6419aeb 8e3d721 f24ad59 58b1ffb f24ad59 8e3d721 8c7d08c 3b780fb f24ad59 3b780fb 8c7d08c 8e3d721 6419aeb 8e3d721 6419aeb f24ad59 8919651 f24ad59 8e3d721 6419aeb 8e3d721 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import {
ClapProject,
ClapSegment,
getClapAssetSourceType,
filterSegments,
ClapSegmentFilteringMode,
ClapSegmentCategory
} from "@aitube/clap"
import { ClapCompletionMode } from "@aitube/client"
import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
/**
 * Generates the dialogue audio for one shot and attaches it to the shot's
 * first dialogue segment.
 *
 * Only the first DIALOGUE segment overlapping the shot is considered, and
 * only when it has no `assetUrl` yet (already-generated audio is skipped).
 * On success the segment's `assetUrl` / `assetSourceType` are set, and when
 * the produced audio is longer than 1s the duration is propagated to the
 * shot's segments. In non-FULL completion mode the updated segment is also
 * pushed into `newerClap` so partial responses include it.
 *
 * @param shotSegment  the shot (camera) segment whose dialogue we generate
 * @param existingClap the full project; mutated in place
 * @param newerClap    receives the updated segment when mode !== FULL
 * @param mode         completion mode (FULL vs partial)
 * @param turbo        currently unused here; kept for interface parity with
 *                     the other processShot implementations
 * @throws rethrows any error from the TTS generator or media probe
 */
export async function processShot({
  shotSegment,
  existingClap,
  newerClap,
  mode,
  turbo,
}: {
  shotSegment: ClapSegment
  existingClap: ClapProject
  newerClap: ClapProject
  mode: ClapCompletionMode
  turbo: boolean
}): Promise<void> {
  // All segments whose time range overlaps this shot.
  const shotSegments: ClapSegment[] = filterSegments(
    ClapSegmentFilteringMode.BOTH,
    shotSegment,
    existingClap.segments
  )

  const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
    s.category === ClapSegmentCategory.DIALOGUE
  )

  // We only generate audio for the first dialogue of the shot.
  // (was `let` — never reassigned, so `const`)
  const shotDialogueSegment: ClapSegment | undefined = shotDialogueSegments.at(0)

  console.log(`[api/edit/dialogues] processShot: shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotDialogueSegments.length} dialogues)`)

  if (shotDialogueSegment && !shotDialogueSegment.assetUrl) {
    // console.log(`[api/edit/dialogues] generating audio..`)

    try {
      // this generates a mp3
      shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
        text: shotDialogueSegment.prompt,
        // NOTE(review): `audioId` receives a prompt string here — presumably
        // the generator uses it to pick/describe the voice; confirm against
        // generateSpeechWithParlerTTS's contract.
        audioId: getSpeechBackgroundAudioPrompt(
          shotSegments,
          existingClap.entityIndex,
          // TODO: use the entity description if it exists
          ["high quality", "crisp", "detailed"]
        ),
        debug: true,
      })
      shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)

      // probe the generated file (dropped unused `durationInSec`)
      const { durationInMs, hasAudio } = await getMediaInfo(shotDialogueSegment.assetUrl)

      // ignore degenerate/silent outputs shorter than one second
      if (hasAudio && durationInMs > 1000) {
        shotDialogueSegment.assetDurationInMs = durationInMs
        shotSegment.assetDurationInMs = durationInMs

        // we update the duration of all the segments for this shot
        // (it is possible that this makes the two previous lines redundant)
        // FIX: the original iterated existingClap.segments — every segment of
        // the whole project — clobbering unrelated shots' durations; per the
        // comment's stated intent we only touch this shot's segments.
        shotSegments.forEach(s => {
          s.assetDurationInMs = durationInMs
        })
      }
    } catch (err) {
      // FIX: failures go to stderr (was console.log)
      console.error(`[api/edit/dialogues] processShot: failed to generate audio: ${err}`)
      throw err
    }

    console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)

    // if it's partial, we need to manually add it
    if (mode !== ClapCompletionMode.FULL) {
      newerClap.segments.push(shotDialogueSegment)
    }
  } else {
    console.log(`[api/edit/dialogues] processShot: there is already a dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
  }
}
|