Spaces:
Paused
Paused
Julian Bilcke
commited on
Commit
·
a65e95e
1
Parent(s):
29598d1
work in progress to create the video service
Browse files- Dockerfile +1 -1
- package-lock.json +9 -0
- package.json +2 -0
- src/data/all_words.json +0 -0
- src/data/good_words.json +0 -0
- src/index.mts +47 -61
- src/services/addAudioToVideo.mts +31 -15
- src/services/generateActor.mts +50 -0
- src/services/generateAudio.mts +45 -22
- src/services/generateAudioLegacy.mts +33 -0
- src/services/generateShot.mts +209 -0
- src/services/generateVideo.mts +2 -1
- src/services/generateVoice.mts +56 -0
- src/services/interpolateVideo.mts +35 -22
- src/services/interpolateVideoLegacy.mts +39 -0
- src/services/mergeAudio.mts +49 -0
- src/services/postInterpolation.mts +57 -0
- src/test2.mts +7 -0
- src/types.mts +23 -2
Dockerfile
CHANGED
|
@@ -30,6 +30,6 @@ RUN npm install
|
|
| 30 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
| 31 |
COPY --chown=user . $HOME/app
|
| 32 |
|
| 33 |
-
EXPOSE 7860
|
| 34 |
|
| 35 |
CMD [ "npm", "run", "start" ]
|
|
|
|
| 30 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
| 31 |
COPY --chown=user . $HOME/app
|
| 32 |
|
| 33 |
+
EXPOSE 7860
|
| 34 |
|
| 35 |
CMD [ "npm", "run", "start" ]
|
package-lock.json
CHANGED
|
@@ -10,6 +10,7 @@
|
|
| 10 |
"license": "Apache License",
|
| 11 |
"dependencies": {
|
| 12 |
"@gradio/client": "^0.1.4",
|
|
|
|
| 13 |
"@types/express": "^4.17.17",
|
| 14 |
"@types/uuid": "^9.0.2",
|
| 15 |
"express": "^4.18.2",
|
|
@@ -78,6 +79,14 @@
|
|
| 78 |
"node": ">=18.0.0"
|
| 79 |
}
|
| 80 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
"node_modules/@jridgewell/resolve-uri": {
|
| 82 |
"version": "3.1.1",
|
| 83 |
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz",
|
|
|
|
| 10 |
"license": "Apache License",
|
| 11 |
"dependencies": {
|
| 12 |
"@gradio/client": "^0.1.4",
|
| 13 |
+
"@huggingface/inference": "^2.6.1",
|
| 14 |
"@types/express": "^4.17.17",
|
| 15 |
"@types/uuid": "^9.0.2",
|
| 16 |
"express": "^4.18.2",
|
|
|
|
| 79 |
"node": ">=18.0.0"
|
| 80 |
}
|
| 81 |
},
|
| 82 |
+
"node_modules/@huggingface/inference": {
|
| 83 |
+
"version": "2.6.1",
|
| 84 |
+
"resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-2.6.1.tgz",
|
| 85 |
+
"integrity": "sha512-qFYchgOCPeEkZJKiSr7Kz62QwukJtgkeQCT7Q0SSKUcvHpTQVNJp6i/JrJMR4dBdzQysJ1SZDC0pLBBnnskTag==",
|
| 86 |
+
"engines": {
|
| 87 |
+
"node": ">=18"
|
| 88 |
+
}
|
| 89 |
+
},
|
| 90 |
"node_modules/@jridgewell/resolve-uri": {
|
| 91 |
"version": "3.1.1",
|
| 92 |
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz",
|
package.json
CHANGED
|
@@ -6,6 +6,7 @@
|
|
| 6 |
"scripts": {
|
| 7 |
"start": "node --loader ts-node/esm src/index.mts",
|
| 8 |
"test": "node --loader ts-node/esm src/test.mts",
|
|
|
|
| 9 |
"docker": "npm run docker:build && npm run docker:run",
|
| 10 |
"docker:build": "docker build -t ai-webtv .",
|
| 11 |
"docker:run": "docker run -it -p 7860:7860 video-service"
|
|
@@ -14,6 +15,7 @@
|
|
| 14 |
"license": "Apache License",
|
| 15 |
"dependencies": {
|
| 16 |
"@gradio/client": "^0.1.4",
|
|
|
|
| 17 |
"@types/express": "^4.17.17",
|
| 18 |
"@types/uuid": "^9.0.2",
|
| 19 |
"express": "^4.18.2",
|
|
|
|
| 6 |
"scripts": {
|
| 7 |
"start": "node --loader ts-node/esm src/index.mts",
|
| 8 |
"test": "node --loader ts-node/esm src/test.mts",
|
| 9 |
+
"test2": "node --loader ts-node/esm src/test2.mts",
|
| 10 |
"docker": "npm run docker:build && npm run docker:run",
|
| 11 |
"docker:build": "docker build -t ai-webtv .",
|
| 12 |
"docker:run": "docker run -it -p 7860:7860 video-service"
|
|
|
|
| 15 |
"license": "Apache License",
|
| 16 |
"dependencies": {
|
| 17 |
"@gradio/client": "^0.1.4",
|
| 18 |
+
"@huggingface/inference": "^2.6.1",
|
| 19 |
"@types/express": "^4.17.17",
|
| 20 |
"@types/uuid": "^9.0.2",
|
| 21 |
"express": "^4.18.2",
|
src/data/all_words.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/data/good_words.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/index.mts
CHANGED
|
@@ -1,63 +1,62 @@
|
|
| 1 |
-
import { promises as fs } from
|
| 2 |
-
import path from 'node:path'
|
| 3 |
|
| 4 |
-
import
|
| 5 |
-
import express from 'express'
|
| 6 |
|
| 7 |
-
import {
|
| 8 |
-
import {
|
| 9 |
-
import {
|
| 10 |
-
import { generateSeed } from './services/generateSeed.mts'
|
| 11 |
-
import { addAudioToVideo } from './services/addAudioToVideo.mts'
|
| 12 |
-
|
| 13 |
-
import { MakeShot } from './types.mts'
|
| 14 |
|
| 15 |
const app = express()
|
| 16 |
const port = 7860
|
| 17 |
|
| 18 |
app.use(express.json())
|
| 19 |
|
|
|
|
| 20 |
|
| 21 |
-
app.post(
|
| 22 |
-
const query = req.body as
|
| 23 |
|
| 24 |
-
|
| 25 |
-
const token = `${query.token || ''}`
|
| 26 |
if (token !== process.env.VS_SECRET_ACCESS_TOKEN) {
|
| 27 |
console.log("couldn't find access token in the query")
|
| 28 |
-
res.write(JSON.stringify({ error: true, message:
|
| 29 |
res.end()
|
| 30 |
return
|
| 31 |
}
|
| 32 |
|
| 33 |
-
const shotPrompt = `${query.shotPrompt ||
|
| 34 |
if (shotPrompt.length < 5) {
|
| 35 |
-
res.write(JSON.stringify({ error: true, message:
|
| 36 |
res.end()
|
| 37 |
return
|
| 38 |
}
|
| 39 |
|
| 40 |
// optional video URL
|
| 41 |
-
// const inputVideo = `${req.query.inputVideo ||
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
// optional audio prompt
|
| 44 |
-
const
|
| 45 |
|
| 46 |
// optional seed
|
| 47 |
const defaultSeed = generateSeed()
|
| 48 |
const seedStr = Number(`${query.seed || defaultSeed}`)
|
| 49 |
const maybeSeed = Number(seedStr)
|
| 50 |
const seed = isNaN(maybeSeed) || ! isFinite(maybeSeed) ? defaultSeed : maybeSeed
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
// should we upscale or not?
|
| 54 |
-
const upscale = `${query.upscale || 'true'}` === 'true'
|
| 55 |
|
| 56 |
-
// duration of the prompt, in seconds
|
| 57 |
const defaultDuration = 3
|
|
|
|
| 58 |
const durationStr = Number(`${query.duration || defaultDuration}`)
|
| 59 |
const maybeDuration = Number(durationStr)
|
| 60 |
-
const duration = Math.min(
|
| 61 |
|
| 62 |
const defaultSteps = 35
|
| 63 |
const stepsStr = Number(`${query.steps || defaultSteps}`)
|
|
@@ -68,58 +67,45 @@ app.post('/shot', async (req, res) => {
|
|
| 68 |
const defaultFps = 24
|
| 69 |
const fpsStr = Number(`${query.fps || defaultFps}`)
|
| 70 |
const maybeFps = Number(fpsStr)
|
| 71 |
-
const
|
| 72 |
|
| 73 |
const defaultResolution = 576
|
| 74 |
const resolutionStr = Number(`${query.resolution || defaultResolution}`)
|
| 75 |
const maybeResolution = Number(resolutionStr)
|
| 76 |
const resolution = Math.min(1080, Math.max(256, isNaN(maybeResolution) || !isFinite(maybeResolution) ? defaultResolution : maybeResolution))
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
const shotFileName = `${Date.now()}.mp4`
|
| 80 |
|
| 81 |
-
|
|
|
|
|
|
|
| 82 |
shotPrompt,
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
| 85 |
duration,
|
|
|
|
|
|
|
| 86 |
nbSteps,
|
| 87 |
-
fps,
|
| 88 |
-
seed,
|
| 89 |
upscale,
|
| 90 |
-
|
|
|
|
| 91 |
})
|
| 92 |
-
console.log('generating base video ..')
|
| 93 |
-
const generatedVideoUrl = await generateVideo(shotPrompt, {
|
| 94 |
-
seed,
|
| 95 |
-
nbFrames: 24, // if we try more eg 48 frames, this will crash the upscaler (not enough memory)
|
| 96 |
-
nbSteps
|
| 97 |
-
})
|
| 98 |
-
|
| 99 |
|
| 100 |
-
console.log(
|
| 101 |
-
const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)
|
| 102 |
|
| 103 |
-
|
| 104 |
-
console.log('upscaling video..')
|
| 105 |
-
await upscaleVideo(videoFileName, shotPrompt)
|
| 106 |
-
}
|
| 107 |
-
|
| 108 |
-
// TODO call AudioLDM
|
| 109 |
-
if (audioPrompt) {
|
| 110 |
-
// const audioFileName = await callAudioLDM(audioPrompt)
|
| 111 |
-
console.log('calling audio prompt')
|
| 112 |
-
|
| 113 |
-
// await addAudioToVideo(videoFileName, audioFileName)
|
| 114 |
-
}
|
| 115 |
-
|
| 116 |
-
console.log('returning result to user..')
|
| 117 |
-
|
| 118 |
-
const filePath = path.resolve(tmpDir, videoFileName)
|
| 119 |
|
| 120 |
const buffer = await fs.readFile(filePath)
|
| 121 |
-
|
| 122 |
-
res.setHeader(
|
|
|
|
| 123 |
res.end(buffer)
|
| 124 |
})
|
| 125 |
|
|
|
|
| 1 |
+
import { promises as fs } from "fs"
|
|
|
|
| 2 |
|
| 3 |
+
import express from "express"
|
|
|
|
| 4 |
|
| 5 |
+
import { generateSeed } from "./services/generateSeed.mts"
|
| 6 |
+
import { Job, ShotQuery } from "./types.mts"
|
| 7 |
+
import { generateShot } from "./services/generateShot.mts"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
const app = express()
|
| 10 |
const port = 7860
|
| 11 |
|
| 12 |
app.use(express.json())
|
| 13 |
|
| 14 |
+
const queue: Job[] = []
|
| 15 |
|
| 16 |
+
app.post("/shot", async (req, res) => {
|
| 17 |
+
const query = req.body as ShotQuery
|
| 18 |
|
| 19 |
+
const token = `${query.token || ""}`
|
|
|
|
| 20 |
if (token !== process.env.VS_SECRET_ACCESS_TOKEN) {
|
| 21 |
console.log("couldn't find access token in the query")
|
| 22 |
+
res.write(JSON.stringify({ error: true, message: "access denied" }))
|
| 23 |
res.end()
|
| 24 |
return
|
| 25 |
}
|
| 26 |
|
| 27 |
+
const shotPrompt = `${query.shotPrompt || ""}`
|
| 28 |
if (shotPrompt.length < 5) {
|
| 29 |
+
res.write(JSON.stringify({ error: true, message: "prompt too short (must be at least 5 in length)" }))
|
| 30 |
res.end()
|
| 31 |
return
|
| 32 |
}
|
| 33 |
|
| 34 |
// optional video URL
|
| 35 |
+
// const inputVideo = `${req.query.inputVideo || ""}`
|
| 36 |
+
|
| 37 |
+
// optional background audio prompt
|
| 38 |
+
const backgroundAudioPrompt = `${query.backgroundAudioPrompt || ""}`
|
| 39 |
|
| 40 |
+
// optional foreground audio prompt
|
| 41 |
+
const foregroundAudioPrompt = `${query.foregroundAudioPrompt || ""}`
|
| 42 |
|
| 43 |
// optional seed
|
| 44 |
const defaultSeed = generateSeed()
|
| 45 |
const seedStr = Number(`${query.seed || defaultSeed}`)
|
| 46 |
const maybeSeed = Number(seedStr)
|
| 47 |
const seed = isNaN(maybeSeed) || ! isFinite(maybeSeed) ? defaultSeed : maybeSeed
|
| 48 |
+
|
| 49 |
+
// in production we want those ON by default
|
| 50 |
+
const upscale = `${query.upscale || "true"}` === "true"
|
| 51 |
+
const interpolate = `${query.upscale || "true"}` === "true"
|
| 52 |
+
const noise = `${query.noise || "true"}` === "true"
|
| 53 |
|
|
|
|
|
|
|
| 54 |
|
|
|
|
| 55 |
const defaultDuration = 3
|
| 56 |
+
const maxDuration = 5
|
| 57 |
const durationStr = Number(`${query.duration || defaultDuration}`)
|
| 58 |
const maybeDuration = Number(durationStr)
|
| 59 |
+
const duration = Math.min(maxDuration, Math.max(1, isNaN(maybeDuration) || !isFinite(maybeDuration) ? defaultDuration : maybeDuration))
|
| 60 |
|
| 61 |
const defaultSteps = 35
|
| 62 |
const stepsStr = Number(`${query.steps || defaultSteps}`)
|
|
|
|
| 67 |
const defaultFps = 24
|
| 68 |
const fpsStr = Number(`${query.fps || defaultFps}`)
|
| 69 |
const maybeFps = Number(fpsStr)
|
| 70 |
+
const nbFrames = Math.min(60, Math.max(8, isNaN(maybeFps) || !isFinite(maybeFps) ? defaultFps : maybeFps))
|
| 71 |
|
| 72 |
const defaultResolution = 576
|
| 73 |
const resolutionStr = Number(`${query.resolution || defaultResolution}`)
|
| 74 |
const maybeResolution = Number(resolutionStr)
|
| 75 |
const resolution = Math.min(1080, Math.max(256, isNaN(maybeResolution) || !isFinite(maybeResolution) ? defaultResolution : maybeResolution))
|
| 76 |
|
| 77 |
+
const actorPrompt = `${query.actorPrompt || ""}`
|
| 78 |
+
|
| 79 |
+
const actorVoicePrompt = `${query.actorVoicePrompt || ""}`
|
| 80 |
+
|
| 81 |
+
const actorDialoguePrompt = `${query.actorDialoguePrompt || ""}`
|
| 82 |
|
|
|
|
| 83 |
|
| 84 |
+
const { filePath } = await generateShot({
|
| 85 |
+
seed,
|
| 86 |
+
actorPrompt,
|
| 87 |
shotPrompt,
|
| 88 |
+
backgroundAudioPrompt,
|
| 89 |
+
foregroundAudioPrompt,
|
| 90 |
+
actorDialoguePrompt,
|
| 91 |
+
actorVoicePrompt,
|
| 92 |
duration,
|
| 93 |
+
nbFrames,
|
| 94 |
+
resolution,
|
| 95 |
nbSteps,
|
|
|
|
|
|
|
| 96 |
upscale,
|
| 97 |
+
interpolate,
|
| 98 |
+
noise,
|
| 99 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
+
console.log(`generated video in ${filePath}`)
|
|
|
|
| 102 |
|
| 103 |
+
console.log("returning result to user..")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
const buffer = await fs.readFile(filePath)
|
| 106 |
+
|
| 107 |
+
res.setHeader("Content-Type", "media/mp4")
|
| 108 |
+
res.setHeader("Content-Length", buffer.length)
|
| 109 |
res.end(buffer)
|
| 110 |
})
|
| 111 |
|
src/services/addAudioToVideo.mts
CHANGED
|
@@ -1,29 +1,45 @@
|
|
| 1 |
-
import
|
| 2 |
-
import
|
| 3 |
|
| 4 |
-
import tmpDir from
|
| 5 |
-
import
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
await new Promise((resolve, reject) => {
|
| 12 |
ffmpeg(videoFilePath)
|
| 13 |
.input(audioFilePath)
|
| 14 |
-
.
|
| 15 |
-
.outputOptions(
|
| 16 |
-
.outputOptions(
|
| 17 |
-
.outputOptions(
|
| 18 |
-
.outputOptions(
|
|
|
|
| 19 |
.output(tempOutputFilePath)
|
| 20 |
-
.on(
|
| 21 |
-
.on(
|
| 22 |
.run()
|
| 23 |
})
|
| 24 |
|
| 25 |
// Now we want to replace the original video file with the new file that has been created
|
| 26 |
await fs.rename(tempOutputFilePath, videoFilePath)
|
| 27 |
|
| 28 |
-
return
|
| 29 |
};
|
|
|
|
| 1 |
+
import { promises as fs } from "node:fs"
|
| 2 |
+
import path from "node:path"
|
| 3 |
|
| 4 |
+
import tmpDir from "temp-dir"
|
| 5 |
+
import { v4 as uuidv4 } from "uuid"
|
| 6 |
|
| 7 |
+
import ffmpeg from "fluent-ffmpeg"
|
| 8 |
+
|
| 9 |
+
export const addAudioToVideo = async (
|
| 10 |
+
videoFileName: string,
|
| 11 |
+
audioFileName: string,
|
| 12 |
+
|
| 13 |
+
/*
|
| 14 |
+
* 0.0: mute the audio completely
|
| 15 |
+
* 0.5: set the audio to 50% of original volume (half volume)
|
| 16 |
+
* 1.0: maintain the audio at original volume (100% of original volume)
|
| 17 |
+
* 2.0: amplify the audio to 200% of original volume (double volume - might cause clipping)
|
| 18 |
+
*/
|
| 19 |
+
volume: number = 1.0
|
| 20 |
+
): Promise<string> => {
|
| 21 |
|
| 22 |
+
const tempOutputFilePath = `${uuidv4()}.mp4`
|
| 23 |
+
const videoFilePath = path.resolve(tmpDir, videoFileName)
|
| 24 |
+
const audioFilePath = path.resolve(tmpDir, audioFileName)
|
| 25 |
+
|
| 26 |
await new Promise((resolve, reject) => {
|
| 27 |
ffmpeg(videoFilePath)
|
| 28 |
.input(audioFilePath)
|
| 29 |
+
.audioFilters({ filter: 'volume', options: volume }) // add audio filter for volume
|
| 30 |
+
.outputOptions("-c:v copy") // use video copy codec
|
| 31 |
+
.outputOptions("-c:a aac") // use audio codec
|
| 32 |
+
.outputOptions("-map 0:v:0") // map video from 0th to 0th
|
| 33 |
+
.outputOptions("-map 1:a:0") // map audio from 1st to 0th
|
| 34 |
+
.outputOptions("-shortest") // finish encoding when shortest input stream ends
|
| 35 |
.output(tempOutputFilePath)
|
| 36 |
+
.on("end", resolve)
|
| 37 |
+
.on("error", reject)
|
| 38 |
.run()
|
| 39 |
})
|
| 40 |
|
| 41 |
// Now we want to replace the original video file with the new file that has been created
|
| 42 |
await fs.rename(tempOutputFilePath, videoFilePath)
|
| 43 |
|
| 44 |
+
return videoFileName
|
| 45 |
};
|
src/services/generateActor.mts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { promises as fs } from "node:fs"
|
| 2 |
+
import path from "node:path"
|
| 3 |
+
import tmpDir from "temp-dir"
|
| 4 |
+
|
| 5 |
+
import { HfInference } from "@huggingface/inference"
|
| 6 |
+
|
| 7 |
+
const hf = new HfInference(process.env.VS_HF_API_TOKEN)
|
| 8 |
+
|
| 9 |
+
export const generateActor = async (prompt: string, fileName: string, seed: number) => {
|
| 10 |
+
const positivePrompt = [
|
| 11 |
+
`profile photo of ${prompt || ""}`,
|
| 12 |
+
"id picture",
|
| 13 |
+
"photoshoot",
|
| 14 |
+
"portrait photography",
|
| 15 |
+
"neutral expression",
|
| 16 |
+
"neutral background",
|
| 17 |
+
"studio photo",
|
| 18 |
+
"award winning",
|
| 19 |
+
"high resolution",
|
| 20 |
+
"photo realistic",
|
| 21 |
+
"intricate details",
|
| 22 |
+
"beautiful",
|
| 23 |
+
]
|
| 24 |
+
const negativePrompt = [
|
| 25 |
+
"anime",
|
| 26 |
+
"drawing",
|
| 27 |
+
"painting",
|
| 28 |
+
"lowres",
|
| 29 |
+
"blurry",
|
| 30 |
+
"artificial"
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
console.log(`generating actor: ${positivePrompt.join(", ")}`)
|
| 34 |
+
|
| 35 |
+
const blob = await hf.textToImage({
|
| 36 |
+
inputs: positivePrompt.join(", "),
|
| 37 |
+
model: "stabilityai/stable-diffusion-2-1",
|
| 38 |
+
parameters: {
|
| 39 |
+
negative_prompt: negativePrompt.join(", "),
|
| 40 |
+
// seed, no seed?
|
| 41 |
+
}
|
| 42 |
+
})
|
| 43 |
+
|
| 44 |
+
const filePath = path.resolve(tmpDir, fileName)
|
| 45 |
+
|
| 46 |
+
const buffer = Buffer.from(await blob.arrayBuffer())
|
| 47 |
+
await fs.writeFile(filePath, buffer, "utf8")
|
| 48 |
+
|
| 49 |
+
return filePath
|
| 50 |
+
}
|
src/services/generateAudio.mts
CHANGED
|
@@ -1,33 +1,56 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
| 3 |
-
import { generateSeed } from "./generateSeed.mts"
|
| 4 |
|
| 5 |
const instances: string[] = [
|
| 6 |
process.env.VS_AUDIO_GENERATION_SPACE_API_URL
|
| 7 |
]
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
nbFrames: number;
|
| 12 |
-
nbSteps: number;
|
| 13 |
-
}) => {
|
| 14 |
-
const seed = options?.seed || generateSeed()
|
| 15 |
-
const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
|
| 16 |
-
const nbSteps = options?.nbSteps || 35
|
| 17 |
-
|
| 18 |
const instance = instances.shift()
|
| 19 |
instances.push(instance)
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
const rawResponse = await api.predict('/run', [
|
| 24 |
-
prompt, // string in 'Prompt' Textbox component
|
| 25 |
-
seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
|
| 26 |
-
nbFrames, // 24 // it is the nb of frames per seconds I think?
|
| 27 |
-
nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
|
| 28 |
-
]) as any
|
| 29 |
|
| 30 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
return
|
| 33 |
}
|
|
|
|
| 1 |
+
import puppeteer from "puppeteer"
|
| 2 |
+
import { downloadVideo } from "./downloadVideo.mts"
|
|
|
|
| 3 |
|
| 4 |
const instances: string[] = [
|
| 5 |
process.env.VS_AUDIO_GENERATION_SPACE_API_URL
|
| 6 |
]
|
| 7 |
|
| 8 |
+
// TODO we should use an inference endpoint instead
|
| 9 |
+
export async function generateAudio(prompt: string, audioFileName: string) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
const instance = instances.shift()
|
| 11 |
instances.push(instance)
|
| 12 |
|
| 13 |
+
console.log("instance:", instance)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
+
const browser = await puppeteer.launch({
|
| 16 |
+
headless: false,
|
| 17 |
+
protocolTimeout: 800000,
|
| 18 |
+
})
|
| 19 |
+
|
| 20 |
+
const page = await browser.newPage()
|
| 21 |
+
|
| 22 |
+
await page.goto(instance, {
|
| 23 |
+
waitUntil: "networkidle2",
|
| 24 |
+
})
|
| 25 |
+
|
| 26 |
+
await new Promise(r => setTimeout(r, 3000))
|
| 27 |
+
|
| 28 |
+
const firstTextboxInput = await page.$('input[data-testid="textbox"]')
|
| 29 |
+
|
| 30 |
+
await firstTextboxInput.type(prompt)
|
| 31 |
+
|
| 32 |
+
// console.log("looking for the button to submit")
|
| 33 |
+
const submitButton = await page.$("button.lg")
|
| 34 |
+
|
| 35 |
+
// console.log("clicking on the button")
|
| 36 |
+
await submitButton.click()
|
| 37 |
+
|
| 38 |
+
await page.waitForSelector("a[download]", {
|
| 39 |
+
timeout: 800000, // need to be large enough in case someone else attemps to use our space
|
| 40 |
+
})
|
| 41 |
+
|
| 42 |
+
const audioRemoteUrl = await page.$$eval("a[download]", el => el.map(x => x.getAttribute("href"))[0])
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
console.log({
|
| 46 |
+
audioRemoteUrl,
|
| 47 |
+
})
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
// console.log("downloading file from space..")
|
| 51 |
+
console.log(`- downloading ${audioFileName} from ${audioRemoteUrl}`)
|
| 52 |
+
|
| 53 |
+
await downloadVideo(audioRemoteUrl, audioFileName)
|
| 54 |
|
| 55 |
+
return audioFileName
|
| 56 |
}
|
src/services/generateAudioLegacy.mts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { client } from '@gradio/client'
|
| 2 |
+
|
| 3 |
+
import { generateSeed } from "./generateSeed.mts"
|
| 4 |
+
|
| 5 |
+
const instances: string[] = [
|
| 6 |
+
process.env.VS_AUDIO_GENERATION_SPACE_API_URL
|
| 7 |
+
]
|
| 8 |
+
|
| 9 |
+
export const generateAudio = async (prompt: string, options?: {
|
| 10 |
+
seed: number;
|
| 11 |
+
nbFrames: number;
|
| 12 |
+
nbSteps: number;
|
| 13 |
+
}) => {
|
| 14 |
+
const seed = options?.seed || generateSeed()
|
| 15 |
+
const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
|
| 16 |
+
const nbSteps = options?.nbSteps || 35
|
| 17 |
+
|
| 18 |
+
const instance = instances.shift()
|
| 19 |
+
instances.push(instance)
|
| 20 |
+
|
| 21 |
+
const api = await client(instance)
|
| 22 |
+
|
| 23 |
+
const rawResponse = await api.predict('/run', [
|
| 24 |
+
prompt, // string in 'Prompt' Textbox component
|
| 25 |
+
seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
|
| 26 |
+
nbFrames, // 24 // it is the nb of frames per seconds I think?
|
| 27 |
+
nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
|
| 28 |
+
]) as any
|
| 29 |
+
|
| 30 |
+
const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
|
| 31 |
+
|
| 32 |
+
return `${instance}/file=${name}`
|
| 33 |
+
}
|
src/services/generateShot.mts
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import path from "node:path"
|
| 2 |
+
|
| 3 |
+
import { v4 as uuidv4 } from "uuid"
|
| 4 |
+
import tmpDir from "temp-dir"
|
| 5 |
+
|
| 6 |
+
import { downloadVideo } from "./downloadVideo.mts"
|
| 7 |
+
import { generateAudio } from "./generateAudio.mts"
|
| 8 |
+
import { generateVideo } from "./generateVideo.mts"
|
| 9 |
+
import { upscaleVideo } from "./upscaleVideo.mts"
|
| 10 |
+
import { generateVoice } from "./generateVoice.mts"
|
| 11 |
+
import { generateSeed } from "./generateSeed.mts"
|
| 12 |
+
import { mergeAudio } from "./mergeAudio.mts"
|
| 13 |
+
import { addAudioToVideo } from "./addAudioToVideo.mts"
|
| 14 |
+
import { interpolateVideo } from "./interpolateVideo.mts"
|
| 15 |
+
import { postInterpolation } from "./postInterpolation.mts"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
export const generateShot = async ({
|
| 19 |
+
seed = 0,
|
| 20 |
+
shotId = "",
|
| 21 |
+
actorPrompt = "",
|
| 22 |
+
shotPrompt = "",
|
| 23 |
+
backgroundAudioPrompt = "",
|
| 24 |
+
foregroundAudioPrompt = "",
|
| 25 |
+
actorDialoguePrompt = "",
|
| 26 |
+
actorVoicePrompt = "",
|
| 27 |
+
duration = 2,
|
| 28 |
+
nbFrames = 24,
|
| 29 |
+
resolution = 576,
|
| 30 |
+
nbSteps = 35,
|
| 31 |
+
upscale = true,
|
| 32 |
+
interpolate = true,
|
| 33 |
+
noise = true,
|
| 34 |
+
}: {
|
| 35 |
+
seed?: number;
|
| 36 |
+
shotId?: string;
|
| 37 |
+
actorPrompt?: string;
|
| 38 |
+
shotPrompt?: string;
|
| 39 |
+
backgroundAudioPrompt?: string;
|
| 40 |
+
foregroundAudioPrompt?: string;
|
| 41 |
+
actorDialoguePrompt?: string;
|
| 42 |
+
actorVoicePrompt?: string;
|
| 43 |
+
duration?: number; // 2 seconds
|
| 44 |
+
nbFrames?: number; // 24 FPS
|
| 45 |
+
resolution?: number; // 256, 320, 512, 576, 720, 1080..
|
| 46 |
+
nbSteps?: number;
|
| 47 |
+
upscale?: boolean;
|
| 48 |
+
interpolate?: boolean;
|
| 49 |
+
noise?: boolean;
|
| 50 |
+
}) => {
|
| 51 |
+
seed = seed || generateSeed()
|
| 52 |
+
shotId = shotId || uuidv4()
|
| 53 |
+
|
| 54 |
+
const shotFileName = `${shotId}.mp4`
|
| 55 |
+
|
| 56 |
+
console.log("generating video shot:", {
|
| 57 |
+
seed,
|
| 58 |
+
shotId,
|
| 59 |
+
actorPrompt,
|
| 60 |
+
shotPrompt,
|
| 61 |
+
backgroundAudioPrompt,
|
| 62 |
+
foregroundAudioPrompt,
|
| 63 |
+
actorDialoguePrompt,
|
| 64 |
+
actorVoicePrompt,
|
| 65 |
+
duration,
|
| 66 |
+
nbFrames,
|
| 67 |
+
resolution,
|
| 68 |
+
nbSteps,
|
| 69 |
+
upscale,
|
| 70 |
+
interpolate,
|
| 71 |
+
noise,
|
| 72 |
+
})
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
if (actorPrompt) {
|
| 76 |
+
console.log("generating actor..")
|
| 77 |
+
const actorIdentityFileName = `actor_${Date.now()}.png`
|
| 78 |
+
// await generateActor(actorPrompt, actorIdentityFileName, seed)
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
console.log("generating base video ..")
|
| 82 |
+
let generatedVideoUrl = ""
|
| 83 |
+
|
| 84 |
+
// currenty the base model is incapable of generating more than 24 FPS,
|
| 85 |
+
// because otherwise the upscaler will have trouble
|
| 86 |
+
|
| 87 |
+
// so for now, we fix it to 24 frames
|
| 88 |
+
// const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
|
| 89 |
+
const nbFramesForBaseModel = 24
|
| 90 |
+
|
| 91 |
+
try {
|
| 92 |
+
generatedVideoUrl = await generateVideo(shotPrompt, {
|
| 93 |
+
seed,
|
| 94 |
+
nbFrames: nbFramesForBaseModel,
|
| 95 |
+
nbSteps
|
| 96 |
+
})
|
| 97 |
+
} catch (err) {
|
| 98 |
+
// upscaling can be finicky, if it fails we try again
|
| 99 |
+
console.log('- trying again to generate base shot..')
|
| 100 |
+
generatedVideoUrl = await generateVideo(shotPrompt, {
|
| 101 |
+
seed,
|
| 102 |
+
nbFrames: nbFramesForBaseModel,
|
| 103 |
+
nbSteps
|
| 104 |
+
})
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
console.log("downloading video..")
|
| 108 |
+
|
| 109 |
+
const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)
|
| 110 |
+
|
| 111 |
+
if (upscale) {
|
| 112 |
+
console.log("upscaling video..")
|
| 113 |
+
try {
|
| 114 |
+
await upscaleVideo(videoFileName, shotPrompt)
|
| 115 |
+
} catch (err) {
|
| 116 |
+
// upscaling can be finicky, if it fails we try again
|
| 117 |
+
console.log('- trying again to upscale shot..')
|
| 118 |
+
await upscaleVideo(videoFileName, shotPrompt)
|
| 119 |
+
}
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
if (interpolate) {
|
| 123 |
+
console.log("upscaling video..")
|
| 124 |
+
// ATTENTION 1:
|
| 125 |
+
// the interpolation step always create a SLOW MOTION video
|
| 126 |
+
// it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
|
| 127 |
+
// than the duration generated by the original video model
|
| 128 |
+
|
| 129 |
+
// ATTENTION 2:
|
| 130 |
+
// the interpolation step generates videos in 910x512!
|
| 131 |
+
|
| 132 |
+
// ATTENTION 3:
|
| 133 |
+
// the interpolation step parameters are currently not passed to the space,
|
| 134 |
+
// so changing those two variables below will have no effect!
|
| 135 |
+
const interpolationSteps = 3
|
| 136 |
+
const interpolatedFramesPerSecond = 24
|
| 137 |
+
await interpolateVideo(
|
| 138 |
+
videoFileName,
|
| 139 |
+
interpolationSteps,
|
| 140 |
+
interpolatedFramesPerSecond
|
| 141 |
+
)
|
| 142 |
+
console.log('creating slow-mo video (910x512 @ 24 FPS)')
|
| 143 |
+
|
| 144 |
+
// with our current interpolation settings, the 3 seconds video generated by the model
|
| 145 |
+
// become a 7 seconds video, at 24 FPS
|
| 146 |
+
|
| 147 |
+
// so we want to scale it back to the desired duration length
|
| 148 |
+
// also, as a last trick we want to upscale it (without AI) and add some FXs
|
| 149 |
+
console.log('performing final scaling (1280x720 @ 24 FPS)')
|
| 150 |
+
await postInterpolation(videoFileName, duration, nbFrames)
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
let backgroundAudioFileName = ''
|
| 154 |
+
if (backgroundAudioPrompt) {
|
| 155 |
+
console.log("generating background audio..")
|
| 156 |
+
backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4}.m4a`)
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
let foregroundAudioFileName = ''
|
| 160 |
+
if (foregroundAudioPrompt) {
|
| 161 |
+
console.log("generating foreground audio..")
|
| 162 |
+
foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
let voiceAudioFileName = ''
|
| 167 |
+
if (actorDialoguePrompt) {
|
| 168 |
+
console.log("configuring dialogue..")
|
| 169 |
+
if (actorVoicePrompt) {
|
| 170 |
+
console.log("configuring voice..")
|
| 171 |
+
// well.. that's a TODO!
|
| 172 |
+
// for now let's always use the same voice model
|
| 173 |
+
|
| 174 |
+
console.log('TODO this should be done in the sequence, not the prompt!')
|
| 175 |
+
voiceAudioFileName = await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
|
| 176 |
+
}
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
console.log('merging audio with video..')
|
| 180 |
+
if (backgroundAudioFileName || foregroundAudioFileName) {
|
| 181 |
+
let audioFileName = ''
|
| 182 |
+
|
| 183 |
+
// we have both background and foreground
|
| 184 |
+
if (backgroundAudioFileName && foregroundAudioFileName) {
|
| 185 |
+
audioFileName = await mergeAudio({
|
| 186 |
+
input1FileName: backgroundAudioFileName,
|
| 187 |
+
input1Volume: 0.2,// 20% volume
|
| 188 |
+
input2FileName: foregroundAudioFileName,
|
| 189 |
+
input2Volume: 0.7, // 70% volume
|
| 190 |
+
})
|
| 191 |
+
} else if (backgroundAudioFileName) {
|
| 192 |
+
audioFileName = backgroundAudioFileName
|
| 193 |
+
} else if (foregroundAudioFileName) {
|
| 194 |
+
audioFileName = foregroundAudioFileName
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
await addAudioToVideo(videoFileName, audioFileName)
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
console.log("returning result to user..")
|
| 201 |
+
|
| 202 |
+
const filePath = path.resolve(tmpDir, videoFileName)
|
| 203 |
+
|
| 204 |
+
return {
|
| 205 |
+
shotId,
|
| 206 |
+
filePath,
|
| 207 |
+
videoFileName
|
| 208 |
+
}
|
| 209 |
+
}
|
src/services/generateVideo.mts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
import { client } from
|
|
|
|
| 2 |
|
| 3 |
import { generateSeed } from "./generateSeed.mts"
|
| 4 |
|
|
|
|
| 1 |
+
import { client } from "@gradio/client"
|
| 2 |
+
|
| 3 |
|
| 4 |
import { generateSeed } from "./generateSeed.mts"
|
| 5 |
|
src/services/generateVoice.mts
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import puppeteer from "puppeteer"
|
| 2 |
+
|
| 3 |
+
import { downloadVideo } from "./downloadVideo.mts"
|
| 4 |
+
|
| 5 |
+
const instances: string[] = [
|
| 6 |
+
process.env.VS_VOICE_GENERATION_SPACE_API_URL
|
| 7 |
+
]
|
| 8 |
+
|
| 9 |
+
// TODO we should use an inference endpoint instead
|
| 10 |
+
export async function generateVoice(prompt: string, voiceFileName: string) {
|
| 11 |
+
const instance = instances.shift()
|
| 12 |
+
instances.push(instance)
|
| 13 |
+
|
| 14 |
+
console.log("instance:", instance)
|
| 15 |
+
|
| 16 |
+
const browser = await puppeteer.launch({
|
| 17 |
+
headless: false,
|
| 18 |
+
protocolTimeout: 800000,
|
| 19 |
+
})
|
| 20 |
+
|
| 21 |
+
const page = await browser.newPage()
|
| 22 |
+
|
| 23 |
+
await page.goto(instance, {
|
| 24 |
+
waitUntil: "networkidle2",
|
| 25 |
+
})
|
| 26 |
+
|
| 27 |
+
await new Promise(r => setTimeout(r, 3000))
|
| 28 |
+
|
| 29 |
+
const firstTextarea = await page.$('textarea[data-testid="textbox"]')
|
| 30 |
+
|
| 31 |
+
await firstTextarea.type(prompt)
|
| 32 |
+
|
| 33 |
+
// console.log("looking for the button to submit")
|
| 34 |
+
const submitButton = await page.$("button.lg")
|
| 35 |
+
|
| 36 |
+
// console.log("clicking on the button")
|
| 37 |
+
await submitButton.click()
|
| 38 |
+
|
| 39 |
+
await page.waitForSelector("audio", {
|
| 40 |
+
timeout: 800000, // need to be large enough in case someone else attemps to use our space
|
| 41 |
+
})
|
| 42 |
+
|
| 43 |
+
const voiceRemoteUrl = await page.$$eval("audio", el => el.map(x => x.getAttribute("src"))[0])
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
console.log({
|
| 47 |
+
voiceRemoteUrl,
|
| 48 |
+
})
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
console.log(`- downloading ${voiceFileName} from ${voiceRemoteUrl}`)
|
| 52 |
+
|
| 53 |
+
await downloadVideo(voiceRemoteUrl, voiceFileName)
|
| 54 |
+
|
| 55 |
+
return voiceFileName
|
| 56 |
+
}
|
src/services/interpolateVideo.mts
CHANGED
|
@@ -1,40 +1,53 @@
|
|
| 1 |
-
import { promises as fs } from "node:fs"
|
| 2 |
import path from "node:path"
|
| 3 |
-
import { Blob } from "buffer"
|
| 4 |
-
// import { blobFrom } from "fetch-blob"
|
| 5 |
|
| 6 |
-
import
|
| 7 |
import tmpDir from "temp-dir"
|
| 8 |
-
|
| 9 |
-
import { downloadVideo } from './downloadVideo.mts'
|
| 10 |
|
| 11 |
const instances: string[] = [
|
| 12 |
process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
|
| 13 |
]
|
| 14 |
|
| 15 |
-
export const interpolateVideo = async (fileName: string) => {
|
| 16 |
|
|
|
|
|
|
|
| 17 |
const inputFilePath = path.join(tmpDir, fileName)
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
const instance = instances.shift()
|
| 20 |
instances.push(instance)
|
| 21 |
|
| 22 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
const
|
| 25 |
|
| 26 |
-
|
| 27 |
-
// const blob = blobFrom(filePath)
|
| 28 |
-
const result = await api.predict(1, [
|
| 29 |
-
blob, // blob in 'parameter_5' Video component
|
| 30 |
-
1, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
|
| 31 |
-
24, // string in 'FPS output' Radio component
|
| 32 |
-
])
|
| 33 |
|
| 34 |
-
|
| 35 |
-
console.log('raw data:', data)
|
| 36 |
-
const { orig_name, data: remoteFilePath } = data
|
| 37 |
-
const remoteUrl = `${instance}/file=${remoteFilePath}`
|
| 38 |
-
console.log("remoteUrl:", remoteUrl)
|
| 39 |
-
await downloadVideo(remoteUrl, fileName)
|
| 40 |
}
|
|
|
|
|
|
|
| 1 |
import path from "node:path"
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
import puppeteer from "puppeteer"
|
| 4 |
import tmpDir from "temp-dir"
|
| 5 |
+
import { downloadVideo } from "./downloadVideo.mts"
|
|
|
|
| 6 |
|
| 7 |
const instances: string[] = [
|
| 8 |
process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
|
| 9 |
]
|
| 10 |
|
|
|
|
| 11 |
|
| 12 |
+
// TODO we should use an inference endpoint instead
|
| 13 |
+
export async function interpolateVideo(fileName: string, steps: number, fps: number) {
|
| 14 |
const inputFilePath = path.join(tmpDir, fileName)
|
| 15 |
|
| 16 |
+
console.log(`interpolating ${fileName}`)
|
| 17 |
+
console.log(`warning: interpolateVideo parameter "${steps}" is ignored!`)
|
| 18 |
+
console.log(`warning: interpolateVideo parameter "${fps}" is ignored!`)
|
| 19 |
+
|
| 20 |
const instance = instances.shift()
|
| 21 |
instances.push(instance)
|
| 22 |
|
| 23 |
+
const browser = await puppeteer.launch({
|
| 24 |
+
headless: true,
|
| 25 |
+
protocolTimeout: 400000,
|
| 26 |
+
})
|
| 27 |
+
|
| 28 |
+
const page = await browser.newPage()
|
| 29 |
+
await page.goto(instance, { waitUntil: 'networkidle2' })
|
| 30 |
+
|
| 31 |
+
await new Promise(r => setTimeout(r, 3000))
|
| 32 |
+
|
| 33 |
+
const fileField = await page.$('input[type=file]')
|
| 34 |
+
|
| 35 |
+
// console.log(`uploading file..`)
|
| 36 |
+
await fileField.uploadFile(inputFilePath)
|
| 37 |
+
|
| 38 |
+
// console.log('looking for the button to submit')
|
| 39 |
+
const submitButton = await page.$('button.lg')
|
| 40 |
+
|
| 41 |
+
// console.log('clicking on the button')
|
| 42 |
+
await submitButton.click()
|
| 43 |
+
|
| 44 |
+
await page.waitForSelector('a[download="interpolated_result.mp4"]', {
|
| 45 |
+
timeout: 400000, // need to be large enough in case someone else attemps to use our space
|
| 46 |
+
})
|
| 47 |
|
| 48 |
+
const interpolatedFileUrl = await page.$$eval('a[download="interpolated_result.mp4"]', el => el.map(x => x.getAttribute("href"))[0])
|
| 49 |
|
| 50 |
+
await downloadVideo(interpolatedFileUrl, fileName)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
return fileName
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
import { promises as fs } from "node:fs"
import path from "node:path"
import { Blob } from "buffer"

import { client } from "@gradio/client"
import tmpDir from "temp-dir"

import { downloadVideo } from './downloadVideo.mts'

// round-robin pool of Gradio space URLs used for frame interpolation
const instances: string[] = [
  process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
]

// Legacy interpolation path: calls the Gradio space through @gradio/client
// instead of driving the UI with a browser. Overwrites `fileName` in tmpDir
// with the interpolated result.
//
// @param fileName - name of the video file inside tmpDir (overwritten in place)
// @param steps    - interpolation steps, numeric value between 1 and 4
// @param fps      - target FPS for the output
export const interpolateVideo = async (fileName: string, steps: number, fps: number) => {

  const inputFilePath = path.join(tmpDir, fileName)

  // rotate through the instance pool (poor man's load balancing)
  const instance = instances.shift()
  instances.push(instance)

  const api = await client(instance)

  // read the whole video into memory so it can be sent as a Blob
  const video = await fs.readFile(inputFilePath)

  const blob = new Blob([video], { type: 'video/mp4' })
  // const blob = blobFrom(filePath)
  // NOTE(review): fn_index 1 and the parameter order are tied to the space's
  // current Gradio layout — verify against the space if it stops working
  const result = await api.predict(1, [
    blob, // blob in 'parameter_5' Video component
    steps, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
    fps, // string (FALSE! it's a number) in 'FPS output' Radio component
  ])

  // the space returns a file descriptor; shape assumed from observed responses
  const data = (result as any).data[0]
  console.log('raw data:', data)
  const { orig_name, data: remoteFilePath } = data
  // Gradio serves generated files under the /file= route of the space
  const remoteUrl = `${instance}/file=${remoteFilePath}`
  console.log("remoteUrl:", remoteUrl)
  await downloadVideo(remoteUrl, fileName)
}
src/services/mergeAudio.mts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import path from "node:path"
|
| 2 |
+
|
| 3 |
+
import tmpDir from "temp-dir"
|
| 4 |
+
import { v4 as uuidv4 } from "uuid"
|
| 5 |
+
import ffmpeg from "fluent-ffmpeg"
|
| 6 |
+
|
| 7 |
+
export const mergeAudio = async ({
|
| 8 |
+
input1FileName,
|
| 9 |
+
input1Volume,
|
| 10 |
+
input2FileName,
|
| 11 |
+
input2Volume,
|
| 12 |
+
outputFileName = ''
|
| 13 |
+
}: {
|
| 14 |
+
input1FileName: string,
|
| 15 |
+
input1Volume: number,
|
| 16 |
+
input2FileName: string,
|
| 17 |
+
input2Volume: number,
|
| 18 |
+
outputFileName?: string
|
| 19 |
+
}): Promise<string> => {
|
| 20 |
+
outputFileName = `${uuidv4()}.m4a`
|
| 21 |
+
|
| 22 |
+
const input1FilePath = path.resolve(tmpDir, input1FileName)
|
| 23 |
+
const input2FilePath = path.resolve(tmpDir, input2FileName)
|
| 24 |
+
const outputFilePath = path.resolve(tmpDir, outputFileName)
|
| 25 |
+
|
| 26 |
+
const input1Ffmpeg = ffmpeg(input1FilePath)
|
| 27 |
+
.outputOptions("-map 0:a:0")
|
| 28 |
+
.audioFilters([{ filter: 'volume', options: input1Volume }]); // set volume for main audio
|
| 29 |
+
|
| 30 |
+
const input2Ffmpeg = ffmpeg(input2FilePath)
|
| 31 |
+
.outputOptions("-map 1:a:0")
|
| 32 |
+
.audioFilters([{ filter: 'volume', options: input2Volume }]); // set volume for additional audio
|
| 33 |
+
|
| 34 |
+
await new Promise((resolve, reject) => {
|
| 35 |
+
ffmpeg()
|
| 36 |
+
.input(input1Ffmpeg)
|
| 37 |
+
.input(input2Ffmpeg)
|
| 38 |
+
.outputOptions("-c:a aac") // use audio codec
|
| 39 |
+
.outputOptions("-shortest") // finish encoding when shortest input stream ends
|
| 40 |
+
.output(outputFilePath)
|
| 41 |
+
.on("end", resolve)
|
| 42 |
+
.on("error", reject)
|
| 43 |
+
.run()
|
| 44 |
+
})
|
| 45 |
+
|
| 46 |
+
console.log(`merged audio from ${input1FileName} and ${input2FileName} into ${outputFileName}`)
|
| 47 |
+
|
| 48 |
+
return outputFileName
|
| 49 |
+
}
|
import path from "node:path"
import fs from "node:fs"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import ffmpeg from "fluent-ffmpeg"

// Post-processes an interpolated video in place: upscales to 720p,
// retimes it relative to the requested duration, adds film-grain noise,
// and forces the output frame rate. The result overwrites `fileName` in tmpDir.
//
// @param fileName - video file name inside tmpDir (overwritten in place)
// @param duration - target duration in seconds used to compute the retiming ratio
// @param nbFrames - value passed to -r (used here as the output frame rate)
// @returns the same `fileName` once re-encoding completes
export const postInterpolation = async (fileName: string, duration: number, nbFrames: number): Promise<string> => {
  return new Promise((resolve,reject) => {

    // re-encode into a temp file, then copy it back over the original
    const tmpFileName = `${uuidv4()}.mp4`

    const filePath = path.join(tmpDir, fileName)
    const tmpFilePath = path.join(tmpDir, tmpFileName)


    ffmpeg.ffprobe(filePath, function(err, metadata) {
      if (err) { reject(err); return; }


      // NOTE(review): may be undefined for some containers — verify ffprobe output
      const currentVideoDuration = metadata.format.duration

      // compute a ratio ex. 0.3 = 30% of the total length
      // NOTE(review): setpts=(current/target)*PTS shrinks the video when
      // current < target; confirm this ratio isn't inverted vs. the intent
      const durationRatio = currentVideoDuration / duration

      ffmpeg(filePath)

        // convert to HD
        .size("1280x720")

        .videoFilters([
          `setpts=${durationRatio}*PTS`, // we make the video faster
          //'scale=-1:576:lanczos',
          // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this make the video more "pixely"
          'noise=c0s=10:c0f=t+u' // add a movie grain noise
        ])
        .outputOptions([
          `-r ${nbFrames}`,
        ])

        .save(tmpFilePath)
        .on("end", async () => {
          // replace the original with the processed version
          await fs.promises.copyFile(tmpFilePath, filePath)
          try {
            await fs.promises.unlink(tmpFilePath)
          } catch (err) {
            console.log("failed to cleanup (no big deal..)")
          }

          resolve(fileName)
        })
        .on("error", (err) => {
          reject(err)
        })
    })
  })
}
src/test2.mts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { generateAudio } from "./services/generateAudio.mts"
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
console.log('generating background audio..')
|
| 5 |
+
const audioFileName = await generateAudio("sounds of a castle bell ringing alarm", "test_juju_audio.mp3")
|
| 6 |
+
|
| 7 |
+
console.log('result:', audioFileName)
|
// Parameters accepted by the shot-generation endpoint.
export interface ShotQuery {
  // access token used to authenticate the request
  token: string
  // description of the video shot to generate
  shotPrompt: string
  // inputVideo?: string

  // describe the background audio (crowd, birds, wind, sea etc..)
  backgroundAudioPrompt?: string

  // describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
  foregroundAudioPrompt?: string

  // describe the main actor visible in the shot (optional)
  actorPrompt?: string

  // describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
  actorVoicePrompt?: string

  // describe the main actor dialogue line
  actorDialoguePrompt?: string

  // optional seed for reproducible generation
  seed?: number
  upscale?: boolean

  noise?: boolean // add movie noise

  // target duration in seconds
  duration?: number
  steps?: number

  fps?: number // 8, 12, 24, 30, 60

  resolution?: number // 256, 512, 576, 720, 1080
}

// A queued shot-generation job and its original request.
export interface Job {
  // ISO timestamp of when the job started
  startedAt: string
  query: ShotQuery
}
|