CharacterAI improvements
Browse files
App/Generate/Story/Story.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import json
|
| 2 |
from pydantic import BaseModel
|
| 3 |
-
from typing import List
|
| 4 |
|
| 5 |
|
| 6 |
class Scene(BaseModel):
|
|
@@ -13,6 +13,7 @@ class Scene(BaseModel):
|
|
| 13 |
|
| 14 |
class Story(BaseModel):
|
| 15 |
scenes: List[Scene]
|
|
|
|
| 16 |
|
| 17 |
@classmethod
|
| 18 |
def from_dict(cls, data):
|
|
|
|
| 1 |
import json
|
| 2 |
from pydantic import BaseModel
|
| 3 |
+
from typing import List, Optional
|
| 4 |
|
| 5 |
|
| 6 |
class Scene(BaseModel):
|
|
|
|
| 13 |
|
| 14 |
class Story(BaseModel):
|
| 15 |
scenes: List[Scene]
|
| 16 |
+
voice: Optional[str] = None
|
| 17 |
|
| 18 |
@classmethod
|
| 19 |
def from_dict(cls, data):
|
App/Generate/database/CharacterAPI.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import aiohttp
|
| 2 |
+
import asyncio
|
| 3 |
+
import os
|
| 4 |
+
import uuid
|
| 5 |
+
import tempfile
|
| 6 |
+
from typing import List, Dict, Any
|
| 7 |
+
|
| 8 |
+
from pydantic import BaseModel
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class AlignmentData(BaseModel):
    """A single word alignment: the word plus its start/end times (seconds)."""

    word: str
    start: float
    end: float

    def to_dict(self) -> dict:
        """Return this alignment as a Descript-compatible mapping."""
        return dict(
            word=self.word,
            alignedWord=self.word,
            startTime=self.start,
            endTime=self.end,
            hasFailedAlignment=False,
        )
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class CharacterAITTS:
    """Async client for a CharacterAI-style TTS service with word alignment.

    Synthesizes speech via the ``cai_tts`` endpoint, downloads the resulting
    MP3 into a per-instance temp directory, and produces word-level timing
    data either from the Descript-style transcript endpoint or, as a
    fallback, from an external forced aligner.
    """

    def __init__(self):
        # Base endpoint for TTS and transcript requests.
        self.api_url = "https://yakova-embedding.hf.space"
        # Per-instance scratch directory for downloaded audio files.
        self.dir = str(tempfile.mkdtemp())
        # Host for the Descript-style transcript endpoint (currently the
        # same service as api_url).
        self.descript = "https://yakova-embedding.hf.space"
        self.headers = {"Connection": "keep-alive", "Content-Type": "application/json"}

    async def _request_json(self, method: str, url: str, json=None):
        """Issue one HTTP request and return the decoded JSON body.

        Shared by `aligner` and `_make_request`, which previously duplicated
        this session/request boilerplate.
        """
        async with aiohttp.ClientSession() as session:
            async with getattr(session, method)(url=url, json=json) as response:
                return await response.json()

    async def _make_transcript(self, links, text):
        """Return word-level alignment data for `text` spoken in `links`.

        `links` is a list of audio URLs. The transcript endpoint is tried
        first; if it returns a falsy payload, the external aligner is used
        as a fallback with only the first URL.
        """
        # NOTE(review): "file_extenstion" is misspelled but appears to be the
        # exact key the backend expects -- confirm server-side before renaming.
        data = {"audio_url": links, "text": text, "file_extenstion": ".mp3"}
        response_data = await self._make_request(
            "post", "descript_transcript", json=data, external=self.descript
        )
        if not response_data:
            # The fallback aligner accepts a single URL, not a list.
            data["audio_url"] = data["audio_url"][0]
            response_data = await self.aligner("post", "align/url", json=data)
            response_data = self.process_alignments(
                data=response_data["alignment"], offset=0
            )
        return response_data

    def process_alignments(
        self, data: List[Dict[str, Any]], offset: float = 0
    ) -> List[Dict[str, Any]]:
        """Validate raw alignment entries and convert to Descript-style dicts.

        `offset` is accepted for interface compatibility but is not yet
        applied to the timestamps (TODO: shift start/end by `offset`).
        """
        return [AlignmentData(**item).to_dict() for item in data]

    async def aligner(
        self,
        method,
        endpoint,
        json=None,
        external="https://yakova-aligner.hf.space/align/url",
    ):
        """Call the external forced-alignment service.

        When `external` is truthy it is used verbatim as the full URL and
        `endpoint` is ignored; otherwise `endpoint` is resolved against
        `api_url`.
        """
        url = external if external else f"{self.api_url}/{endpoint}"
        return await self._request_json(method, url, json=json)

    async def _make_request(self, method, endpoint, json=None, external=None):
        """Send a JSON request to `endpoint` on `external` (or `api_url`)."""
        base = external if external else self.api_url
        return await self._request_json(method, f"{base}/{endpoint}", json=json)

    async def say(self, text, speaker=None):
        """Synthesize `text` with optional voice `speaker`.

        Returns ``(audio_url, local_path)`` where `local_path` points at the
        downloaded MP3 file.
        """
        data = {"text": text, "voice": speaker}
        response_data = await self._make_request("post", "cai_tts", json=data)
        audio_url = response_data["audio"]
        temp = await self.download_file(audio_url)
        return audio_url, temp

    async def download_file(self, url):
        """Stream `url` into the instance temp dir and return the saved path.

        Raises:
            RuntimeError: on a non-200 response, instead of silently
                returning a path that was never written (previous behavior).
        """
        filename = str(uuid.uuid4()) + ".mp3"
        os.makedirs(self.dir, exist_ok=True)
        save_path = os.path.join(self.dir, filename)
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    raise RuntimeError(
                        f"Failed to download audio: HTTP {response.status}"
                    )
                with open(save_path, "wb") as file:
                    while True:
                        chunk = await response.content.read(1024)
                        if not chunk:
                            break
                        file.write(chunk)
        return save_path
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# # Usage example
# async def main():
#     tts = CharacterAITTS()
#     url, temp = await tts.say(
#         "Hello there, this is a quick voice test."
#     )
#     transcript = await tts._make_transcript(
#         links=[url], text="Hello there, this is a quick voice test."
#     )
#     print(transcript)


# # Run the main function
# asyncio.run(main())
|
App/Generate/database/Model.py
CHANGED
|
@@ -4,7 +4,9 @@ import asyncio, os
|
|
| 4 |
import uuid, random
|
| 5 |
from pydub import AudioSegment
|
| 6 |
from .DescriptAPI import Speak
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
from .Vercel import AsyncImageGenerator
|
| 9 |
from .Video3d import VideoGenerator
|
| 10 |
import aiohttp
|
|
@@ -196,12 +198,13 @@ class Project(orm.Model):
|
|
| 196 |
|
| 197 |
|
| 198 |
class Scene(orm.Model):
|
| 199 |
-
tts =
|
| 200 |
# eleven = ElevenLab()
|
| 201 |
tablename = "scenes"
|
| 202 |
registry = models
|
| 203 |
fields = {
|
| 204 |
"id": orm.Integer(primary_key=True),
|
|
|
|
| 205 |
"project": orm.ForeignKey(Project),
|
| 206 |
"images": orm.JSON(default=None),
|
| 207 |
"narration": orm.String(max_length=10_000, allow_null=True, default=""),
|
|
@@ -237,7 +240,7 @@ class Scene(orm.Model):
|
|
| 237 |
while retry_count < 3:
|
| 238 |
try:
|
| 239 |
return await self.tts.say(
|
| 240 |
-
text=self.narration + " master"
|
| 241 |
) ### The blanks help to even stuff up.
|
| 242 |
except Exception as e:
|
| 243 |
print(f"Failed to generate narration: {e}")
|
|
|
|
| 4 |
import uuid, random
|
| 5 |
from pydub import AudioSegment
|
| 6 |
from .DescriptAPI import Speak
|
| 7 |
+
|
| 8 |
+
# from .ElevenLab import ElevenLab
|
| 9 |
+
from .CharacterAPI import CharacterAITTS
|
| 10 |
from .Vercel import AsyncImageGenerator
|
| 11 |
from .Video3d import VideoGenerator
|
| 12 |
import aiohttp
|
|
|
|
| 198 |
|
| 199 |
|
| 200 |
class Scene(orm.Model):
|
| 201 |
+
tts = CharacterAITTS()
|
| 202 |
# eleven = ElevenLab()
|
| 203 |
tablename = "scenes"
|
| 204 |
registry = models
|
| 205 |
fields = {
|
| 206 |
"id": orm.Integer(primary_key=True),
|
| 207 |
+
"voice": orm.String(max_length=100, allow_null=True, default=""),
|
| 208 |
"project": orm.ForeignKey(Project),
|
| 209 |
"images": orm.JSON(default=None),
|
| 210 |
"narration": orm.String(max_length=10_000, allow_null=True, default=""),
|
|
|
|
| 240 |
while retry_count < 3:
|
| 241 |
try:
|
| 242 |
return await self.tts.say(
|
| 243 |
+
text=self.narration + " master", speaker=self.voice
|
| 244 |
) ### The blanks help to even stuff up.
|
| 245 |
except Exception as e:
|
| 246 |
print(f"Failed to generate narration: {e}")
|
App/Generate/database/Vercel.py
CHANGED
|
@@ -44,15 +44,15 @@ class AsyncImageGenerator:
|
|
| 44 |
"cfg": 2.5,
|
| 45 |
# "seed": 42,
|
| 46 |
"image": "https://image.lexica.art/full_webp/e41b87fb-4cc3-43cd-a6e6-f3dbb08c2399",
|
| 47 |
-
"steps": 28,
|
| 48 |
-
"width": 1024,
|
| 49 |
-
"height": 1536,
|
| 50 |
-
"aspect_ratio": "16:9",
|
| 51 |
"prompt": payload,
|
| 52 |
# "sampler": "dpmpp_2m_sde_gpu",
|
| 53 |
# "scheduler": "karras",
|
| 54 |
# "output_format": "png",
|
| 55 |
-
"output_quality": 95,
|
| 56 |
# "negative_prompt": "clouds, people, buildings",
|
| 57 |
# "number_of_images": 1,
|
| 58 |
# "ip_adapter_weight": 1,
|
|
@@ -62,8 +62,8 @@ class AsyncImageGenerator:
|
|
| 62 |
# "path": "models/playgroundai/playground-v2.5-1024px-aesthetic/versions/a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
|
| 63 |
# "path": "models/fofr/any-comfyui-workflow/versions/cd385285ba75685a040afbded7b79814a971f3febf46c5eab7c716e200c784e1",
|
| 64 |
# "path": "models/fofr/sd3-explorer/versions/a9f4aebd943ad7db13de8e34debea359d5578d08f128e968f9a36c3e9b0148d4",
|
| 65 |
-
|
| 66 |
-
"path": "models/black-forest-labs/flux-schnell",
|
| 67 |
# "path": "models/lucataco/proteus-v0.4/versions/34a427535a3c45552b94369280b823fcd0e5c9710e97af020bf445c033d4569e",
|
| 68 |
# "path": "models/lucataco/juggernaut-xl-v9/versions/bea09cf018e513cef0841719559ea86d2299e05448633ac8fe270b5d5cd6777e",
|
| 69 |
}
|
|
@@ -140,18 +140,18 @@ async def process_images(payloads):
|
|
| 140 |
return results
|
| 141 |
|
| 142 |
|
| 143 |
-
# #
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
|
| 150 |
|
| 151 |
-
# #
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
|
| 156 |
|
| 157 |
-
|
|
|
|
| 44 |
"cfg": 2.5,
|
| 45 |
# "seed": 42,
|
| 46 |
"image": "https://image.lexica.art/full_webp/e41b87fb-4cc3-43cd-a6e6-f3dbb08c2399",
|
| 47 |
+
# "steps": 28,
|
| 48 |
+
# "width": 1024,
|
| 49 |
+
# "height": 1536,
|
| 50 |
+
# "aspect_ratio": "16:9",
|
| 51 |
"prompt": payload,
|
| 52 |
# "sampler": "dpmpp_2m_sde_gpu",
|
| 53 |
# "scheduler": "karras",
|
| 54 |
# "output_format": "png",
|
| 55 |
+
# "output_quality": 95,
|
| 56 |
# "negative_prompt": "clouds, people, buildings",
|
| 57 |
# "number_of_images": 1,
|
| 58 |
# "ip_adapter_weight": 1,
|
|
|
|
| 62 |
# "path": "models/playgroundai/playground-v2.5-1024px-aesthetic/versions/a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
|
| 63 |
# "path": "models/fofr/any-comfyui-workflow/versions/cd385285ba75685a040afbded7b79814a971f3febf46c5eab7c716e200c784e1",
|
| 64 |
# "path": "models/fofr/sd3-explorer/versions/a9f4aebd943ad7db13de8e34debea359d5578d08f128e968f9a36c3e9b0148d4",
|
| 65 |
+
"path": "models/bingbangboom-lab/flux-new-whimscape/versions/2e8de10f217bc56da163a0204cf09f89995eaf643459014803fae79753183682",
|
| 66 |
+
# "path": "models/black-forest-labs/flux-schnell",
|
| 67 |
# "path": "models/lucataco/proteus-v0.4/versions/34a427535a3c45552b94369280b823fcd0e5c9710e97af020bf445c033d4569e",
|
| 68 |
# "path": "models/lucataco/juggernaut-xl-v9/versions/bea09cf018e513cef0841719559ea86d2299e05448633ac8fe270b5d5cd6777e",
|
| 69 |
}
|
|
|
|
| 140 |
return results
|
| 141 |
|
| 142 |
|
| 143 |
+
# # Example payloads
|
| 144 |
+
payloads = [
|
| 145 |
+
"""
|
| 146 |
+
comicbook illustration artistic, beautiful Awsome cat
|
| 147 |
+
"""
|
| 148 |
+
]
|
| 149 |
|
| 150 |
|
| 151 |
+
# # Run the asyncio event loop
|
| 152 |
+
async def main():
|
| 153 |
+
results = await process_images(payloads)
|
| 154 |
+
pprint.pprint(results)
|
| 155 |
|
| 156 |
|
| 157 |
+
asyncio.run(main())
|
App/Generate/generatorRoutes.py
CHANGED
|
@@ -49,6 +49,7 @@ async def generate_assets(generated_story: Story, batch_size=4, threeD=True):
|
|
| 49 |
model_scene = await Scene.objects.create(project=x)
|
| 50 |
model_scene.image_prompts = story_scene.image_prompts
|
| 51 |
model_scene.narration = story_scene.narration
|
|
|
|
| 52 |
await model_scene.update(**model_scene.__dict__)
|
| 53 |
all_scenes.append(model_scene)
|
| 54 |
batch_updates.append(
|
|
|
|
| 49 |
model_scene = await Scene.objects.create(project=x)
|
| 50 |
model_scene.image_prompts = story_scene.image_prompts
|
| 51 |
model_scene.narration = story_scene.narration
|
| 52 |
+
model_scene.voice = story_scene.voice
|
| 53 |
await model_scene.update(**model_scene.__dict__)
|
| 54 |
all_scenes.append(model_scene)
|
| 55 |
batch_updates.append(
|