Update app.py
app.py CHANGED
@@ -18,6 +18,8 @@ import shutil
 import matplotlib.pyplot as plt
 import gc  # Import the garbage collector
 from audio import *
+import os
+
 # Download necessary NLTK data
 try:
     nltk.data.find('tokenizers/punkt')
@@ -70,8 +72,6 @@ def load_min_dalle_model(models_root: str = 'pretrained', fp16: bool = True):
 # Initialize the MinDalle model
 min_dalle_model = load_min_dalle_model()
 
-
-
 def generate_image_with_min_dalle(
     model: MinDalle,
     text: str,
@@ -114,10 +114,6 @@ def generate_image_with_min_dalle(
 from pydub import AudioSegment
 import os
 
-
-
-
-
 # Function to generate video from text
 def get_output_video(text):
     print("DEBUG: Starting get_output_video function...")
@@ -169,31 +165,46 @@ def get_output_video(text):
     assert len(generated_images) == len(sentences), "Mismatch in number of images and sentences."
     sub_names = [nltk.tokenize.sent_tokenize(sentence) for sentence in sentences]
 
-    # Add subtitles to images
-    def draw_multiple_line_text(image, text, font, text_color, text_start_height):
+    # Add subtitles to images with dynamic adjustments
+    def get_dynamic_wrap_width(font, text, image_width, padding):
+        # Estimate the number of characters per line dynamically
+        avg_char_width = sum(font.getbbox(c)[2] for c in text) / len(text)
+        return max(1, (image_width - padding * 2) // avg_char_width)
+
+    def draw_multiple_line_text(image, text, font, text_color, text_start_height, padding=10):
         draw = ImageDraw.Draw(image)
-        image_width, image_height = image.size
+        image_width, _ = image.size
         y_text = text_start_height
-        lines = textwrap.wrap(text, width=…)
+        lines = textwrap.wrap(text, width=get_dynamic_wrap_width(font, text, image_width, padding))
         for line in lines:
             line_width, line_height = font.getbbox(line)[2:]
-            draw.text(((image_width - line_width) / 2, y_text),
-                      line, font=font, fill=text_color)
-            y_text += line_height
+            draw.text(((image_width - line_width) / 2, y_text), line, font=font, fill=text_color)
+            y_text += line_height + padding
 
     def add_text_to_img(text1, image_input):
         print(f"DEBUG: Adding text to image: '{text1}'")
-        fontsize = …
+        # Scale font size dynamically
+        base_font_size = 30
+        image_width, image_height = image_input.size
+        scaled_font_size = max(10, int(base_font_size * (image_width / 800)))  # Adjust 800 based on typical image width
         path_font = "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf"
         if not os.path.exists(path_font):
             path_font = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
-
-        font = ImageFont.truetype(path_font, fontsize)
+        font = ImageFont.truetype(path_font, scaled_font_size)
+
         text_color = (255, 255, 0)
-        text_start_height = …
-        draw_multiple_line_text(image_input, text1, font, text_color, text_start_height)
+        padding = 10
+
+        # Estimate starting height dynamically
+        line_height = font.getbbox("A")[3] + padding
+        total_text_height = len(textwrap.wrap(text1, get_dynamic_wrap_width(font, text1, image_width, padding))) * line_height
+        text_start_height = image_height - total_text_height - 20
+
+        draw_multiple_line_text(image_input, text1, font, text_color, text_start_height, padding)
         return image_input
 
+
+    # Process images with subtitles
     generated_images_sub = []
     for k, image in enumerate(generated_images):
         text_to_add = sub_names[k][0]
@@ -201,6 +212,8 @@ def get_output_video(text):
         generated_images_sub.append(result)
         result.save(f"image_{k}/generated_image_with_subtitles.png")
 
+
+
     # Generate audio for each subtitle
     mp3_names = []
     mp3_lengths = []
@@ -277,4 +290,4 @@ with demo:
     button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
 
 # Launch the Gradio app
-demo.launch(debug=True, share=…)
+demo.launch(debug=True, share=True)
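Taken together, the subtitle hunk replaces fixed layout constants with three dynamic adjustments: the wrap width is estimated from the average glyph width of the caption, the font size is scaled with the image width, and the starting height is derived from the wrapped line count so the text block ends just above the bottom edge. Below is a minimal standalone sketch of that logic, assuming only Pillow is installed; the DejaVu font path, canvas size, and caption are placeholder assumptions, and an int() cast is added on the wrap width so textwrap.wrap always receives an integer.

# Minimal sketch of the commit's dynamic subtitle logic; font path, canvas, and caption are assumptions.
import textwrap
from PIL import Image, ImageDraw, ImageFont

def get_dynamic_wrap_width(font, text, image_width, padding):
    # Estimate characters per line from the average glyph width of this caption.
    avg_char_width = sum(font.getbbox(c)[2] for c in text) / len(text)
    return max(1, int((image_width - padding * 2) // avg_char_width))  # int(): textwrap expects an integer width

def draw_multiple_line_text(image, text, font, text_color, text_start_height, padding=10):
    draw = ImageDraw.Draw(image)
    image_width, _ = image.size
    y_text = text_start_height
    for line in textwrap.wrap(text, width=get_dynamic_wrap_width(font, text, image_width, padding)):
        line_width, line_height = font.getbbox(line)[2:]
        # Center each wrapped line horizontally, stepping down by line height plus padding.
        draw.text(((image_width - line_width) / 2, y_text), line, font=font, fill=text_color)
        y_text += line_height + padding

if __name__ == "__main__":
    img = Image.new("RGB", (640, 360), "black")  # stand-in for a generated frame
    try:
        # Same scaling rule as the commit: 30 px at an 800 px wide image, floor of 10 px.
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                                  max(10, int(30 * (img.width / 800))))
    except OSError:
        font = ImageFont.load_default()  # bitmap fallback; getbbox needs Pillow >= 9.2
    caption = "A quick brown fox jumps over the lazy dog."
    padding = 10
    line_height = font.getbbox("A")[3] + padding
    n_lines = len(textwrap.wrap(caption, get_dynamic_wrap_width(font, caption, img.width, padding)))
    draw_multiple_line_text(img, caption, font, (255, 255, 0),
                            img.height - n_lines * line_height - 20, padding)
    img.save("subtitle_demo.png")

Averaging glyph widths over the actual caption, rather than wrapping at a fixed character count, keeps long captions from overflowing narrow frames, at the cost of one extra pass over the text.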