ali-kanbar committed on
Commit bba1aed · verified · 1 Parent(s): ca4d93a

Update app.py

Files changed (1)
  1. app.py +535 -239
app.py CHANGED
@@ -1,239 +1,535 @@
- import gradio as gr
- import asyncio
- import os
- import traceback
- import numpy as np
- import re
- from functools import partial
- import torch
- import imageio
- import cv2
- from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
- from huggingface_hub import hf_hub_download
- from safetensors.torch import load_file
- from PIL import Image
- import edge_tts
- from transformers import AutoTokenizer, pipeline
- from moviepy.editor import VideoFileClip, AudioFileClip
- from func_timeout import func_timeout, FunctionTimedOut
-
- # Initialize models with cache optimization
- def initialize_components():
-     global tokenizer, text_pipe, sentiment_analyzer, pipe
-
-     # Text generation components
-     tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct", cache_dir="model_cache")
-     text_pipe = pipeline(
-         "text-generation",
-         model="Qwen/Qwen2.5-1.5B-Instruct",
-         tokenizer=tokenizer,
-         device_map="auto",
-         cache_dir="model_cache"
-     )
-
-     # Sentiment analysis
-     sentiment_analyzer = pipeline("sentiment-analysis", cache_dir="model_cache")
-
-     # Video generation setup
-     device = "cuda" if torch.cuda.is_available() else "cpu"
-     dtype = torch.float16 if device == "cuda" else torch.float32
-     step = 8
-     repo = "ByteDance/AnimateDiff-Lightning"
-     ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
-     base = "emilianJR/epiCRealism"
-
-     # Load motion adapter with caching
-     adapter = MotionAdapter().to(device, dtype)
-     model_path = hf_hub_download(repo, ckpt, cache_dir="model_cache")
-     adapter.load_state_dict(load_file(model_path, device=device))
-
-     # Initialize pipeline
-     pipe = AnimateDiffPipeline.from_pretrained(
-         base,
-         motion_adapter=adapter,
-         torch_dtype=dtype,
-         cache_dir="model_cache"
-     ).to(device)
-
-     pipe.scheduler = EulerDiscreteScheduler.from_config(
-         pipe.scheduler.config,
-         timestep_spacing="trailing",
-         beta_schedule="linear"
-     )
-
- initialize_components()
-
- # Cleanup function for resource management
- def cleanup():
-     torch.cuda.empty_cache()
-     for f in ["generated_video.mp4", "final_video_with_audio.mp4", "output.mp3"]:
-         if os.path.exists(f):
-             try:
-                 os.remove(f)
-             except:
-                 pass
-
- # Story generation functions (keep your original functions but add timeout)
- def generate_video(summary):
-     def crossfade_transition(frames1, frames2, transition_length=10):
-         blended_frames = []
-         frames1_np = [np.array(frame) for frame in frames1[-transition_length:]]
-         frames2_np = [np.array(frame) for frame in frames2[:transition_length]]
-         for i in range(transition_length):
-             alpha = i / transition_length
-             beta = 1.0 - alpha
-             blended = cv2.addWeighted(frames1_np[i], beta, frames2_np[i], alpha, 0)
-             blended_frames.append(Image.fromarray(blended))
-         return blended_frames
-
-     sentences = []
-     current_sentence = ""
-     for char in summary:
-         current_sentence += char
-         if char in {'.', '!', '?'}:
-             sentences.append(current_sentence.strip())
-             current_sentence = ""
-     sentences = [s.strip() for s in sentences if s.strip()]
-
-     output_dir = "generated_frames"
-     video_path = "generated_video.mp4"
-     os.makedirs(output_dir, exist_ok=True)
-
-     all_frames = []
-     previous_frames = None
-     transition_frames = 10
-     batch_size = 1
-
-     for i in range(0, len(sentences), batch_size):
-         batch_prompts = sentences[i : i + batch_size]
-         for idx, prompt in enumerate(batch_prompts):
-             try:
-                 output = func_timeout(
-                     300,  # 5 minute timeout per scene
-                     pipe,
-                     args=(prompt,),
-                     kwargs={
-                         'guidance_scale': 1.0,
-                         'num_inference_steps': step,
-                         'width': 128,  # Reduced resolution
-                         'height': 128
-                     }
-                 )
-                 frames = output.frames[0]
-
-                 if previous_frames is not None:
-                     transition = crossfade_transition(previous_frames, frames, transition_frames)
-                     all_frames.extend(transition)
-
-                 all_frames.extend(frames)
-                 previous_frames = frames
-
-             except FunctionTimedOut:
-                 print(f"Timeout generating scene {i+idx+1}")
-                 return None
-             except Exception as e:
-                 print(f"Error generating scene: {str(e)}")
-                 continue
-
-     imageio.mimsave(video_path, all_frames, fps=6)  # Reduced FPS
-     return video_path
-
- # Modified main processing function with enhanced error handling
- def create_story_video(prompt, progress=gr.Progress()):
-     cleanup()  # Clear previous runs
-
-     if not prompt or len(prompt.strip()) < 5:
-         return "Prompt too short (min 5 characters)", None, None
-     if len(prompt) > 500:
-         return "Prompt too long (max 500 characters)", None, None
-
-     try:
-         progress(0, desc="Starting story generation...")
-         story = generate_story(prompt)
-         progress(25, desc="Story generated")
-
-         progress(30, desc="Starting video generation...")
-         video_path = generate_video(story)
-         if not video_path:
-             return story, None, "Video generation failed"
-         progress(60, desc="Video rendered")
-
-         progress(65, desc="Creating audio summary...")
-         audio_summary = summary_of_summary(story, video_path)
-
-         progress(75, desc="Generating voiceover...")
-         try:
-             loop = asyncio.new_event_loop()
-             asyncio.set_event_loop(loop)
-             audio_file = loop.run_until_complete(
-                 generate_audio_with_sentiment(audio_summary, sentiment_analyzer)
-             )
-         except Exception as e:
-             return story, None, f"Audio error: {str(e)}"
-
-         progress(90, desc="Finalizing video...")
-         output_path = 'final_video_with_audio.mp4'
-         combine_video_with_audio(video_path, audio_file, output_path)
-
-         return story, output_path, audio_summary
-
-     except Exception as e:
-         error_msg = f"Error: {str(e)}"
-         print(traceback.format_exc())
-         return error_msg, None, None
-
- # Keep other functions (summarize, generate_story, etc.) unchanged from your original code
- # ...
-
- # Gradio interface setup with resource management
- EXAMPLE_PROMPTS = [
-     "A nurse discovers an unusual pattern in patient symptoms.",
-     "A family finds a time capsule during home renovation.",
-     "A restaurant owner innovates to save their business.",
-     "Wildlife tracking reveals climate changes.",
-     "Community rebuilds after natural disaster."
- ]
-
- with gr.Blocks(title="AI Story Generator", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# 🎬 AI Story Video Generator")
-     gr.Markdown("Enter a short story idea (5-500 characters)")
-
-     with gr.Row():
-         prompt_input = gr.Textbox(
-             label="Story Idea",
-             placeholder="Example: A detective finds a hidden room...",
-             max_lines=2
-         )
-
-     gr.Examples(
-         examples=EXAMPLE_PROMPTS,
-         inputs=prompt_input,
-         label="Example Prompts"
-     )
-
-     with gr.Row():
-         generate_btn = gr.Button("Generate", variant="primary")
-         clear_btn = gr.Button("Clear", variant="secondary")
-
-     with gr.Tabs():
-         with gr.Tab("Results"):
-             video_output = gr.Video(label="Generated Video", interactive=False)
-             story_output = gr.Textbox(label="Full Story", lines=10)
-             audio_summary = gr.Textbox(label="Audio Summary", lines=3)
-
-     generate_btn.click(
-         fn=create_story_video,
-         inputs=prompt_input,
-         outputs=[story_output, video_output, audio_summary]
-     )
-
-     clear_btn.click(
-         fn=lambda: [None, None, None],
-         outputs=[story_output, video_output, audio_summary]
-     )
-
-     demo.load(fn=cleanup)
-     demo.unload(fn=cleanup)
-
- if __name__ == "__main__":
-     demo.launch(server_port=7860, show_error=True)
+ import gradio as gr
+ import asyncio
+ import os
+ import traceback
+ import numpy as np
+ import re
+ from functools import partial
+
+ # Import all required libraries
+ import torch
+ import imageio
+ import cv2
+ from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
+ from huggingface_hub import hf_hub_download
+ from safetensors.torch import load_file
+ from PIL import Image
+ import edge_tts
+ from transformers import AutoTokenizer, pipeline
+ from moviepy.editor import VideoFileClip, AudioFileClip
+
+ # Initialize the Qwen model
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
+ text_pipe = pipeline(
+     "text-generation",
+     model="Qwen/Qwen2.5-1.5B-Instruct",
+     tokenizer=tokenizer
+ )
+
+ # Initialize the sentiment analyzer
+ sentiment_analyzer = pipeline("sentiment-analysis")
+
+ # Initialize video generation components
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+ step = 8
+ repo = "ByteDance/AnimateDiff-Lightning"
+ ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
+ base = "emilianJR/epiCRealism"
+
+ # Load motion adapter
+ adapter = MotionAdapter().to(device, dtype)
+ adapter.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device))
+
+ # Load pipeline
+ pipe = AnimateDiffPipeline.from_pretrained(base, motion_adapter=adapter, torch_dtype=dtype).to(device)
+ pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
+
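+ # AnimateDiff-Lightning is a distilled motion module meant to be sampled in very
+ # few steps (here `step` = 8) with guidance_scale close to 1.0; the "trailing"
+ # timestep spacing and linear beta schedule above follow the scheduler
+ # configuration recommended for these few-step checkpoints.
+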
+ # Define all required functions
+ def summarize(text):
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "You are an expert summarizer focused on efficiency and clarity. "
+                 "Create concise narrative summaries that: "
+                 "1. Capture all key points and main ideas "
+                 "2. Omit examples, repetitions, and secondary details "
+                 "3. Maintain logical flow and coherence "
+                 "4. Use clear, direct language without markdown formatting"
+             )
+         },
+         {
+             "role": "user",
+             "content": (
+                 "Please summarize the following text in 10-15 sentences. "
+                 "Focus on essential information, exclude non-critical details, "
+                 f"and maintain natural storytelling flow:\n\n{text}"
+             )
+         }
+     ]
+
+     prompt = tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     response = text_pipe(
+         prompt,
+         max_new_tokens=512,
+         num_beams=4,
+         early_stopping=True,
+         no_repeat_ngram_size=3,
+         temperature=0.7,
+         top_p=0.95,
+         do_sample=True
+     )
+
+     result = response[0]['generated_text']
+     summary = result.split("assistant\n")[-1].strip()
+     return summary
+
+ def generate_story(prompt):
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "You are a skilled storyteller specializing in tight, impactful narratives. "
+                 "Create engaging stories that:\n"
+                 "1. Contain exactly 15-20 sentences\n"
+                 "2. Keep each sentence under 77 tokens\n"
+                 "3. Maintain strong narrative flow and pacing\n"
+                 "4. Focus on vivid imagery and concrete details\n"
+                 "5. Avoid filler words and redundant phrases\n"
+                 "6. Use simple, direct language without markdown"
+             )
+         },
+         {
+             "role": "user",
+             "content": (
+                 f"Craft a compelling short story based on this premise: {prompt}\n"
+                 "Structure requirements:\n"
+                 "- Strict 15-20 sentence count\n"
+                 "- Maximum 77 tokens per sentence\n"
+                 "- Clear beginning-middle-end structure\n"
+                 "- Emphasis on showing rather than telling\n"
+                 "Output plain text only, no markdown formatting."
+             )
+         }
+     ]
+
+     chat_prompt = tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     # First attempt to generate story
+     generated = text_pipe(
+         chat_prompt,
+         max_new_tokens=1024,
+         num_beams=5,
+         early_stopping=True,
+         no_repeat_ngram_size=4,
+         temperature=0.65,
+         top_k=30,
+         top_p=0.90,
+         do_sample=True,
+         length_penalty=0.9
+     )
+
+     full_output = generated[0]['generated_text']
+     story = full_output.split("assistant\n")[-1].strip()
+
+     # Process sentences and check constraints
+     sentences = []
+     for s in story.split('.'):
+         if s.strip():
+             sentences.append(s.strip())
+
+     # Check sentence count constraint
+     sentence_count = len(sentences)
+     if sentence_count < 15 or sentence_count > 20:
+         # Regenerate with stricter parameters if constraints not met
+         enhanced_prompt = f"{prompt} (IMPORTANT: Story MUST have EXACTLY 15-20 sentences, and each sentence MUST be under 77 tokens. Current attempt had {sentence_count} sentences.)"
+
+         messages[1]["content"] = (
+             f"Craft a compelling short story based on this premise: {enhanced_prompt}\n"
+             "Structure requirements:\n"
+             "- CRITICAL: Output EXACTLY 15-20 sentences, not more, not less\n"
+             "- CRITICAL: Maximum 77 tokens per sentence\n"
+             "- Clear beginning-middle-end structure\n"
+             "- Emphasis on showing rather than telling\n"
+             "Output plain text only, no markdown formatting."
+         )
+
+         chat_prompt = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+
+         # Try with more strict parameters
+         generated = text_pipe(
+             chat_prompt,
+             max_new_tokens=1024,
+             num_beams=7,
+             early_stopping=True,
+             no_repeat_ngram_size=4,
+             temperature=0.5,
+             top_k=20,
+             top_p=0.85,
+             do_sample=True,
+             length_penalty=1.0
+         )
+
+         full_output = generated[0]['generated_text']
+         story = full_output.split("assistant\n")[-1].strip()
+
+         sentences = []
+         for s in story.split('.'):
+             if s.strip():
+                 sentences.append(s.strip())
+
+     word_to_token_ratio = 1.3
+     constrained_sentences = []
+     for sentence in sentences:
+         words = sentence.split()
+         estimated_tokens = len(words) * word_to_token_ratio
+
+         if estimated_tokens > 77:
+             max_words = int(75 / word_to_token_ratio)
+             truncated = ' '.join(words[:max_words])
+             constrained_sentences.append(truncated)
+         else:
+             constrained_sentences.append(sentence)
+
+     while len(constrained_sentences) < 15:
+         constrained_sentences.append("The story continued with unexpected twists and turns.")
+     constrained_sentences = constrained_sentences[:20]
+
+     formatted_sentences = []
+     for s in constrained_sentences:
+         if not s.endswith(('.', '!', '?')):
+             s += '.'
+         formatted_sentences.append(s)
+
+     final_story = '\n'.join(formatted_sentences)
+     return final_story
+
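+ # Note: the 77-token-per-sentence cap matches the 77-token context window of the
+ # CLIP text encoder used by Stable-Diffusion-based pipelines; each sentence is
+ # later fed to the AnimateDiff pipeline as a standalone prompt in generate_video().
+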
+ def generate_video(summary):
+     def crossfade_transition(frames1, frames2, transition_length=10):
+         blended_frames = []
+         frames1_np = [np.array(frame) for frame in frames1[-transition_length:]]
+         frames2_np = [np.array(frame) for frame in frames2[:transition_length]]
+         for i in range(transition_length):
+             alpha = i / transition_length
+             beta = 1.0 - alpha
+             blended = cv2.addWeighted(frames1_np[i], beta, frames2_np[i], alpha, 0)
+             blended_frames.append(Image.fromarray(blended))
+         return blended_frames
+
+     # Sentence splitting
+     sentences = []
+     current_sentence = ""
+     for char in summary:
+         current_sentence += char
+         if char in {'.', '!', '?'}:
+             sentences.append(current_sentence.strip())
+             current_sentence = ""
+     sentences = [s.strip() for s in sentences if s.strip()]
+     print(f"Total scenes: {len(sentences)}")
+
+     # Output config
+     output_dir = "generated_frames"
+     video_path = "generated_video.mp4"
+     os.makedirs(output_dir, exist_ok=True)
+
+     # Generate animation
+     all_frames = []
+     previous_frames = None
+     transition_frames = 10
+     batch_size = 1
+
+     for i in range(0, len(sentences), batch_size):
+         batch_prompts = sentences[i : i + batch_size]
+         for idx, prompt in enumerate(batch_prompts):
+             print(f"Generating animation for prompt {i+idx+1}/{len(sentences)}: {prompt}")
+             output = pipe(
+                 prompt=prompt,
+                 guidance_scale=1.0,
+                 num_inference_steps=step,
+                 width=256,
+                 height=256,
+             )
+             frames = output.frames[0]
+
+             if previous_frames is not None:
+                 transition = crossfade_transition(previous_frames, frames, transition_frames)
+                 all_frames.extend(transition)
+
+             all_frames.extend(frames)
+             previous_frames = frames
+
+     # Save video
+     imageio.mimsave(video_path, all_frames, fps=8)
+     print(f"Video saved at {video_path}")
+     return video_path
+
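+ # Each pipe() call above returns 16 frames by default, so at fps=8 every sentence
+ # contributes roughly two seconds of footage plus a 10-frame crossfade transition.
+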
+ def estimate_voiceover_words(video_path):
+     try:
+         # Get video duration in seconds
+         video = VideoFileClip(video_path)
+         duration_minutes = video.duration / 60
+         # Estimate word count based on average speaking rate (150 words per minute)
+         estimated_words = int(duration_minutes * 150)
+         # Ensure a minimum word count
+         return max(estimated_words, 30)
+     except Exception as e:
+         print(f"Error estimating voiceover words: {str(e)}")
+         return 50  # Default fallback
+
+ def summary_of_summary(text, video_path):
+     target_word_count = estimate_voiceover_words(video_path)
+     messages_2 = [
+         {
+             "role": "system",
+             "content": (
+                 "You are an expert summarizer focused on brevity and clarity. "
+                 f"Create a summary that is exactly around {target_word_count} words: "
+                 "1. Capture the most essential information\n"
+                 "2. Omit unnecessary details and examples\n"
+                 "3. Maintain logical flow and coherence\n"
+                 "4. Use clear, direct language"
+             )
+         },
+         {
+             "role": "user",
+             "content": (
+                 f"Please summarize the following text in approximately {target_word_count} words:\n\n{text}"
+             )
+         }
+     ]
+
+     # Generate prompt
+     prompt_for_resummarization = tokenizer.apply_chat_template(
+         messages_2,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     # Generate response
+     response = text_pipe(
+         prompt_for_resummarization,
+         max_new_tokens=target_word_count + 20,
+         num_beams=4,
+         early_stopping=True,
+         no_repeat_ngram_size=3,
+         temperature=0.7,
+         top_p=0.95,
+         do_sample=True
+     )
+
+     # Extract result
+     summary = response[0]['generated_text'].split("assistant\n")[-1].strip()
+     return summary
+
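+ # The default "sentiment-analysis" pipeline loads an SST-2 fine-tuned DistilBERT
+ # checkpoint whose labels are "POSITIVE"/"NEGATIVE"; the branch below keys off
+ # that label to pick the narration voice.
+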
+ async def generate_audio_with_sentiment(text, sentiment_analyzer):
+     # Perform sentiment analysis on the text
+     sentiment = sentiment_analyzer(text)[0]
+     label = sentiment['label']
+     confidence = sentiment['score']
+
+     print(f"Sentiment: {label} with confidence {confidence:.2f}")
+
+     # Set voice parameters based on sentiment (edge-tts expects signed percent/Hz strings)
+     if label == "POSITIVE":
+         voice = "en-US-AriaNeural"  # Cheerful and energetic tone for positive sentiment
+         rate = "+20%"  # Faster speech
+         pitch = "+2Hz"  # Slightly higher pitch for a more positive tone
+     else:
+         voice = "en-US-GuyNeural"  # Neutral tone for negative sentiment
+         rate = "-10%"  # Slower speech
+         pitch = "-2Hz"  # Lower pitch for a more somber tone
+
+     # Generate speech with EdgeTTS, applying the sentiment-based rate and pitch
+     communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
+
+     # Save the audio to a file
+     await communicate.save("output.mp3")
+
+     # Return the path of the generated audio
+     return "output.mp3"
+
+ def combine_video_with_audio(video_path, audio_path, output_path):
+     # Load video and audio
+     video = VideoFileClip(video_path)
+     audio = AudioFileClip(audio_path)
+
+     # Set the audio to the video
+     video = video.set_audio(audio)
+
+     # Save the final video
+     video.write_videofile(output_path, codec='libx264', audio_codec='aac')
+
+     print("Video with audio saved successfully!")
+
+ # Main processing function
+ def create_story_video(prompt, progress=gr.Progress()):
+     # Input validation
+     if not prompt or len(prompt.strip()) < 5:
+         return "Please enter a longer prompt (at least 5 characters).", None, None
+
+     try:
+         # Step 1: Generate story
+         progress(0, desc="Starting story generation...")
+         story = generate_story(prompt)
+         progress(20, desc="Story generated successfully!")
+
+         # Step 2: Generate video
+         progress(25, desc="Creating video animation (this may take several minutes)...")
+         video_path = generate_video(story)
+         progress(60, desc="Video created successfully!")
+
+         # Step 3: Create audio summary
+         progress(65, desc="Creating audio summary...")
+         audio_summary = summary_of_summary(story, video_path)
+         progress(80, desc="Creating audio narration...")
+
+         # Step 4: Generate audio with sentiment (async)
+         try:
+             # Set up event loop handling
+             try:
+                 loop = asyncio.get_event_loop()
+             except RuntimeError:
+                 loop = asyncio.new_event_loop()
+                 asyncio.set_event_loop(loop)
+
+             audio_file = loop.run_until_complete(
+                 generate_audio_with_sentiment(audio_summary, sentiment_analyzer)
+             )
+             progress(90, desc="Audio created successfully!")
+         except Exception as e:
+             print(f"Audio generation error: {str(e)}")
+             return story, None, f"Audio generation failed: {str(e)}"
+
+         # Step 5: Combine video and audio
+         progress(95, desc="Combining video and audio...")
+         output_path = 'final_video_with_audio.mp4'
+         combine_video_with_audio(video_path, audio_file, output_path)
+
+         progress(100, desc="Process complete!")
+         return story, output_path, audio_summary
+
+     except Exception as e:
+         error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
+         print(error_msg)
+         return f"An error occurred: {str(e)}", None, None
+
+ # Sample prompt examples based on realistic scenarios
+ EXAMPLE_PROMPTS = [
+     "A nurse discovers an unusual pattern in patient symptoms that leads to an important medical breakthrough.",
+     "During a home renovation, a family uncovers a time capsule from the previous owners.",
+     "A struggling local restaurant owner finds an innovative way to save their business during an economic downturn.",
+     "An environmental scientist tracks mysterious wildlife behavior that reveals concerning climate changes.",
+     "A community comes together to rebuild after a devastating natural disaster.",
+     "A teacher develops a unique method that transforms learning for students with special needs.",
+     "An elderly person reconnects with a childhood friend through social media after sixty years apart.",
+     "A food delivery driver forms an unexpected friendship with an isolated elderly customer during the pandemic.",
+     "A first-generation college student overcomes significant obstacles to achieve academic success.",
+     "A wildlife photographer documents the surprising recovery of an endangered species."
+ ]
+
+ # Create the Gradio interface
+ with gr.Blocks(title="Animind AI Story Video Generator", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# 🎬 AI Story Video Generator")
+     gr.Markdown("Enter a one-sentence prompt to generate a complete story with video and narration.")
+
+     # Input section
+     with gr.Row():
+         prompt_input = gr.Textbox(
+             label="Your Story Idea",
+             placeholder="Enter a one-sentence prompt (e.g., 'A detective discovers a hidden room in an abandoned mansion')",
+             lines=2
+         )
+
+     # Example prompts section
+     gr.Markdown("### Try these example prompts:")
+
+     # Create examples using Gradio's examples feature
+     with gr.Row():
+         examples = gr.Examples(
+             examples=[[prompt] for prompt in EXAMPLE_PROMPTS],
+             inputs=prompt_input,
+             label="Click any example to load it"
+         )
+
+     with gr.Row():
+         generate_button = gr.Button("Generate Story Video", variant="primary")
+         clear_button = gr.Button("Clear", variant="secondary")
+
+     # Status indicator
+     status_indicator = gr.Markdown("Ready to generate your story video...")
+
+     # Output section with tabs
+     with gr.Tabs():
+         with gr.TabItem("Results"):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     video_output = gr.Video(label="Generated Video with Narration")
+                 with gr.Column(scale=1):
+                     story_output = gr.TextArea(label="Generated Story", lines=15, max_lines=30)
+                     summary_output = gr.TextArea(label="Audio Summary", lines=5)
+
+         with gr.TabItem("Help & Information"):
+             gr.Markdown("""
+             ## How to use this tool
+
+             1. Enter a creative one-sentence story idea in the input box
+             2. Click "Generate Story Video" and wait for processing to complete
+             3. View your complete AI-generated story video with narration
+
+             ## Processing Steps
+
+             1. **Story Generation**: The AI expands your idea into a 15-20 sentence story
+             2. **Video Creation**: Each sentence is visualized through AI-generated animation
+             3. **Audio Narration**: The AI analyzes the sentiment and creates appropriate voiceover
+             4. **Final Compilation**: Video and audio are combined into your final story
+
+             ## Tips for Great Results
+
+             - Use clear, specific prompts that suggest a narrative arc
+             - Include interesting characters, settings, or situations
+             - Make your prompt realistic but with potential for development
+             - Try to suggest a potential conflict or discovery
+
+             ## Troubleshooting
+
+             If you encounter errors:
+             - Try a different prompt
+             - Ensure your prompt is clear and specific
+             - Check that all required models are properly loaded
+             """)
+
+     # Handle clearing
+     def clear_outputs():
+         return "", None, ""
+
+     # Connect interface elements
+     generate_button.click(
+         fn=create_story_video,
+         inputs=prompt_input,
+         outputs=[story_output, video_output, summary_output],
+         api_name="generate"
+     )
+
+     clear_button.click(
+         fn=clear_outputs,
+         inputs=None,
+         outputs=[story_output, video_output, summary_output]
+     )
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.launch()