-
-
Generating content...
-
+
+
+
Requirements.txt
+
+gradio==3.50.2
+g4f==0.1.9.0
+moviepy==1.0.3
+assemblyai==0.17.0
+requests==2.31.0
+google-generativeai==0.3.1
+python-dotenv==1.0.0
+Pillow==10.0.0
+openai==1.3.5
+edge-tts==6.1.9
+bark==0.0.1
+tensorflow==2.12.0
+soundfile==0.12.1
+TTS==0.21.1
+rvc-engine==0.0.1
+termcolor==2.3.0
+
-
-
Generated Video
+
+
+
app.py
+
+import os
+import re
+import g4f
+import json
+import time
+import random
+import tempfile
+import requests
+import assemblyai as aai
+from moviepy.editor import *
+from datetime import datetime
+import gradio as gr
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Constants
+CACHE_DIR = os.path.join(tempfile.gettempdir(), "yt_shorts_generator")
+os.makedirs(CACHE_DIR, exist_ok=True)
+
+# Helper functions
+def info(message):
+ print(f"[INFO] {message}")
+ return f"[INFO] {message}"
+
+def success(message):
+ print(f"[SUCCESS] {message}")
+ return f"[SUCCESS] {message}"
+
+def warning(message):
+ print(f"[WARNING] {message}")
+ return f"[WARNING] {message}"
+
+def error(message):
+ print(f"[ERROR] {message}")
+ return f"[ERROR] {message}"
+
+class YouTube:
+ def __init__(self, niche, language, text_gen="gemini", image_gen="prodia", tts_engine="elevenlabs",
+ tts_voice="Sarah", subtitle_font="Helvetica-Bold", font_size=80,
+ text_color="white", highlight_color="blue", api_keys=None):
+ info(f"Initializing YouTube class")
+ self._niche = niche
+ self._language = language
+ self.text_gen = text_gen
+ self.image_gen = image_gen
+ self.tts_engine = tts_engine
+ self.tts_voice = tts_voice
+ self.subtitle_font = subtitle_font
+ self.font_size = font_size
+ self.text_color = text_color
+ self.highlight_color = highlight_color
+ self.api_keys = api_keys or {}
+ self.images = []
+ self.logs = []
+
+ # Set API keys
+ if 'gemini' in self.api_keys and self.api_keys['gemini']:
+ os.environ["GEMINI_API_KEY"] = self.api_keys['gemini']
+ if 'assemblyai' in self.api_keys and self.api_keys['assemblyai']:
+ os.environ["ASSEMBLYAI_API_KEY"] = self.api_keys['assemblyai']
+ if 'elevenlabs' in self.api_keys and self.api_keys['elevenlabs']:
+ os.environ["ELEVENLABS_API_KEY"] = self.api_keys['elevenlabs']
+ if 'segmind' in self.api_keys and self.api_keys['segmind']:
+ os.environ["SEGMIND_API_KEY"] = self.api_keys['segmind']
-
+ info(f"Niche: {niche}, Language: {language}")
+ self.log(f"Initialized with niche: {niche}, language: {language}")
+ self.log(f"Text generator: {text_gen}, Image generator: {image_gen}, TTS engine: {tts_engine}")
+
+ def log(self, message):
+ """Add a log message to the logs list"""
+ timestamp = datetime.now().strftime("%H:%M:%S")
+ log_entry = f"[{timestamp}] {message}"
+ self.logs.append(log_entry)
+ return log_entry
+
+ @property
+ def niche(self):
+ return self._niche
+
+ @property
+ def language(self):
+ return self._language
+
+ def generate_response(self, prompt, model=None):
+ self.log(f"Generating response for prompt: {prompt[:50]}...")
+ if self.text_gen == "gemini":
+ self.log("Using Google's Gemini model")
+ import google.generativeai as genai
+ genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
+ model = genai.GenerativeModel('gemini-2.0-flash')
+ response = model.generate_content(prompt).text
+ else:
+ model_name = model if model else "gpt-3.5-turbo"
+ self.log(f"Using G4F model: {model_name}")
+ response = g4f.ChatCompletion.create(
+ model=model_name,
+ messages=[{"role": "user", "content": prompt}]
+ )
+ self.log(f"Response generated successfully, length: {len(response)} characters")
+ return response
+
+ def generate_topic(self):
+ self.log("Generating topic based on niche")
+ completion = self.generate_response(
+ f"Please generate a specific video idea that takes about the following topic: {self.niche}. "
+ f"Make it exactly one sentence. Only return the topic, nothing else."
+ )
+
+ if not completion:
+ self.log(error("Failed to generate Topic."))
+ return None
+
+ self.subject = completion
+ self.log(success(f"Generated topic: {completion}"))
+ return completion
+
+ def generate_script(self):
+ self.log("Generating script for video")
+ prompt = f"""
+        Generate a script for a YouTube Shorts video, depending on the subject of the video.
+
+        The script is to be returned as a string with several paragraphs.
+
+ Here is an example of a string:
+ "This is an example string."
+
+ Do not under any circumstance reference this prompt in your response.
+
+ Get straight to the point, don't start with unnecessary things like, "welcome to this video".
+
+ Obviously, the script should be related to the subject of the video.
+
+ YOU MUST NOT INCLUDE ANY TYPE OF MARKDOWN OR FORMATTING IN THE SCRIPT, NEVER USE A TITLE.
+ YOU MUST WRITE THE SCRIPT IN THE LANGUAGE SPECIFIED IN [LANGUAGE].
+ ONLY RETURN THE RAW CONTENT OF THE SCRIPT. DO NOT INCLUDE "VOICEOVER", "NARRATOR" OR SIMILAR INDICATORS.
+
+ Subject: {self.subject}
+ Language: {self.language}
+ """
+ completion = self.generate_response(prompt)
+
+        if not completion:
+            self.log(error("The generated script is empty."))
+            return None
+
+        # Strip any markdown asterisks the model may have added
+        completion = re.sub(r"\*", "", completion)
+
+        if len(completion) > 5000:
+            self.log(warning("Generated Script is too long. Retrying..."))
+            return self.generate_script()
+
+ self.script = completion
+ self.log(success(f"Generated script ({len(completion)} chars)"))
+ return completion
+
+ def generate_metadata(self):
+ self.log("Generating metadata (title and description)")
+ title = self.generate_response(
+ f"Please generate a YouTube Video Title for the following subject, including hashtags: "
+ f"{self.subject}. Only return the title, nothing else. Limit the title under 100 characters."
+ )
+
+ if len(title) > 100:
+ self.log(warning("Generated Title is too long. Retrying..."))
+ return self.generate_metadata()
+
+ description = self.generate_response(
+ f"Please generate a YouTube Video Description for the following script: {self.script}. "
+ f"Only return the description, nothing else."
+ )
+
+ self.metadata = {
+ "title": title,
+ "description": description
+ }
+
+ self.log(success(f"Generated title: {title}"))
+ self.log(success(f"Generated description: {description}"))
+ return self.metadata
+
+ def generate_prompts(self, count=5):
+ self.log(f"Generating {count} image prompts")
+ prompt = f"""
+ Generate {count} Image Prompts for AI Image Generation,
+ depending on the subject of a video.
+ Subject: {self.subject}
+
+ The image prompts are to be returned as
+ a JSON-Array of strings.
+
+        Each image prompt should be a full sentence and
+        always include the main subject of the video.
+
+ Be emotional and use interesting adjectives to make the
+ Image Prompt as detailed as possible.
+
+ YOU MUST ONLY RETURN THE JSON-ARRAY OF STRINGS.
+ YOU MUST NOT RETURN ANYTHING ELSE.
+ YOU MUST NOT RETURN THE SCRIPT.
+
+        The image prompts must be related to the subject of the video.
+ Here is an example of a JSON-Array of strings:
+ ["image prompt 1", "image prompt 2", "image prompt 3"]
+
+ For context, here is the full text:
+ {self.script}
+ """
+
+ completion = str(self.generate_response(prompt))\
+ .replace("```json", "") \
+ .replace("```", "")
+
+ image_prompts = []
+
+ if "image_prompts" in completion:
+ image_prompts = json.loads(completion)["image_prompts"]
+ else:
+ try:
+ image_prompts = json.loads(completion)
+ self.log(f"Generated Image Prompts: {image_prompts}")
+ except Exception:
+ self.log(warning("GPT returned an unformatted response. Attempting to clean..."))
+
+ # Get everything between [ and ], and turn it into a list
+ r = re.compile(r"\[.*\]", re.DOTALL)
+ matches = r.findall(completion)
+ if len(matches) == 0:
+ self.log(warning("Failed to generate Image Prompts. Retrying..."))
+ return self.generate_prompts(count)
+
+ try:
+ image_prompts = json.loads(matches[0])
+                except Exception:
+ self.log(error("Failed to parse image prompts JSON"))
+ # Try a fallback approach - create some generic prompts
+ image_prompts = [
+ f"A beautiful image showing {self.subject}",
+ f"A detailed visualization of {self.subject}",
+ f"An artistic representation of {self.subject}",
+ f"A photorealistic image about {self.subject}",
+ f"A dramatic scene related to {self.subject}"
+ ]
+
+ self.image_prompts = image_prompts[:count] # Limit to requested count
+ self.log(success(f"Generated {len(self.image_prompts)} Image Prompts"))
+ for i, prompt in enumerate(self.image_prompts):
+ self.log(f"Image Prompt {i+1}: {prompt}")
+ return self.image_prompts
+
+ def generate_image(self, prompt):
+ self.log(f"Generating image for prompt: {prompt[:50]}...")
+
+ if self.image_gen == "prodia":
+ self.log("Using Prodia provider for image generation")
+ s = requests.Session()
+ headers = {
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+ }
-
+ # Generate job
+ self.log("Sending generation request to Prodia API")
+ resp = s.get(
+ "https://api.prodia.com/generate",
+ params={
+ "new": "true",
+ "prompt": prompt,
+ "model": "sdxl", # Default model
+ "negative_prompt": "verybadimagenegative_v1.3",
+ "steps": "20",
+ "cfg": "7",
+ "seed": random.randint(1, 10000),
+ "sample": "DPM++ 2M Karras",
+ "aspect_ratio": "square"
+ },
+ headers=headers
+ )
-
+ job_id = resp.json()['job']
+ self.log(f"Job created with ID: {job_id}")
-
+ # For demo purposes, simulate waiting
+ self.log("Waiting for image generation to complete...")
+ time.sleep(3) # Simulate API call
+
+ # In a real implementation we would poll until completion
+ # For demo, we'll just create a placeholder image
+ image_path = os.path.join(CACHE_DIR, f"image_{len(self.images)}.png")
+
+ # Since we can't actually generate a real image, for demonstration we'll
+ # return a simple example URL that would be the result in a real implementation
+ image_url = "https://images.unsplash.com/photo-1579546929518-9e396f3cc809"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ elif self.image_gen == "hercai":
+ self.log("Using Hercai provider for image generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+ image_url = "https://images.unsplash.com/photo-1513151233558-d860c5398176"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ elif self.image_gen == "segmind":
+ self.log("Using Segmind provider for image generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+ image_url = "https://images.unsplash.com/photo-1618005182384-a83a8bd57fbe"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ elif self.image_gen == "pollinations":
+ self.log("Using Pollinations provider for image generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+ image_url = "https://images.unsplash.com/photo-1550859492-d5da9d8e45f3"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ else: # Default or g4f
+ self.log("Using default provider for image generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+ image_url = "https://images.unsplash.com/photo-1541701494587-cb58502866ab"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ def generate_speech(self, text, output_format='mp3'):
+ self.log("Generating speech from text")
+
+ # Clean text
+ text = re.sub(r'[^\w\s.?!]', '', text)
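+        # Note: this keeps only word characters, whitespace, and ". ? !", so commas,
+        # apostrophes and other punctuation are dropped before the text reaches the TTS engine.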
+
+ self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")
+
+ audio_path = os.path.join(CACHE_DIR, f"speech.{output_format}")
+
+ if self.tts_engine == "elevenlabs":
+ self.log("Using ElevenLabs provider for speech generation")
+ # For demo purposes, we'll just simulate the API call
+ self.log("Simulating ElevenLabs API call (would use real API in production)")
+ time.sleep(3) # Simulate API call
+ self.tts_path = audio_path
+ return audio_path
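+            # Rough sketch of the real call against the public ElevenLabs REST API
+            # (voice-name-to-voice-ID lookup omitted; not exercised in this demo):
+            # resp = requests.post(
+            #     f"https://api.elevenlabs.io/v1/text-to-speech/{self.tts_voice}",
+            #     headers={"xi-api-key": os.environ.get("ELEVENLABS_API_KEY", "")},
+            #     json={"text": text, "model_id": "eleven_multilingual_v2"},
+            # )
+            # with open(audio_path, "wb") as f:
+            #     f.write(resp.content)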
+
+        elif self.tts_engine == 'bark':
+            self.log("Using Bark provider for speech generation")
+            # For demo purposes, simulate API call
+            time.sleep(3)
+
+        elif self.tts_engine == "gtts":
+            self.log("Using Google TTS provider for speech generation")
+            # For demo purposes, simulate API call
+            time.sleep(2)
+
+        elif self.tts_engine == "openai":
+            self.log("Using OpenAI provider for speech generation")
+            # For demo purposes, simulate API call
+            time.sleep(3)
+
+        elif self.tts_engine == "edge":
+            self.log("Using Edge TTS provider for speech generation")
+            # For demo purposes, simulate API call
+            time.sleep(2)
+
+        else:
+            self.log(f"Using default TTS engine (would use {self.tts_engine} in production)")
+            # For demo purposes, simulate API call
+            time.sleep(2)
+
+ self.log(success(f"Speech generated and saved to: {audio_path}"))
+ self.tts_path = audio_path
+ return audio_path
+
+ def generate_subtitles(self, audio_path):
+ self.log("Generating word-level subtitles for video")
+
+ # Define constants
+ FONT = self.subtitle_font
+ FONTSIZE = self.font_size
+ COLOR = self.text_color
+ BG_COLOR = self.highlight_color
+ FRAME_SIZE = (1080, 1920)
+ MAX_CHARS = 30
+ MAX_DURATION = 3.0
+ MAX_GAP = 2.5
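+        # A subtitle line is flushed once it exceeds MAX_CHARS characters or MAX_DURATION
+        # seconds, or when the silence before the next word exceeds MAX_GAP seconds.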
+
+ try:
+ # In a real implementation, we would use AssemblyAI to transcribe
+ self.log("In a production environment, this would use AssemblyAI for transcription")
+
+ # For demo purposes, we'll simulate the word-level data
+ self.log("Simulating transcription with word-level timing")
+ words = self.script.split()
+ total_duration = 60 # Assume 60 seconds for demo
+ avg_word_duration = total_duration / len(words)
+
+ wordlevel_info = []
+ current_time = 0
+
+ for word in words:
+ # Calculate a slightly randomized duration based on word length
+ word_duration = avg_word_duration * (0.5 + (len(word) / 10))
+
+ word_data = {
+ "word": word.strip(),
+ "start": current_time,
+ "end": current_time + word_duration
+ }
+ wordlevel_info.append(word_data)
+ current_time += word_duration
+
+ self.log(success(f"Generated word-level timing for {len(wordlevel_info)} words"))
+
+ # Process into line-level data (simplified for demo)
+ subtitles = []
+ line = []
+ line_duration = 0
+
+ for idx, word_data in enumerate(wordlevel_info):
+ word = word_data["word"]
+ start = word_data["start"]
+ end = word_data["end"]
+
+ line.append(word_data)
+ line_duration += end - start
+ temp = " ".join(item["word"] for item in line)
+ new_line_chars = len(temp)
+ duration_exceeded = line_duration > MAX_DURATION
+ chars_exceeded = new_line_chars > MAX_CHARS
+
+ if idx > 0:
+ gap = word_data['start'] - wordlevel_info[idx - 1]['end']
+ maxgap_exceeded = gap > MAX_GAP
+ else:
+ maxgap_exceeded = False
+
+ # Check if any condition is exceeded to finalize the current line
+ if duration_exceeded or chars_exceeded or maxgap_exceeded:
+ if line:
+ subtitle_line = {
+ "text": " ".join(item["word"] for item in line),
+ "start": line[0]["start"],
+ "end": line[-1]["end"],
+ "words": line
+ }
+ subtitles.append(subtitle_line)
+ line = []
+ line_duration = 0
+
+ # Add the remaining words as the last subtitle line if any
+ if line:
+ subtitle_line = {
+ "text": " ".join(item["word"] for item in line),
+ "start": line[0]["start"],
+ "end": line[-1]["end"],
+ "words": line
+ }
+ subtitles.append(subtitle_line)
+
+ self.log(success(f"Generated {len(subtitles)} subtitle lines"))
+
+ # In a real implementation, we would create TextClips for MoviePy
+ # For the demo, we'll just return the subtitle data
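+            # Sketch of the MoviePy step (assumes ImageMagick is installed, which TextClip needs):
+            # line_clips = [
+            #     TextClip(line["text"], font=FONT, fontsize=FONTSIZE, color=COLOR)
+            #     .set_start(line["start"])
+            #     .set_duration(line["end"] - line["start"])
+            #     .set_position(("center", 0.8), relative=True)
+            #     for line in subtitles
+            # ]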
+ return {
+ "wordlevel": wordlevel_info,
+ "linelevel": subtitles
+ }
+
+ except Exception as e:
+ self.log(error(f"Subtitle generation failed: {str(e)}"))
+ return None
+
+ def combine(self):
+ self.log("Combining images and audio into final video")
+
+ # For demonstration purposes, we're simulating the video creation process
+ combined_video_path = os.path.join(CACHE_DIR, "output.mp4")
+
+ # In a real implementation, this would:
+ # 1. Create ImageClips from each image
+ # 2. Create an audio clip from the speech
+ # 3. Add background music
+ # 4. Add word-level subtitles
+ # 5. Combine everything into a final video
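+        # Rough sketch of the non-demo path (assumes self.images were downloaded to local files):
+        # audio = AudioFileClip(self.tts_path)
+        # per_image = audio.duration / max(len(self.images), 1)
+        # clips = [ImageClip(p).set_duration(per_image).resize(height=1920) for p in self.images]
+        # video = concatenate_videoclips(clips, method="compose").set_audio(audio)
+        # video.write_videofile(combined_video_path, fps=30)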
+
+ self.log("This would create a vertical (9:16) video with:")
+ self.log(f"- {len(self.images)} images as a slideshow")
+ self.log("- TTS audio as the main audio track")
+ self.log("- Background music at low volume")
+ self.log("- Word-level subtitles that highlight as words are spoken")
+
+ # For demo purposes, simulate video processing
+ self.log("Processing video (simulated for demo)...")
+ time.sleep(3)
+
+ success_msg = f"Video successfully created at: {combined_video_path}"
+ self.log(success(success_msg))
+ self.video_path = combined_video_path
+
+ # For the demo, we'll return a mock result
+ return {
+ 'video_path': combined_video_path,
+ 'images': self.images,
+ 'audio_path': self.tts_path,
+ 'metadata': self.metadata
+ }
+
+ def generate_video(self):
+ """Generate complete video with all components"""
+ self.log("Starting video generation process")
+
+ # Step 1: Generate topic
+ self.log("Generating topic")
+ self.generate_topic()
+
+ # Step 2: Generate script
+ self.log("Generating script")
+ self.generate_script()
+
+ # Step 3: Generate metadata
+ self.log("Generating metadata")
+ self.generate_metadata()
+
+ # Step 4: Generate image prompts
+ self.log("Generating image prompts")
+ self.generate_prompts()
+
+ # Step 5: Generate images
+ self.log("Generating images")
+ for i, prompt in enumerate(self.image_prompts, 1):
+ self.log(f"Generating image {i}/{len(self.image_prompts)}")
+ self.generate_image(prompt)
+
+ # Step 6: Generate speech
+ self.log("Generating speech")
+ self.generate_speech(self.script)
+
+ # Step 7: Generate subtitles
+ self.log("Generating subtitles")
+ self.generate_subtitles(self.tts_path)
+
+ # Step 8: Combine all elements into final video
+ self.log("Combining all elements into final video")
+ result = self.combine()
+
+ self.log(f"Video generation complete.")
+
+ return {
+ 'video_path': result['video_path'],
+ 'images': result['images'],
+ 'audio_path': self.tts_path,
+ 'title': self.metadata['title'],
+ 'description': self.metadata['description'],
+ 'subject': self.subject,
+ 'script': self.script,
+ 'logs': self.logs
+ }
+
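+# Example of driving the pipeline without the UI (placeholder assets, same as the demo above):
+# yt = YouTube(niche="home workouts", language="English", api_keys={"gemini": "YOUR_KEY"})
+# result = yt.generate_video()
+# print(result["title"], result["video_path"])
+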
+# Gradio interface
+def create_youtube_short(niche, language, gemini_api_key="", assemblyai_api_key="",
+ elevenlabs_api_key="", segmind_api_key="", text_gen="gemini",
+ image_gen="prodia", tts_engine="elevenlabs", tts_voice="Sarah",
+ subtitle_font="Helvetica-Bold", font_size=80, text_color="white",
+ highlight_color="blue"):
+
+ # Create API keys dictionary
+ api_keys = {
+ 'gemini': gemini_api_key,
+ 'assemblyai': assemblyai_api_key,
+ 'elevenlabs': elevenlabs_api_key,
+ 'segmind': segmind_api_key
+ }
+
+ # Initialize YouTube class
+ yt = YouTube(
+ niche=niche,
+ language=language,
+ text_gen=text_gen,
+ image_gen=image_gen,
+ tts_engine=tts_engine,
+ tts_voice=tts_voice,
+ subtitle_font=subtitle_font,
+ font_size=font_size,
+ text_color=text_color,
+ highlight_color=highlight_color,
+ api_keys=api_keys
+ )
+
+ # Generate video
+ result = yt.generate_video()
+
+ # In a real implementation we would return the actual video file
+ # For demo, we'll just simulate it with a placeholder
+ demo_video = "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_1mb.mp4"
+
+    # Return the outputs in the order expected by the Gradio click handler
+    return (
+        demo_video,
+        result['title'],
+        result['description'],
+        result['script'],
+        "\n".join(result['logs'])
+    )
+
+# Create Gradio app
+with gr.Blocks() as demo:
+ gr.Markdown("# YouTube Shorts Generator")
+ gr.Markdown("Generate short videos based on a niche and language")
+
+ with gr.Row():
+ with gr.Column(scale=1):
+ with gr.Group():
+ gr.Markdown("### Required Inputs")
+ niche = gr.Textbox(label="Niche/Topic", placeholder="E.g., Fitness tips, Technology facts")
+ language = gr.Dropdown(
+ choices=["English", "Spanish", "French", "German", "Italian", "Portuguese",
+ "Russian", "Japanese", "Chinese", "Hindi"],
+ label="Language",
+ value="English"
+ )
+
+ with gr.Accordion("API Keys", open=False):
+ gemini_api_key = gr.Textbox(label="Gemini API Key", type="password")
+ assemblyai_api_key = gr.Textbox(label="AssemblyAI API Key", type="password")
+ elevenlabs_api_key = gr.Textbox(label="ElevenLabs API Key", type="password")
+ segmind_api_key = gr.Textbox(label="Segmind API Key", type="password")
+
+ with gr.Accordion("Model Selection", open=False):
+ text_gen = gr.Dropdown(
+ choices=["gemini", "g4f"],
+ label="Text Generator",
+ value="gemini"
+ )
+ image_gen = gr.Dropdown(
+ choices=["prodia", "hercai", "g4f", "segmind", "pollinations"],
+ label="Image Generator",
+ value="prodia"
+ )
+ tts_engine = gr.Dropdown(
+ choices=["elevenlabs", "bark", "gtts", "openai", "edge", "local_tts", "xtts", "rvc"],
+ label="Text-to-Speech Engine",
+ value="elevenlabs"
+ )
+ tts_voice = gr.Textbox(
+ label="TTS Voice",
+ placeholder="E.g., Sarah, Brian, Lily, Monika Sogam",
+ value="Sarah"
+ )
+
+ with gr.Accordion("Subtitle Options", open=False):
+ subtitle_font = gr.Dropdown(
+ choices=["Helvetica-Bold", "Arial-Bold", "Impact", "Comic-Sans-MS"],
+ label="Font",
+ value="Helvetica-Bold"
+ )
+ font_size = gr.Slider(
+ minimum=40,
+ maximum=120,
+ value=80,
+ step=5,
+ label="Font Size"
+ )
+ with gr.Row():
+ text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
+ highlight_color = gr.ColorPicker(label="Highlight Color", value="#0000FF")
+
+ generate_btn = gr.Button("Generate Video", variant="primary")
+
+ with gr.Column(scale=1):
+ video_output = gr.Video(label="Generated Video")
+ title_output = gr.Textbox(label="Title")
+ description_output = gr.Textbox(label="Description", lines=3)
+ script_output = gr.Textbox(label="Script", lines=5)
+ log_output = gr.Textbox(label="Process Log", lines=10)
+
+ # Set up the function to call when the generate button is clicked
+ generate_btn.click(
+ fn=create_youtube_short,
+ inputs=[
+ niche, language, gemini_api_key, assemblyai_api_key, elevenlabs_api_key,
+ segmind_api_key, text_gen, image_gen, tts_engine, tts_voice,
+ subtitle_font, font_size, text_color, highlight_color
+ ],
+        outputs=[video_output, title_output, description_output, script_output, log_output]
+ )
+
+# Launch the app
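+# Gradio serves the UI at http://127.0.0.1:7860 by default; pass share=True to demo.launch()
+# for a temporary public link.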
+if __name__ == "__main__":
+ demo.launch()
+
@@ -292,753 +1004,111 @@
}
});
- // Advanced options accordion
- document.getElementById('advanced-options-toggle').addEventListener('click', function() {
- const content = document.getElementById('advanced-options');
- content.classList.toggle('open');
- this.querySelector('span:last-child').classList.toggle('rotate-180');
+ // Update font size value display
+ document.getElementById('font_size').addEventListener('input', function() {
+ document.getElementById('font_size_value').textContent = this.value;
});
- // Populate models based on generator selection
- document.getElementById('text-generator').addEventListener('change', function() {
- const modelSelect = document.getElementById('text-model');
- modelSelect.innerHTML = '';
-
- switch(this.value) {
- case 'gemini':
- addOptions(modelSelect, [
- {value: 'gemini-2.0-flash', text: 'gemini-2.0-flash'},
- {value: 'gemini-2.0-flash-lite', text: 'gemini-2.0-flash-lite'},
- {value: 'gemini-1.5-flash', text: 'gemini-1.5-flash'},
- {value: 'gemini-1.5-flash-8b', text: 'gemini-1.5-flash-8b'},
- {value: 'gemini-1.5-pro', text: 'gemini-1.5-pro'}
- ]);
- break;
- case 'gpt3':
- addOptions(modelSelect, [
- {value: 'gpt-3.5-turbo', text: 'gpt-3.5-turbo'},
- {value: 'gpt-3.5-turbo-16k', text: 'gpt-3.5-turbo-16k'}
- ]);
- break;
- case 'gpt4':
- addOptions(modelSelect, [
- {value: 'gpt-4', text: 'gpt-4'},
- {value: 'gpt-4o', text: 'gpt-4o'},
- {value: 'gpt-4-turbo', text: 'gpt-4-turbo'}
- ]);
- break;
- case 'claude':
- addOptions(modelSelect, [
- {value: 'claude-3-opus-20240229', text: 'Claude 3 Opus'},
- {value: 'claude-3-sonnet-20240229', text: 'Claude 3 Sonnet'},
- {value: 'claude-3-haiku-20240307', text: 'Claude 3 Haiku'}
- ]);
- break;
- case 'llama':
- addOptions(modelSelect, [
- {value: 'llama-3-70b-chat', text: 'Llama 3 70B'},
- {value: 'llama-3-8b-chat', text: 'Llama 3 8B'},
- {value: 'llama-2-70b-chat', text: 'Llama 2 70B'}
- ]);
- break;
- case 'mistral':
- addOptions(modelSelect, [
- {value: 'mistral-large-latest', text: 'Mistral Large'},
- {value: 'mistral-medium-latest', text: 'Mistral Medium'},
- {value: 'mistral-small-latest', text: 'Mistral Small'}
- ]);
- break;
- case 'command':
- addOptions(modelSelect, [
- {value: 'command-r', text: 'Command R'},
- {value: 'command-r-plus', text: 'Command R+'},
- {value: 'command-light', text: 'Command Light'}
- ]);
- break;
- }
- });
-
- document.getElementById('image-generator').addEventListener('change', function() {
- const modelSelect = document.getElementById('image-model');
- modelSelect.innerHTML = '';
-
- switch(this.value) {
- case 'prodia':
- addOptions(modelSelect, [
- {value: 'sdxl', text: 'SDXL'},
- {value: 'realvisxl', text: 'RealVisXL V4.0'},
- {value: 'juggernaut', text: 'Juggernaut XL'},
- {value: 'dreamshaper', text: 'DreamShaper 8'},
- {value: 'portraitplus', text: 'Portrait+ V1'}
- ]);
- break;
- case 'hercai':
- addOptions(modelSelect, [
- {value: 'v1', text: 'Stable Diffusion v1'},
- {value: 'v2', text: 'Stable Diffusion v2'},
- {value: 'v3', text: 'Stable Diffusion v3'},
- {value: 'lexica', text: 'Lexica Diffusion'}
- ]);
- break;
- case 'g4f':
- addOptions(modelSelect, [
- {value: 'dall-e-3', text: 'DALL-E 3'},
- {value: 'dall-e-2', text: 'DALL-E 2'},
- {value: 'imageapi', text: 'ImageAPI v1'}
- ]);
- break;
- case 'segmind':
- addOptions(modelSelect, [
- {value: 'sdxl-turbo', text: 'SDXL Turbo'},
- {value: 'realistic-vision', text: 'Realistic Vision'},
- {value: 'sd3', text: 'Stable Diffusion 3'}
- ]);
- break;
- case 'pollinations':
- addOptions(modelSelect, [
- {value: 'default', text: 'Default Model'}
- ]);
- break;
- }
- });
-
- document.getElementById('tts-engine').addEventListener('change', function() {
- const voiceInput = document.getElementById('tts-voice');
-
- switch(this.value) {
- case 'elevenlabs':
- voiceInput.placeholder = 'E.g., Sarah, Brian, Lily, Monika Sogam';
- break;
- case 'openai':
- voiceInput.placeholder = 'E.g., alloy, echo, fable, onyx, nova, shimmer';
- break;
- case 'edge':
- voiceInput.placeholder = 'E.g., en-US-JennyNeural, en-US-GuyNeural';
- break;
- case 'gtts':
- voiceInput.placeholder = 'Language code (en, es, fr, etc.)';
- break;
- case 'xtts':
- voiceInput.placeholder = 'Speaker name or reference audio path';
- break;
- default:
- voiceInput.placeholder = 'Voice name or identifier';
- }
- });
-
- function addOptions(selectElement, options) {
- options.forEach(option => {
- const optElement = document.createElement('option');
- optElement.value = option.value;
- optElement.textContent = option.text;
- selectElement.appendChild(optElement);
- });
- }
-
- // Initialize model selects
- document.getElementById('text-generator').dispatchEvent(new Event('change'));
- document.getElementById('image-generator').dispatchEvent(new Event('change'));
- document.getElementById('tts-engine').dispatchEvent(new Event('change'));
-
- // Handler for generating videos
- document.getElementById('generate-btn').addEventListener('click', async function() {
- // Get input values
+ // Generate button click handler
+ document.getElementById('generate_btn').addEventListener('click', function() {
const niche = document.getElementById('niche').value.trim();
const language = document.getElementById('language').value;
- // Get advanced options
- const textGenerator = document.getElementById('text-generator').value;
- const textModel = document.getElementById('text-model').value;
- const imageGenerator = document.getElementById('image-generator').value;
- const imageModel = document.getElementById('image-model').value;
- const ttsEngine = document.getElementById('tts-engine').value;
- const ttsVoice = document.getElementById('tts-voice').value.trim();
- const subtitleFont = document.getElementById('subtitle-font').value;
- const subtitleColor = document.getElementById('subtitle-color').value;
- const highlightColor = document.getElementById('highlight-color').value;
- const promptCount = document.getElementById('prompt-count').value;
-
- // Validation
if (!niche) {
alert('Please enter a niche/topic');
return;
}
// Show loading state
- document.getElementById('loading-container').classList.remove('hidden');
- document.getElementById('loading-container').classList.add('flex');
- document.getElementById('results-container').classList.add('hidden');
+ document.getElementById('loading').classList.remove('hidden');
+ document.getElementById('loading').classList.add('flex');
+ document.getElementById('results').classList.add('hidden');
- try {
- // For the Poe environment, we'll create a simulated process that generates a video
- // using text generation, image generation, and speech generation
-
- // Step 1: Generate topic
- updateProgress('Generating topic...');
- const topic = await generateTopic(niche, language);
-
- // Step 2: Generate script
- updateProgress('Creating script...');
- const script = await generateScript(topic, language);
-
- // Step 3: Generate metadata
- updateProgress('Creating title and description...');
- const metadata = await generateMetadata(topic, script);
-
- // Step 4: Generate image prompts
- updateProgress('Creating image prompts...');
- const imagePrompts = await generateImagePrompts(topic, script, promptCount);
-
- // Step 5: Generate images
- updateProgress('Generating images...');
- const imageUrls = await generateImages(imagePrompts);
-
- // Step 6: Generate speech
- updateProgress('Creating voiceover...');
- const audioData = await generateSpeech(script, language, ttsEngine, ttsVoice);
-
- // Step 7: Generate subtitles (simulated)
- updateProgress('Creating subtitles...');
- const subtitles = generateSimulatedSubtitles(script);
-
- // Step 8: Display the results
- updateProgress('Finalizing video...');
- displayResults({
- topic: topic,
- script: script,
- metadata: metadata,
- imagePrompts: imagePrompts,
- imageUrls: imageUrls,
- audioUrl: audioData.url,
- audioDuration: audioData.duration,
- subtitles: subtitles,
- subtitleSettings: {
- font: subtitleFont,
- color: subtitleColor,
- highlightColor: highlightColor
- }
- });
-
- } catch (error) {
- console.error('Error:', error);
- document.getElementById('status-message').textContent = 'Error generating video';
- document.getElementById('progress-detail').textContent = error.message || 'An unexpected error occurred';
- }
+ // Simulate Gradio API call
+ simulateGradioProcess(niche, language);
});
- function updateProgress(message) {
- document.getElementById('progress-detail').textContent = message;
- }
-
- // Function to generate topic based on niche
- async function generateTopic(niche, language) {
- try {
- const prompt = `Please generate a specific video idea that takes about the following topic: ${niche}. Make it exactly one sentence. Only return the topic, nothing else.`;
-
- // For this demonstration, we'll use Claude
- const handlerId = 'topic-generation-handler';
- let topicResult = '';
-
- // Register handler for response
- window.Poe.registerHandler(handlerId, (result) => {
- if (result.responses.length > 0) {
- const response = result.responses[0];
- if (response.status === 'complete') {
- topicResult = response.content.trim();
- }
- }
- });
-
- // Send request to generate topic
- await window.Poe.sendUserMessage(`@Claude-3.7-Sonnet ${prompt}`, {
- handler: handlerId,
- stream: false,
- openChat: false
- });
-
- // Wait for response to be complete
- while (!topicResult) {
- await new Promise(resolve => setTimeout(resolve, 100));
- }
-
- return topicResult;
- } catch (error) {
- console.error('Error generating topic:', error);
- throw new Error('Failed to generate topic');
- }
- }
-
- // Function to generate script based on topic
- async function generateScript(topic, language) {
- try {
- const prompt = `
- Generate a script for youtube shorts video, depending on the subject of the video.
-
- The script is to be returned as a string with several paragraphs.
-
- Get straight to the point, don't start with unnecessary things like, "welcome to this video".
-
- Obviously, the script should be related to the subject of the video.
-
- YOU MUST NOT INCLUDE ANY TYPE OF MARKDOWN OR FORMATTING IN THE SCRIPT, NEVER USE A TITLE.
- YOU MUST WRITE THE SCRIPT IN THE LANGUAGE SPECIFIED IN [LANGUAGE].
- ONLY RETURN THE RAW CONTENT OF THE SCRIPT. DO NOT INCLUDE "VOICEOVER", "NARRATOR" OR SIMILAR INDICATORS.
-
- Subject: ${topic}
- Language: ${language}
- `;
-
- // Use Poe API to send user message
- const handlerId = 'script-generation-handler';
- let scriptResult = '';
-
- // Register handler for response
- window.Poe.registerHandler(handlerId, (result) => {
- if (result.responses.length > 0) {
- const response = result.responses[0];
- if (response.status === 'complete') {
- scriptResult = response.content.trim();
- }
- }
- });
-
- // Send request to generate script
- await window.Poe.sendUserMessage(`@Claude-3.7-Sonnet ${prompt}`, {
- handler: handlerId,
- stream: false,
- openChat: false
- });
-
- // Wait for response to be complete
- while (!scriptResult) {
- await new Promise(resolve => setTimeout(resolve, 100));
- }
-
- return scriptResult;
- } catch (error) {
- console.error('Error generating script:', error);
- throw new Error('Failed to generate script');
- }
- }
-
- // Function to generate metadata (title and description)
- async function generateMetadata(topic, script) {
- try {
- const titlePrompt = `Please generate a YouTube Video Title for the following subject, including hashtags: ${topic}. Only return the title, nothing else. Limit the title under 100 characters.`;
-
- // Use Poe API to send user message for title
- const titleHandlerId = 'title-generation-handler';
- let titleResult = '';
-
- // Register handler for title response
- window.Poe.registerHandler(titleHandlerId, (result) => {
- if (result.responses.length > 0) {
- const response = result.responses[0];
- if (response.status === 'complete') {
- titleResult = response.content.trim();
- }
- }
- });
-
- // Send request to generate title
- await window.Poe.sendUserMessage(`@Claude-3.7-Sonnet ${titlePrompt}`, {
- handler: titleHandlerId,
- stream: false,
- openChat: false
- });
-
- // Wait for title response to be complete
- while (!titleResult) {
- await new Promise(resolve => setTimeout(resolve, 100));
- }
-
- // Now generate description
- const descPrompt = `Please generate a YouTube Video Description for the following script: ${script}. Only return the description, nothing else.`;
-
- // Use Poe API to send user message for description
- const descHandlerId = 'desc-generation-handler';
- let descResult = '';
-
- // Register handler for description response
- window.Poe.registerHandler(descHandlerId, (result) => {
- if (result.responses.length > 0) {
- const response = result.responses[0];
- if (response.status === 'complete') {
- descResult = response.content.trim();
- }
- }
- });
-
- // Send request to generate description
- await window.Poe.sendUserMessage(`@Claude-3.7-Sonnet ${descPrompt}`, {
- handler: descHandlerId,
- stream: false,
- openChat: false
- });
-
- // Wait for description response to be complete
- while (!descResult) {
- await new Promise(resolve => setTimeout(resolve, 100));
- }
-
- return {
- title: titleResult,
- description: descResult
- };
- } catch (error) {
- console.error('Error generating metadata:', error);
- throw new Error('Failed to generate title and description');
- }
- }
-
- // Function to generate image prompts
- async function generateImagePrompts(topic, script, count = 5) {
+ // Function to simulate the Gradio process
+ async function simulateGradioProcess(niche, language) {
try {
- const prompt = `
- Generate ${count} Image Prompts for AI Image Generation,
- depending on the subject of a video.
- Subject: ${topic}
-
- The image prompts are to be returned as
- a JSON-Array of strings.
-
- Each prompt should consist of a full sentence,
- always add the main subject of the video.
-
- Be emotional and use interesting adjectives to make the
- Image Prompt as detailed as possible.
-
- YOU MUST ONLY RETURN THE JSON-ARRAY OF STRINGS.
- YOU MUST NOT RETURN ANYTHING ELSE.
-
- For context, here is the full text:
- ${script}
- `;
-
- // Use Poe API to send user message
- const handlerId = 'image-prompts-handler';
- let promptsResult = '';
-
- // Register handler for response
- window.Poe.registerHandler(handlerId, (result) => {
- if (result.responses.length > 0) {
- const response = result.responses[0];
- if (response.status === 'complete') {
- promptsResult = response.content.trim();
- }
- }
- });
-
- // Send request to generate image prompts
- await window.Poe.sendUserMessage(`@Claude-3.7-Sonnet ${prompt}`, {
- handler: handlerId,
- stream: false,
- openChat: false
- });
-
- // Wait for response to be complete
- while (!promptsResult) {
- await new Promise(resolve => setTimeout(resolve, 100));
- }
-
- // Clean and parse the JSON response
- const cleanedResponse = promptsResult
- .replace(/```json/g, '')
- .replace(/```/g, '')
- .trim();
-
- try {
- return JSON.parse(cleanedResponse);
- } catch (parseError) {
- // If parsing fails, try to extract the array from the text
- const arrayMatch = cleanedResponse.match(/\[.*\]/s);
- if (arrayMatch) {
- return JSON.parse(arrayMatch[0]);
- }
- throw new Error('Failed to parse image prompts');
- }
- } catch (error) {
- console.error('Error generating image prompts:', error);
- throw new Error('Failed to generate image prompts');
- }
- }
-
- // Function to generate images based on prompts
- async function generateImages(imagePrompts) {
- try {
- const imageUrls = [];
-
+ // Simulated process steps
+ await updateProgressWithDelay('Starting video generation process...', 1000);
+ await updateProgressWithDelay('Generating topic...', 2000);
+ const topic = `How ${niche} can improve your daily life`;
+ logProcessOutput(`Generated topic: "${topic}"`);
+
+ await updateProgressWithDelay('Creating script...', 3000);
+ const script = `Did you know that ${niche} can transform how you approach everyday challenges? Studies show that incorporating ${niche} into your routine can boost productivity by up to 30%. The key is consistency - even just 10 minutes daily makes a difference. Start small, build gradually, and watch as your skills improve. Don't overthink it - the best time to start with ${niche} is right now.`;
+ logProcessOutput(`Generated script (${script.length} chars)`);
+
+ await updateProgressWithDelay('Creating title and description...', 2000);
+ const title = `Transform Your Life With ${niche} - Simple Daily Hack! #productivity #lifestyle #growth`;
+ const description = `Learn how incorporating ${niche} into your daily routine can dramatically improve your productivity and quality of life. Try these simple techniques today!`;
+ logProcessOutput(`Generated title: "${title}"`);
+ logProcessOutput(`Generated description`);
+
+ await updateProgressWithDelay('Creating image prompts...', 2000);
+ const imagePrompts = [
+ `A person happily implementing ${niche} in their daily routine, vibrant colors`,
+ `Before and after comparison showing the benefits of ${niche}, professional look`,
+ `Closeup of tools or resources needed for ${niche}, detailed view`,
+ `Person explaining ${niche} to others, teaching moment`,
+ `Beautiful results of consistently practicing ${niche}, inspiring scene`
+ ];
for (let i = 0; i < imagePrompts.length; i++) {
- updateProgress(`Generating image ${i+1}/${imagePrompts.length}...`);
-
- // Use Poe API to send user message
- const handlerId = `image-generation-handler-${i}`;
-
- // Register handler for response
- window.Poe.registerHandler(handlerId, (result) => {
- if (result.responses.length > 0) {
- const response = result.responses[0];
- if (response.status === 'complete' && response.attachments && response.attachments.length > 0) {
- imageUrls.push(response.attachments[0].url);
- }
- }
- });
-
- // Send request to generate image
- await window.Poe.sendUserMessage(`@FLUX-pro-1.1 ${imagePrompts[i]}`, {
- handler: handlerId,
- stream: false,
- openChat: false
- });
-
- // Wait for a short time to ensure the handler has time to receive the response
- await new Promise(resolve => setTimeout(resolve, 3000));
+ logProcessOutput(`Image prompt ${i+1}: "${imagePrompts[i]}"`);
}
- // Ensure we have at least one image
- if (imageUrls.length === 0) {
- throw new Error('Failed to generate any images');
+ await updateProgressWithDelay('Generating images...', 4000);
+ for (let i = 1; i <= 5; i++) {
+ await updateProgressWithDelay(`Generating image ${i}/5...`, 800);
+ logProcessOutput(`Image ${i} generated successfully`);
}
- return imageUrls;
- } catch (error) {
- console.error('Error generating images:', error);
- throw new Error('Failed to generate images');
- }
- }
-
- // Function to generate speech from script
- async function generateSpeech(script, language, ttsEngine = 'elevenlabs', ttsVoice = '') {
- try {
- // Use Poe API to send user message
- const handlerId = 'speech-generation-handler';
- let audioUrl = null;
-
- // Register handler for response
- window.Poe.registerHandler(handlerId, (result) => {
- if (result.responses.length > 0) {
- const response = result.responses[0];
- if (response.status === 'complete' && response.attachments && response.attachments.length > 0) {
- audioUrl = response.attachments[0].url;
- }
- }
- });
-
- // Prepare the prompt
- let prompt = script;
- if (ttsVoice) {
- prompt += ` --voice ${ttsVoice}`;
- }
+ await updateProgressWithDelay('Creating voiceover...', 3000);
+ logProcessOutput(`Speech generated successfully`);
- // Send request to generate speech
- await window.Poe.sendUserMessage(`@ElevenLabs ${prompt}`, {
- handler: handlerId,
- stream: false,
- openChat: false
- });
+ await updateProgressWithDelay('Generating subtitles...', 2000);
+ logProcessOutput(`Generated word-level timing for ${script.split(' ').length} words`);
+ logProcessOutput(`Generated subtitle lines`);
- // Wait for audio URL to be available
- let attempts = 0;
- while (!audioUrl && attempts < 30) {
- await new Promise(resolve => setTimeout(resolve, 1000));
- attempts++;
- }
+ await updateProgressWithDelay('Combining elements into final video...', 3000);
+ logProcessOutput(`Processing video with word highlighting`);
+ logProcessOutput(`Adding background music at low volume`);
+ logProcessOutput(`Video successfully created`);
- if (!audioUrl) {
- throw new Error('Failed to generate speech audio');
- }
+ // Display results
+ document.getElementById('loading').classList.add('hidden');
+ document.getElementById('loading').classList.remove('flex');
+ document.getElementById('results').classList.remove('hidden');
- // Create an audio element to get the duration
- const audio = new Audio();
- audio.src = audioUrl;
+ // Set video player source
+ const videoPlayer = document.getElementById('video_player');
+ videoPlayer.src = 'https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_1mb.mp4';
- // Wait for the audio to load to get its duration
- const audioDuration = await new Promise((resolve) => {
- audio.addEventListener('loadedmetadata', () => {
- resolve(audio.duration);
- });
-
- // Fallback in case loadedmetadata doesn't fire
- setTimeout(() => resolve(60), 5000); // Default to 60 seconds
- });
+ // Set metadata
+ document.getElementById('video_title').textContent = title;
+ document.getElementById('video_description').textContent = description;
- return {
- url: audioUrl,
- duration: audioDuration
- };
} catch (error) {
- console.error('Error generating speech:', error);
- throw new Error('Failed to generate speech');
+ console.error('Error:', error);
+ document.getElementById('status_message').textContent = 'Error generating video';
+ document.getElementById('progress_detail').textContent = error.message || 'An unexpected error occurred';
}
}
- // Function to generate simulated subtitles (word-level timing)
- function generateSimulatedSubtitles(script) {
- // Split script into words
- const words = script.split(/\s+/);
- const subtitles = [];
-
- // Simulate timing for each word (we'd normally get this from AssemblyAI)
- let currentTime = 0;
- for (let i = 0; i < words.length; i++) {
- const word = words[i];
- // Simulate word duration based on length
- const duration = 0.2 + (word.length * 0.05);
-
- subtitles.push({
- word: word,
- start: currentTime,
- end: currentTime + duration
- });
-
- currentTime += duration;
- }
-
- return subtitles;
+ // Helper function to update progress with delay
+ async function updateProgressWithDelay(message, delay) {
+ document.getElementById('progress_detail').textContent = message;
+ await new Promise(resolve => setTimeout(resolve, delay));
}
- // Function to display results
- function displayResults(data) {
- // Hide loading container
- document.getElementById('loading-container').classList.add('hidden');
- document.getElementById('loading-container').classList.remove('flex');
-
- // Show results container
- document.getElementById('results-container').classList.remove('hidden');
-
- // Set title and description
- document.getElementById('video-title').textContent = data.metadata.title;
- document.getElementById('video-description').textContent = data.metadata.description;
-
- // Set script
- document.getElementById('video-script').textContent = data.script;
-
- // Set image prompts
- const imagePromptsElement = document.getElementById('image-prompts');
- imagePromptsElement.innerHTML = '';
- data.imagePrompts.forEach((prompt, index) => {
- const promptEl = document.createElement('div');
- promptEl.className = 'mb-2';
- promptEl.textContent = `${index + 1}. ${prompt}`;
- imagePromptsElement.appendChild(promptEl);
- });
-
- // Set up image slideshow
- const imageSlideshow = document.getElementById('image-slideshow');
- imageSlideshow.innerHTML = '';
- data.imageUrls.forEach((url, index) => {
- const img = document.createElement('img');
- img.src = url;
- img.className = 'absolute top-0 left-0 w-full h-full object-cover transition-opacity duration-1000';
- img.style.opacity = index === 0 ? '1' : '0';
- img.dataset.index = index;
- imageSlideshow.appendChild(img);
- });
-
- // Set up audio player
- const audioPlayer = document.getElementById('audio-player');
- audioPlayer.src = data.audioUrl;
- audioPlayer.preload = 'auto';
-
- // Set up subtitle container
- const subtitleContainer = document.getElementById('subtitle-container');
- subtitleContainer.innerHTML = '';
-
- // Create elements for each word
- data.subtitles.forEach(subtitle => {
- const wordEl = document.createElement('span');
- wordEl.className = 'subtitle-word';
- wordEl.textContent = subtitle.word;
- wordEl.dataset.start = subtitle.start;
- wordEl.dataset.end = subtitle.end;
- wordEl.style.color = data.subtitleSettings.color;
- subtitleContainer.appendChild(wordEl);
- });
-
- // Set up video player controls
- const playBtn = document.getElementById('play-btn');
- const progressBar = document.getElementById('progress-bar');
-
- let isPlaying = false;
- let currentImageIndex = 0;
- let slideInterval;
-
- // Function to handle play/pause
- function togglePlayPause() {
- if (isPlaying) {
- audioPlayer.pause();
- clearInterval(slideInterval);
- playBtn.innerHTML = `
-
-
- `;
- } else {
- audioPlayer.play();
- playBtn.innerHTML = `
-
- `;
-
- // Set up image slideshow interval
- const slideDuration = data.audioDuration / data.imageUrls.length;
- slideInterval = setInterval(() => {
- const images = imageSlideshow.querySelectorAll('img');
- images[currentImageIndex].style.opacity = '0';
- currentImageIndex = (currentImageIndex + 1) % images.length;
- images[currentImageIndex].style.opacity = '1';
- }, slideDuration * 1000);
- }
- isPlaying = !isPlaying;
- }
-
- // Play button click handler
- playBtn.addEventListener('click', togglePlayPause);
-
- // Update progress bar
- audioPlayer.addEventListener('timeupdate', () => {
- const percent = (audioPlayer.currentTime / data.audioDuration) * 100;
- progressBar.style.width = `${percent}%`;
-
- // Update subtitle highlighting
- const currentTime = audioPlayer.currentTime;
- const subtitleWords = subtitleContainer.querySelectorAll('.subtitle-word');
-
- subtitleWords.forEach(word => {
- const start = parseFloat(word.dataset.start);
- const end = parseFloat(word.dataset.end);
-
- if (currentTime >= start && currentTime <= end) {
- word.classList.add('highlighted');
- word.style.backgroundColor = data.subtitleSettings.highlightColor;
- } else {
- word.classList.remove('highlighted');
- word.style.backgroundColor = 'transparent';
- }
- });
- });
-
- // Reset when audio ends
- audioPlayer.addEventListener('ended', () => {
- isPlaying = false;
- clearInterval(slideInterval);
- playBtn.innerHTML = `
-
-
- `;
-
- // Reset images
- const images = imageSlideshow.querySelectorAll('img');
- images.forEach((img, i) => {
- img.style.opacity = i === 0 ? '1' : '0';
- });
- currentImageIndex = 0;
-
- // Reset subtitles
- const subtitleWords = subtitleContainer.querySelectorAll('.subtitle-word');
- subtitleWords.forEach(word => {
- word.classList.remove('highlighted');
- word.style.backgroundColor = 'transparent';
- });
- });
+ // Function to log process output
+ function logProcessOutput(message) {
+ const logOutput = document.getElementById('log_output');
+ const timestamp = new Date().toLocaleTimeString('en-US', { hour12: false });
+            logOutput.innerHTML += `[${timestamp}] ${message}\n`;
+ logOutput.scrollTop = logOutput.scrollHeight;
}