Spaces:
Build error
Build error
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>YouTube Shorts Generator - Gradio App</title> | |
<script src="https://cdn.tailwindcss.com"></script> | |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script> | |
<!-- NOTE(review): the version pin that used to be in this URL was destroyed by
     e-mail obfuscation, so the unversioned path (latest release) is used here.
     Re-pin to the release the app was built against. The client is published
     as an ES module, hence type="module". -->
<script type="module" src="https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js"></script>
<style>
  /* Custom styles to make it look like Gradio */
  .gradio-container {
    font-family: 'Source Sans Pro', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', sans-serif;
  }

  /* Primary action button and its interaction states */
  .gradio-button {
    background-color: #2A6AC7;
    color: white;
    border: none;
    border-radius: 4px;
    padding: 10px 20px;
    font-weight: 600;
    cursor: pointer;
    transition: background-color 0.3s;
  }
  .gradio-button:hover {
    background-color: #1D5BBF;
  }
  .gradio-button:disabled {
    background-color: #B2C9F1;
    cursor: not-allowed;
  }

  .gradio-checkbox {
    width: 18px;
    height: 18px;
    margin-right: 10px;
  }
  .gradio-slider::-webkit-slider-thumb {
    background: #2A6AC7;
  }

  /* Dark-theme overrides (active when an ancestor carries the "dark" class) */
  .dark .gradio-container {
    background-color: #0B0F19;
    color: #F3F4F7;
  }
  .dark .gradio-input {
    background-color: #1F2937;
    border-color: #374151;
    color: #F9FAFB;
  }

  /* Circular spinner shown while content is being generated */
  .loading-spinner {
    border: 4px solid rgba(0, 0, 0, 0.1);
    border-left-color: #2A6AC7;
    border-radius: 50%;
    width: 30px;
    height: 30px;
    animation: spin 1s linear infinite;
  }
  /* Animation referenced by .loading-spinner; without this at-rule the
     spinner never rotates and the stray "to { ... }" is invalid CSS. */
  @keyframes spin {
    to { transform: rotate(360deg); }
  }
  .dark .loading-spinner {
    border-color: rgba(255, 255, 255, 0.1);
    border-left-color: #2A6AC7;
  }
</style>
</head> | |
<body class="bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200 min-h-screen"> | |
<div class="container mx-auto px-4 py-8 max-w-5xl gradio-container"> | |
<h1 class="text-3xl font-bold mb-2 text-center">YouTube Shorts Generator</h1> | |
<p class="text-center mb-6 text-gray-600 dark:text-gray-400">Generate short videos based on a niche and language</p> | |
<div class="grid grid-cols-1 md:grid-cols-5 gap-6"> | |
<!-- Left Column: Inputs --> | |
<div class="md:col-span-2 bg-gray-100 dark:bg-gray-800 p-6 rounded-lg shadow-md"> | |
<!-- Required Inputs Section --> | |
<div class="mb-6">
  <h2 class="text-xl font-semibold mb-4 pb-2 border-b border-gray-300 dark:border-gray-700">Required Inputs</h2>
  <div class="mb-4">
    <!-- The asterisk is a visual cue only; the mandatory state is exposed through `required` on the control. -->
    <label for="niche" class="block text-sm font-medium mb-1">Niche/Topic <span class="text-red-500" aria-hidden="true">*</span></label>
    <input type="text" id="niche" name="niche" required class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input" placeholder="E.g., Fitness tips, Technology facts">
  </div>
  <div class="mb-4">
    <label for="language" class="block text-sm font-medium mb-1">Language <span class="text-red-500" aria-hidden="true">*</span></label>
    <!-- aria-required rather than `required`: the list has no empty placeholder option, which `required` would demand. -->
    <select id="language" name="language" aria-required="true" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input">
      <option value="English">English</option>
      <option value="Spanish">Spanish</option>
      <option value="French">French</option>
      <option value="German">German</option>
      <option value="Italian">Italian</option>
      <option value="Portuguese">Portuguese</option>
      <option value="Russian">Russian</option>
      <option value="Japanese">Japanese</option>
      <option value="Chinese">Chinese</option>
      <option value="Hindi">Hindi</option>
    </select>
  </div>
</div>
<!-- API Keys Section --> | |
<div class="mb-6">
  <h2 class="text-xl font-semibold mb-4 pb-2 border-b border-gray-300 dark:border-gray-700">API Keys</h2>
  <!-- These fields hold service credentials, not login passwords: autofill and
       spell checking are switched off so browsers do not store or transmit them. -->
  <div class="mb-4">
    <label for="gemini_api_key" class="block text-sm font-medium mb-1">Gemini API Key</label>
    <input type="password" id="gemini_api_key" name="gemini_api_key" autocomplete="off" spellcheck="false" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input" placeholder="Enter your Gemini API key">
  </div>
  <div class="mb-4">
    <label for="assemblyai_api_key" class="block text-sm font-medium mb-1">AssemblyAI API Key</label>
    <input type="password" id="assemblyai_api_key" name="assemblyai_api_key" autocomplete="off" spellcheck="false" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input" placeholder="Enter your AssemblyAI API key">
  </div>
  <div class="mb-4">
    <label for="elevenlabs_api_key" class="block text-sm font-medium mb-1">ElevenLabs API Key</label>
    <input type="password" id="elevenlabs_api_key" name="elevenlabs_api_key" autocomplete="off" spellcheck="false" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input" placeholder="Enter your ElevenLabs API key">
  </div>
  <div class="mb-4">
    <label for="segmind_api_key" class="block text-sm font-medium mb-1">Segmind API Key</label>
    <input type="password" id="segmind_api_key" name="segmind_api_key" autocomplete="off" spellcheck="false" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input" placeholder="Enter your Segmind API key">
  </div>
</div>
<!-- Model Selection Section --> | |
<div class="mb-6">
  <h2 class="text-xl font-semibold mb-4 pb-2 border-b border-gray-300 dark:border-gray-700">Model Selection</h2>

  <!-- Backend used for topic, script and metadata text -->
  <div class="mb-4">
    <label for="text_gen" class="block text-sm font-medium mb-1">Text Generator</label>
    <select id="text_gen" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input">
      <option value="gemini">Gemini</option>
      <option value="g4f">G4F Models</option>
    </select>
  </div>

  <!-- Backend used for the slideshow images -->
  <div class="mb-4">
    <label for="image_gen" class="block text-sm font-medium mb-1">Image Generator</label>
    <select id="image_gen" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input">
      <option value="prodia">Prodia</option>
      <option value="hercai">Hercai</option>
      <option value="g4f">G4F</option>
      <option value="segmind">Segmind</option>
      <option value="pollinations">Pollinations</option>
    </select>
  </div>

  <!-- Narration engine and voice -->
  <div class="mb-4">
    <label for="tts_engine" class="block text-sm font-medium mb-1">Text-to-Speech Engine</label>
    <select id="tts_engine" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input">
      <option value="elevenlabs">ElevenLabs</option>
      <option value="bark">Bark</option>
      <option value="gtts">Google TTS</option>
      <option value="openai">OpenAI TTS</option>
      <option value="edge">Edge TTS</option>
      <option value="local_tts">Local TTS</option>
      <option value="xtts">XTTS</option>
      <option value="rvc">RVC</option>
    </select>
  </div>
  <div class="mb-4">
    <label for="tts_voice" class="block text-sm font-medium mb-1">TTS Voice</label>
    <input type="text" id="tts_voice" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input" placeholder="E.g., Sarah, Brian, Lily, Monika Sogam">
  </div>
</div>
<!-- Subtitle Options Section --> | |
<div class="mb-6">
  <h2 class="text-xl font-semibold mb-4 pb-2 border-b border-gray-300 dark:border-gray-700">Subtitle Options</h2>

  <div class="mb-4">
    <label for="subtitle_font" class="block text-sm font-medium mb-1">Font</label>
    <select id="subtitle_font" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base gradio-input">
      <option value="Helvetica-Bold">Helvetica Bold</option>
      <option value="Arial-Bold">Arial Bold</option>
      <option value="Impact">Impact</option>
      <option value="Comic-Sans-MS">Comic Sans MS</option>
    </select>
  </div>

  <!-- #font_size_value sits next to the slider label and starts at the slider's initial value -->
  <div class="mb-4">
    <label for="font_size" class="block text-sm font-medium mb-1">Font Size: <span id="font_size_value">80</span></label>
    <input type="range" id="font_size" min="40" max="120" value="80" class="w-full gradio-slider">
  </div>

  <!-- Subtitle colours, side by side -->
  <div class="grid grid-cols-2 gap-4 mb-4">
    <div>
      <label for="text_color" class="block text-sm font-medium mb-1">Text Color</label>
      <input type="color" id="text_color" value="#FFFFFF" class="w-full h-10 gradio-input">
    </div>
    <div>
      <label for="highlight_color" class="block text-sm font-medium mb-1">Highlight Color</label>
      <input type="color" id="highlight_color" value="#0000FF" class="w-full h-10 gradio-input">
    </div>
  </div>
</div>
<!-- Explicit type: a bare <button> defaults to "submit" and would submit any form it is ever placed in. -->
<button type="button" id="generate_btn" class="w-full py-3 px-4 rounded-md font-medium transition duration-200 gradio-button">
  Generate Video
</button>
</div> | |
<!-- Right Column: Output --> | |
<div class="md:col-span-3 bg-gray-100 dark:bg-gray-800 p-6 rounded-lg shadow-md">
  <h2 class="text-xl font-semibold mb-4 pb-2 border-b border-gray-300 dark:border-gray-700">Generated Content</h2>

  <!-- Loading indicator: role="status" makes progress text updates audible to screen readers -->
  <div id="loading" class="hidden flex-col items-center justify-center py-12" role="status" aria-live="polite">
    <div class="loading-spinner mb-4" aria-hidden="true"></div>
    <div id="status_message" class="text-lg font-medium">Generating content...</div>
    <div id="progress_detail" class="text-sm text-gray-500 dark:text-gray-400 mt-2"></div>
  </div>

  <!-- Results container -->
  <div id="results" class="hidden">
    <!-- Video preview -->
    <div class="mb-6">
      <h3 class="font-medium mb-2">Video Preview</h3>
      <div class="relative pt-[56.25%] bg-black rounded-lg">
        <video id="video_player" controls class="absolute top-0 left-0 w-full h-full rounded-lg">
          Your browser does not support the video tag.
        </video>
      </div>
    </div>

    <!-- Title and Description -->
    <div class="grid grid-cols-1 md:grid-cols-2 gap-4 mb-6">
      <div>
        <h3 class="font-medium mb-2">Title</h3>
        <div id="video_title" class="bg-white dark:bg-gray-700 p-3 rounded-md"></div>
      </div>
      <div>
        <h3 class="font-medium mb-2">Description</h3>
        <div id="video_description" class="bg-white dark:bg-gray-700 p-3 rounded-md h-24 overflow-y-auto"></div>
      </div>
    </div>

    <!-- Logs and information: role="log" announces appended entries without interrupting -->
    <div>
      <h3 id="log_output_heading" class="font-medium mb-2">Process Log</h3>
      <div id="log_output" class="bg-white dark:bg-gray-700 p-3 rounded-md h-64 overflow-y-auto font-mono text-sm" role="log" aria-live="polite" aria-labelledby="log_output_heading"></div>
    </div>
  </div>
</div>
</div> | |
<!-- Requirements.txt Section --> | |
<div class="mt-8 bg-gray-100 dark:bg-gray-800 p-6 rounded-lg shadow-md">
  <h2 class="text-xl font-semibold mb-4 pb-2 border-b border-gray-300 dark:border-gray-700">Requirements.txt</h2>
  <!-- Preformatted text: the lines below are kept flush-left because whitespace inside <pre> is rendered as-is -->
  <pre class="bg-white dark:bg-gray-700 p-4 rounded-md overflow-x-auto text-sm">
gradio==3.50.2
g4f==0.1.9.0
moviepy==1.0.3
assemblyai==0.17.0
requests==2.31.0
google-generativeai==0.3.1
python-dotenv==1.0.0
Pillow==10.0.0
openai==1.3.5
edge-tts==6.1.9
bark==0.0.1
tensorflow==2.12.0
soundfile==0.12.1
TTS==0.21.1
rvc-engine==0.0.1
termcolor==2.3.0
</pre>
</div>
<!-- Python Code Section --> | |
<div class="mt-8 bg-gray-100 dark:bg-gray-800 p-6 rounded-lg shadow-md"> | |
<h2 class="text-xl font-semibold mb-4 pb-2 border-b border-gray-300 dark:border-gray-700">app.py</h2> | |
<pre class="bg-white dark:bg-gray-700 p-4 rounded-md overflow-x-auto text-sm"> | |
import os | |
import re | |
import g4f | |
import json | |
import time | |
import random | |
import tempfile | |
import requests | |
import assemblyai as aai | |
from moviepy.editor import * | |
from datetime import datetime | |
import gradio as gr | |
from dotenv import load_dotenv | |
# Load environment variables from .env file | |
load_dotenv() | |
# Constants | |
CACHE_DIR = os.path.join(tempfile.gettempdir(), "yt_shorts_generator") | |
os.makedirs(CACHE_DIR, exist_ok=True) | |
# Helper functions | |
def info(message):
    """Print ``message`` with an ``[INFO]`` tag and return the tagged text."""
    tagged = "[INFO] {}".format(message)
    print(tagged)
    return tagged
def success(message):
    """Print ``message`` with a ``[SUCCESS]`` tag and return the tagged text."""
    tagged = "[SUCCESS] {}".format(message)
    print(tagged)
    return tagged
def warning(message):
    """Print ``message`` with a ``[WARNING]`` tag and return the tagged text."""
    tagged = "[WARNING] {}".format(message)
    print(tagged)
    return tagged
def error(message):
    """Print ``message`` with an ``[ERROR]`` tag and return the tagged text."""
    tagged = "[ERROR] {}".format(message)
    print(tagged)
    return tagged
class YouTube:
    """Demo pipeline that turns a niche and a language into a (simulated) YouTube Short.

    Topic, script, metadata and image prompts come from a text model (Gemini or
    G4F). Image generation, speech synthesis and video assembly are simulated
    with sleeps and placeholder URLs/paths. Every step appends a timestamped
    entry to ``self.logs``.
    """

    # Placeholder image returned for each simulated image provider.
    _PLACEHOLDER_IMAGES = {
        "prodia": "https://images.unsplash.com/photo-1579546929518-9e396f3cc809",
        "hercai": "https://images.unsplash.com/photo-1513151233558-d860c5398176",
        "segmind": "https://images.unsplash.com/photo-1618005182384-a83a8bd57fbe",
        "pollinations": "https://images.unsplash.com/photo-1550859492-d5da9d8e45f3",
    }
    # Used for "g4f" and any unrecognised image provider.
    _DEFAULT_PLACEHOLDER_IMAGE = "https://images.unsplash.com/photo-1541701494587-cb58502866ab"

    # tts_engine value -> (display name used in the log, simulated latency in seconds)
    _TTS_PROVIDERS = {
        "elevenlabs": ("ElevenLabs", 3),
        "bark": ("Bark", 3),
        "gtts": ("Google TTS", 2),
        "openai": ("OpenAI", 3),
        "edge": ("Edge TTS", 2),
    }

    def __init__(self, niche, language, text_gen="gemini", image_gen="prodia", tts_engine="elevenlabs",
                 tts_voice="Sarah", subtitle_font="Helvetica-Bold", font_size=80,
                 text_color="white", highlight_color="blue", api_keys=None):
        """Store the generation settings and export any supplied API keys.

        Args:
            niche: Topic area the video should be about.
            language: Language the script must be written in.
            text_gen: "gemini" selects Gemini; any other value selects G4F.
            image_gen: Image provider name (see ``generate_image``).
            tts_engine: Text-to-speech engine name (see ``generate_speech``).
            tts_voice: Voice name, only logged by the simulated TTS step.
            subtitle_font, font_size, text_color, highlight_color: Subtitle styling,
                stored for later use.
            api_keys: Optional dict with any of the keys "gemini", "assemblyai",
                "elevenlabs", "segmind". Non-empty values are written to the
                matching ``*_API_KEY`` environment variable.
        """
        info("Initializing YouTube class")
        self._niche = niche
        self._language = language
        self.text_gen = text_gen
        self.image_gen = image_gen
        self.tts_engine = tts_engine
        self.tts_voice = tts_voice
        self.subtitle_font = subtitle_font
        self.font_size = font_size
        self.text_color = text_color
        self.highlight_color = highlight_color
        self.api_keys = api_keys or {}
        self.images = []
        self.logs = []

        # Pipeline outputs; initialised so that a failed or skipped step shows up
        # as None instead of an AttributeError further down the pipeline.
        self.subject = None
        self.script = None
        self.metadata = None
        self.image_prompts = []
        self.tts_path = None
        self.video_path = None

        # Set API keys (e.g. "gemini" -> GEMINI_API_KEY)
        for service in ("gemini", "assemblyai", "elevenlabs", "segmind"):
            key = self.api_keys.get(service)
            if key:
                os.environ[f"{service.upper()}_API_KEY"] = key

        info(f"Niche: {niche}, Language: {language}")
        self.log(f"Initialized with niche: {niche}, language: {language}")
        self.log(f"Text generator: {text_gen}, Image generator: {image_gen}, TTS engine: {tts_engine}")

    def log(self, message):
        """Add a log message to the logs list and return the timestamped entry."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        log_entry = f"[{timestamp}] {message}"
        self.logs.append(log_entry)
        return log_entry

    # The prompts below interpolate ``self.niche`` / ``self.language`` as plain
    # attributes, so these must be properties; as bare methods the f-strings
    # would embed "bound method ..." text instead of the value.
    @property
    def niche(self):
        """Topic area supplied at construction time."""
        return self._niche

    @property
    def language(self):
        """Script language supplied at construction time."""
        return self._language

    def generate_response(self, prompt, model=None):
        """Send ``prompt`` to the configured text generator and return its reply.

        Args:
            prompt: Text sent to the model.
            model: G4F model name; ignored for Gemini. Defaults to "gpt-3.5-turbo".

        Returns:
            Whatever the backend returned (expected to be a string).
        """
        self.log(f"Generating response for prompt: {prompt[:50]}...")
        if self.text_gen == "gemini":
            self.log("Using Google's Gemini model")
            import google.generativeai as genai
            genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
            # Separate local name so the ``model`` argument is not clobbered.
            gemini_model = genai.GenerativeModel('gemini-2.0-flash')
            response = gemini_model.generate_content(prompt).text
        else:
            model_name = model if model else "gpt-3.5-turbo"
            self.log(f"Using G4F model: {model_name}")
            response = g4f.ChatCompletion.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt}]
            )
        if not response:
            # Callers treat a falsy reply as a failure; do not report success or call len(None).
            self.log(warning("Model returned an empty response"))
            return response
        self.log(f"Response generated successfully, length: {len(response)} characters")
        return response

    def generate_topic(self):
        """Ask the model for a one-sentence video idea and store it in ``self.subject``.

        Returns:
            The topic string, or None when the model returned nothing.
        """
        self.log("Generating topic based on niche")
        completion = self.generate_response(
            f"Please generate a specific video idea that takes about the following topic: {self.niche}. "
            f"Make it exactly one sentence. Only return the topic, nothing else."
        )
        if not completion:
            self.log(error("Failed to generate Topic."))
            return None
        self.subject = completion
        self.log(success(f"Generated topic: {completion}"))
        return completion

    def generate_script(self, max_retries=3):
        """Generate the narration script for ``self.subject`` and store it in ``self.script``.

        Args:
            max_retries: How many times to ask the model when the reply is longer
                than 5000 characters (the original retried without a bound).

        Returns:
            The script with asterisks removed, or None when the reply was empty
            or every attempt was too long.
        """
        self.log("Generating script for video")
        prompt = f"""
        Generate a script for youtube shorts video, depending on the subject of the video.
        The script is to be returned as a string with the specified number of paragraphs.
        Here is an example of a string:
        "This is an example string."
        Do not under any circumstance reference this prompt in your response.
        Get straight to the point, don't start with unnecessary things like, "welcome to this video".
        Obviously, the script should be related to the subject of the video.
        YOU MUST NOT INCLUDE ANY TYPE OF MARKDOWN OR FORMATTING IN THE SCRIPT, NEVER USE A TITLE.
        YOU MUST WRITE THE SCRIPT IN THE LANGUAGE SPECIFIED IN [LANGUAGE].
        ONLY RETURN THE RAW CONTENT OF THE SCRIPT. DO NOT INCLUDE "VOICEOVER", "NARRATOR" OR SIMILAR INDICATORS.
        Subject: {self.subject}
        Language: {self.language}
        """
        for _ in range(max_retries):
            # Coerce a None reply to "" before stripping markdown asterisks.
            completion = (self.generate_response(prompt) or "").replace("*", "")
            if not completion:
                self.log(error("The generated script is empty."))
                return None
            if len(completion) > 5000:
                self.log(warning("Generated Script is too long. Retrying..."))
                continue
            self.script = completion
            self.log(success(f"Generated script ({len(completion)} chars)"))
            return completion
        self.log(error(f"Generated script was still too long after {max_retries} attempts."))
        return None

    def generate_metadata(self, max_retries=3):
        """Generate a title and description and store them in ``self.metadata``.

        Args:
            max_retries: How many times to request a title of at most 100
                characters before truncating the last reply.

        Returns:
            Dict with the keys "title" and "description".
        """
        self.log("Generating metadata (title and description)")
        title_prompt = (
            f"Please generate a YouTube Video Title for the following subject, including hashtags: "
            f"{self.subject}. Only return the title, nothing else. Limit the title under 100 characters."
        )
        title = ""
        for _ in range(max_retries):
            title = (self.generate_response(title_prompt) or "").strip()
            if title and 100 >= len(title):
                break
            self.log(warning("Generated Title is too long. Retrying..."))
        else:
            # Retries exhausted: keep the last reply but enforce the length limit.
            title = title[:100]
            self.log(warning("Title still too long after retries; truncated to 100 characters."))

        description = self.generate_response(
            f"Please generate a YouTube Video Description for the following script: {self.script}. "
            f"Only return the description, nothing else."
        )
        self.metadata = {
            "title": title,
            "description": description
        }
        self.log(success(f"Generated title: {title}"))
        self.log(success(f"Generated description: {description}"))
        return self.metadata

    @staticmethod
    def _parse_image_prompts(raw):
        """Extract a non-empty list of prompt strings from a model reply, else None.

        Accepts a bare JSON array, an object with an "image_prompts" array, or
        either of those embedded in surrounding chatter.
        """
        candidates = [raw]
        bracketed = re.search(r"\[.*\]", raw, re.DOTALL)
        if bracketed:
            candidates.append(bracketed.group(0))
        for text in candidates:
            try:
                data = json.loads(text)
            except (ValueError, TypeError):
                continue
            if isinstance(data, dict):
                data = data.get("image_prompts")
            if isinstance(data, list) and data:
                return [str(item) for item in data]
        return None

    def generate_prompts(self, count=5, max_retries=3):
        """Generate up to ``count`` image prompts and store them in ``self.image_prompts``.

        Args:
            count: Number of prompts requested from the model and the maximum kept.
            max_retries: Attempts to get a parseable reply before falling back to
                generic prompts built from ``self.subject``.

        Returns:
            List of prompt strings.
        """
        self.log(f"Generating {count} image prompts")
        prompt = f"""
        Generate {count} Image Prompts for AI Image Generation,
        depending on the subject of a video.
        Subject: {self.subject}
        The image prompts are to be returned as
        a JSON-Array of strings.
        Each search term should consist of a full sentence,
        always add the main subject of the video.
        Be emotional and use interesting adjectives to make the
        Image Prompt as detailed as possible.
        YOU MUST ONLY RETURN THE JSON-ARRAY OF STRINGS.
        YOU MUST NOT RETURN ANYTHING ELSE.
        YOU MUST NOT RETURN THE SCRIPT.
        The search terms must be related to the subject of the video.
        Here is an example of a JSON-Array of strings:
        ["image prompt 1", "image prompt 2", "image prompt 3"]
        For context, here is the full text:
        {self.script}
        """
        image_prompts = None
        for _ in range(max_retries):
            # Models often wrap JSON in a markdown code fence; strip it before parsing.
            completion = str(self.generate_response(prompt)) \
                .replace("```json", "") \
                .replace("```", "")
            image_prompts = self._parse_image_prompts(completion)
            if image_prompts:
                self.log(f"Generated Image Prompts: {image_prompts}")
                break
            self.log(warning("Failed to generate Image Prompts. Retrying..."))

        if not image_prompts:
            self.log(error("Failed to parse image prompts JSON"))
            # Fallback approach - create some generic prompts
            image_prompts = [
                f"A beautiful image showing {self.subject}",
                f"A detailed visualization of {self.subject}",
                f"An artistic representation of {self.subject}",
                f"A photorealistic image about {self.subject}",
                f"A dramatic scene related to {self.subject}"
            ]

        self.image_prompts = image_prompts[:count]  # Limit to requested count
        self.log(success(f"Generated {len(self.image_prompts)} Image Prompts"))
        for i, image_prompt in enumerate(self.image_prompts):
            self.log(f"Image Prompt {i+1}: {image_prompt}")
        return self.image_prompts

    def _submit_prodia_job(self, prompt):
        """Submit a generation job to Prodia and return its job id (None on failure).

        The demo never polls the job; a failure is logged, not raised, so the
        placeholder image can still be returned.
        """
        self.log("Sending generation request to Prodia API")
        try:
            resp = requests.get(
                "https://api.prodia.com/generate",
                params={
                    "new": "true",
                    "prompt": prompt,
                    "model": "sdxl",  # Default model
                    "negative_prompt": "verybadimagenegative_v1.3",
                    "steps": "20",
                    "cfg": "7",
                    "seed": random.randint(1, 10000),
                    "sample": "DPM++ 2M Karras",
                    "aspect_ratio": "square"
                },
                headers={
                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                },
                timeout=30,  # never hang the whole pipeline on a dead endpoint
            )
            job_id = resp.json()['job']
        except (requests.RequestException, KeyError, ValueError) as exc:
            self.log(warning(f"Prodia request failed ({exc}); continuing with placeholder image"))
            return None
        self.log(f"Job created with ID: {job_id}")
        return job_id

    def generate_image(self, prompt):
        """Simulate image generation for ``prompt`` with the configured provider.

        Returns:
            A placeholder image URL, which is also appended to ``self.images``.
        """
        self.log(f"Generating image for prompt: {prompt[:50]}...")
        provider = self.image_gen
        if provider == "prodia":
            self.log("Using Prodia provider for image generation")
            self._submit_prodia_job(prompt)
            # In a real implementation we would poll the job until completion.
            self.log("Waiting for image generation to complete...")
            time.sleep(3)  # Simulate API call
        elif provider in self._PLACEHOLDER_IMAGES:
            self.log(f"Using {provider.capitalize()} provider for image generation")
            time.sleep(2)  # Simulate API call
        else:  # Default or g4f
            self.log("Using default provider for image generation")
            time.sleep(2)  # Simulate API call

        image_url = self._PLACEHOLDER_IMAGES.get(provider, self._DEFAULT_PLACEHOLDER_IMAGE)
        self.log(success("Image generated and saved (placeholder for demo)"))
        self.images.append(image_url)
        return image_url

    def generate_speech(self, text, output_format='mp3'):
        """Simulate text-to-speech for ``text`` and record the output path.

        No audio file is written in this demo; the returned path is where a real
        engine would save it.

        Returns:
            Path under CACHE_DIR, also stored in ``self.tts_path``.
        """
        self.log("Generating speech from text")
        # Clean text: keep only word characters, whitespace and sentence punctuation.
        # The cleaned text is what a real engine would be given.
        text = re.sub(r'[^\w\s.?!]', '', text)
        self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")
        audio_path = os.path.join(CACHE_DIR, f"speech.{output_format}")

        label, delay = self._TTS_PROVIDERS.get(self.tts_engine, (None, 2))
        if label is None:
            self.log(f"Using default TTS engine (would use {self.tts_engine} in production)")
        else:
            self.log(f"Using {label} provider for speech generation")
            if self.tts_engine == "elevenlabs":
                self.log("Simulating ElevenLabs API call (would use real API in production)")
        time.sleep(delay)  # Simulate API call

        # Single exit point: previously every branch returned early, which made
        # the success log below unreachable.
        self.tts_path = audio_path
        self.log(success(f"Speech generated and saved to: {audio_path}"))
        return audio_path

    @staticmethod
    def _group_words_into_lines(wordlevel_info, max_chars, max_duration, max_gap):
        """Group timed words into subtitle lines.

        A line is closed right after the word that pushes it over ``max_chars``
        characters or ``max_duration`` seconds, or that follows a pause longer
        than ``max_gap`` seconds; that word stays on the line it closes.
        """
        def as_line(words):
            return {
                "text": " ".join(item["word"] for item in words),
                "start": words[0]["start"],
                "end": words[-1]["end"],
                "words": words
            }

        subtitles = []
        line = []
        line_duration = 0
        previous_end = None
        for word_data in wordlevel_info:
            line.append(word_data)
            line_duration += word_data["end"] - word_data["start"]
            line_chars = len(" ".join(item["word"] for item in line))
            gap = 0 if previous_end is None else word_data["start"] - previous_end
            previous_end = word_data["end"]
            if line_duration > max_duration or line_chars > max_chars or gap > max_gap:
                subtitles.append(as_line(line))
                line = []
                line_duration = 0
        # Add the remaining words as the last subtitle line if any
        if line:
            subtitles.append(as_line(line))
        return subtitles

    def generate_subtitles(self, audio_path):
        """Build simulated word-level and line-level subtitle timing from ``self.script``.

        Args:
            audio_path: Path of the narration audio; not read in this demo.

        Returns:
            Dict with "wordlevel" and "linelevel" lists, or None when there is
            no script text to time.
        """
        self.log("Generating word-level subtitles for video")
        MAX_CHARS = 30
        MAX_DURATION = 3.0
        MAX_GAP = 2.5

        # In a real implementation, we would use AssemblyAI to transcribe
        self.log("In a production environment, this would use AssemblyAI for transcription")
        self.log("Simulating transcription with word-level timing")
        words = (self.script or "").split()
        if not words:
            # Guard the division below instead of relying on a swallowed ZeroDivisionError.
            self.log(error("Subtitle generation failed: script is empty"))
            return None

        total_duration = 60  # Assume 60 seconds for demo
        avg_word_duration = total_duration / len(words)
        wordlevel_info = []
        current_time = 0
        for word in words:
            # Deterministic duration that grows with word length.
            word_duration = avg_word_duration * (0.5 + (len(word) / 10))
            wordlevel_info.append({
                "word": word,
                "start": current_time,
                "end": current_time + word_duration
            })
            current_time += word_duration
        self.log(success(f"Generated word-level timing for {len(wordlevel_info)} words"))

        subtitles = self._group_words_into_lines(wordlevel_info, MAX_CHARS, MAX_DURATION, MAX_GAP)
        self.log(success(f"Generated {len(subtitles)} subtitle lines"))
        # In a real implementation, we would create TextClips for MoviePy.
        # For the demo, we just return the subtitle data.
        return {
            "wordlevel": wordlevel_info,
            "linelevel": subtitles
        }

    def combine(self):
        """Simulate assembling images, audio and subtitles into the final video.

        Returns:
            Dict with "video_path", "images", "audio_path" and "metadata".
            No video file is written in this demo.
        """
        self.log("Combining images and audio into final video")
        combined_video_path = os.path.join(CACHE_DIR, "output.mp4")
        # In a real implementation, this would:
        # 1. Create ImageClips from each image
        # 2. Create an audio clip from the speech
        # 3. Add background music
        # 4. Add word-level subtitles
        # 5. Combine everything into a final video
        self.log("This would create a vertical (9:16) video with:")
        self.log(f"- {len(self.images)} images as a slideshow")
        self.log("- TTS audio as the main audio track")
        self.log("- Background music at low volume")
        self.log("- Word-level subtitles that highlight as words are spoken")
        self.log("Processing video (simulated for demo)...")
        time.sleep(3)
        self.log(success(f"Video successfully created at: {combined_video_path}"))
        self.video_path = combined_video_path
        return {
            'video_path': combined_video_path,
            'images': self.images,
            'audio_path': self.tts_path,
            'metadata': self.metadata
        }

    def generate_video(self):
        """Generate complete video with all components.

        Returns:
            Dict with "video_path", "images", "audio_path", "title",
            "description", "subject", "script" and "logs".

        Raises:
            RuntimeError: when no topic or no script could be generated; every
                later step depends on both.
        """
        self.log("Starting video generation process")

        # Step 1: Generate topic
        self.log("Generating topic")
        if not self.generate_topic():
            raise RuntimeError("Video generation aborted: no topic could be generated")

        # Step 2: Generate script
        self.log("Generating script")
        if not self.generate_script():
            raise RuntimeError("Video generation aborted: no script could be generated")

        # Step 3: Generate metadata
        self.log("Generating metadata")
        self.generate_metadata()

        # Step 4: Generate image prompts
        self.log("Generating image prompts")
        self.generate_prompts()

        # Step 5: Generate images
        self.log("Generating images")
        for i, image_prompt in enumerate(self.image_prompts, 1):
            self.log(f"Generating image {i}/{len(self.image_prompts)}")
            self.generate_image(image_prompt)

        # Step 6: Generate speech
        self.log("Generating speech")
        self.generate_speech(self.script)

        # Step 7: Generate subtitles
        self.log("Generating subtitles")
        self.generate_subtitles(self.tts_path)

        # Step 8: Combine all elements into final video
        self.log("Combining all elements into final video")
        result = self.combine()
        self.log("Video generation complete.")
        return {
            'video_path': result['video_path'],
            'images': result['images'],
            'audio_path': self.tts_path,
            'title': self.metadata['title'],
            'description': self.metadata['description'],
            'subject': self.subject,
            'script': self.script,
            'logs': self.logs
        }
# Gradio interface | |
def create_youtube_short(niche, language, gemini_api_key="", assemblyai_api_key="",
                         elevenlabs_api_key="", segmind_api_key="", text_gen="gemini",
                         image_gen="prodia", tts_engine="elevenlabs", tts_voice="Sarah",
                         subtitle_font="Helvetica-Bold", font_size=80, text_color="white",
                         highlight_color="blue"):
    """Build a ``YouTube`` generator from the form values and run it once.

    All arguments are forwarded to ``YouTube`` unchanged; the four API keys
    are bundled into a single ``api_keys`` mapping keyed by provider name.

    Returns:
        dict with the keys ``video``, ``title``, ``description``, ``script``
        and ``logs``. ``video`` is always the fixed demo URL below, not the
        path produced by the generator; ``logs`` is the generator's log
        entries joined with newlines.
    """
    generator = YouTube(
        niche=niche,
        language=language,
        text_gen=text_gen,
        image_gen=image_gen,
        tts_engine=tts_engine,
        tts_voice=tts_voice,
        subtitle_font=subtitle_font,
        font_size=font_size,
        text_color=text_color,
        highlight_color=highlight_color,
        api_keys=dict(
            gemini=gemini_api_key,
            assemblyai=assemblyai_api_key,
            elevenlabs=elevenlabs_api_key,
            segmind=segmind_api_key,
        ),
    )
    generated = generator.generate_video()

    # Demo placeholder: the generated file itself is not served to the UI.
    response = {"video": "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_1mb.mp4"}
    for field in ("title", "description", "script"):
        response[field] = generated[field]
    response["logs"] = "\n".join(generated['logs'])
    return response
# Create Gradio app

# Order in which callback results are handed back to Gradio. It must match the
# order of the `outputs` list given to `generate_btn.click` below.
_UI_OUTPUT_KEYS = ("video", "title", "description", "script", "logs")


def _generate_for_ui(*form_values):
    """Adapt `create_youtube_short` to Gradio's positional output contract.

    `create_youtube_short` returns a dict keyed by plain strings. Gradio maps
    callback results to output components by position (or by a dict keyed by
    the components themselves), so the string-keyed dict is flattened into a
    tuple ordered like `_UI_OUTPUT_KEYS`.

    Args:
        *form_values: the input component values, in the same order as the
            positional parameters of `create_youtube_short`.

    Returns:
        tuple of (video, title, description, script, logs).
    """
    result = create_youtube_short(*form_values)
    return tuple(result[key] for key in _UI_OUTPUT_KEYS)


with gr.Blocks() as demo:
    gr.Markdown("# YouTube Shorts Generator")
    gr.Markdown("Generate short videos based on a niche and language")

    with gr.Row():
        # Left column: every input control plus the trigger button.
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### Required Inputs")
                niche = gr.Textbox(label="Niche/Topic", placeholder="E.g., Fitness tips, Technology facts")
                language = gr.Dropdown(
                    choices=["English", "Spanish", "French", "German", "Italian", "Portuguese",
                             "Russian", "Japanese", "Chinese", "Hindi"],
                    label="Language",
                    value="English"
                )

            with gr.Accordion("API Keys", open=False):
                gemini_api_key = gr.Textbox(label="Gemini API Key", type="password")
                assemblyai_api_key = gr.Textbox(label="AssemblyAI API Key", type="password")
                elevenlabs_api_key = gr.Textbox(label="ElevenLabs API Key", type="password")
                segmind_api_key = gr.Textbox(label="Segmind API Key", type="password")

            with gr.Accordion("Model Selection", open=False):
                text_gen = gr.Dropdown(
                    choices=["gemini", "g4f"],
                    label="Text Generator",
                    value="gemini"
                )
                image_gen = gr.Dropdown(
                    choices=["prodia", "hercai", "g4f", "segmind", "pollinations"],
                    label="Image Generator",
                    value="prodia"
                )
                tts_engine = gr.Dropdown(
                    choices=["elevenlabs", "bark", "gtts", "openai", "edge", "local_tts", "xtts", "rvc"],
                    label="Text-to-Speech Engine",
                    value="elevenlabs"
                )
                tts_voice = gr.Textbox(
                    label="TTS Voice",
                    placeholder="E.g., Sarah, Brian, Lily, Monika Sogam",
                    value="Sarah"
                )

            with gr.Accordion("Subtitle Options", open=False):
                subtitle_font = gr.Dropdown(
                    choices=["Helvetica-Bold", "Arial-Bold", "Impact", "Comic-Sans-MS"],
                    label="Font",
                    value="Helvetica-Bold"
                )
                font_size = gr.Slider(
                    minimum=40,
                    maximum=120,
                    value=80,
                    step=5,
                    label="Font Size"
                )
                with gr.Row():
                    text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
                    highlight_color = gr.ColorPicker(label="Highlight Color", value="#0000FF")

            generate_btn = gr.Button("Generate Video", variant="primary")

        # Right column: everything the callback fills in.
        with gr.Column(scale=1):
            video_output = gr.Video(label="Generated Video")
            title_output = gr.Textbox(label="Title")
            description_output = gr.Textbox(label="Description", lines=3)
            script_output = gr.Textbox(label="Script", lines=5)
            log_output = gr.Textbox(label="Process Log", lines=10)

    # Set up the function to call when the generate button is clicked.
    # `inputs` follows the positional parameter order of create_youtube_short;
    # `outputs` is a list (not a string-keyed dict) ordered like _UI_OUTPUT_KEYS.
    generate_btn.click(
        fn=_generate_for_ui,
        inputs=[
            niche, language, gemini_api_key, assemblyai_api_key, elevenlabs_api_key,
            segmind_api_key, text_gen, image_gen, tts_engine, tts_voice,
            subtitle_font, font_size, text_color, highlight_color
        ],
        outputs=[
            video_output,
            title_output,
            description_output,
            script_output,
            log_output
        ]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
</pre> | |
</div> | |
</div> | |
<script> | |
// Initialize dark mode based on user preference and keep it in sync afterwards.
// Wrapped in a block so the helper names do not leak into the global scope.
{
    const rootElement = document.documentElement;
    // Feature-detect once: without matchMedia the page simply stays in light
    // mode instead of throwing and aborting the rest of this script.
    const darkSchemeQuery = typeof window.matchMedia === 'function'
        ? window.matchMedia('(prefers-color-scheme: dark)')
        : null;

    if (darkSchemeQuery) {
        if (darkSchemeQuery.matches) {
            rootElement.classList.add('dark');
        }

        // Add the class when the preference switches to dark, remove it otherwise.
        const onSchemeChange = event => {
            rootElement.classList.toggle('dark', event.matches);
        };

        if (typeof darkSchemeQuery.addEventListener === 'function') {
            darkSchemeQuery.addEventListener('change', onSchemeChange);
        } else if (typeof darkSchemeQuery.addListener === 'function') {
            // Older engines expose only the deprecated addListener() on MediaQueryList.
            darkSchemeQuery.addListener(onSchemeChange);
        }
    }
}
// Mirror the slider's current value into the read-out element on every change.
document.getElementById('font_size').addEventListener('input', event => {
    const readout = document.getElementById('font_size_value');
    readout.textContent = event.currentTarget.value;
});
// Generate button click handler.
// Validates the niche, switches the page into its loading state and runs the
// simulated generation. The button stays disabled while a run is in flight so
// repeated clicks cannot start overlapping runs that interleave their writes
// to the progress line and the log.
document.getElementById('generate_btn').addEventListener('click', async function () {
    const niche = document.getElementById('niche').value.trim();
    const language = document.getElementById('language').value;

    if (!niche) {
        alert('Please enter a niche/topic');
        return;
    }

    // Show loading state
    const loadingPanel = document.getElementById('loading');
    loadingPanel.classList.remove('hidden');
    loadingPanel.classList.add('flex');
    document.getElementById('results').classList.add('hidden');

    // `this` is the clicked button (regular function, not an arrow function).
    this.disabled = true;
    try {
        // Simulate Gradio API call
        await simulateGradioProcess(niche, language);
    } finally {
        // Re-enable even if the run rejects, so the user can try again.
        this.disabled = false;
    }
});
// Function to simulate the Gradio process | |
/**
 * Play back a scripted, client-side imitation of the generation pipeline.
 *
 * Each stage updates the progress line (with an artificial delay) and appends
 * entries to the process log. When all stages finish, the loading panel is
 * hidden, the results panel is shown, and the player, title and description
 * are filled with demo content derived from `niche`.
 *
 * Any error is caught here: it is written to the console and surfaced through
 * the `status_message` and `progress_detail` elements, so the returned promise
 * does not reject.
 *
 * @param {string} niche    Topic typed by the user; interpolated into the demo text.
 * @param {string} language Selected language; accepted but not used by the simulation.
 * @returns {Promise<void>}
 */
async function simulateGradioProcess(niche, language) {
    try {
        // Simulated process steps
        await updateProgressWithDelay('Starting video generation process...', 1000);

        await updateProgressWithDelay('Generating topic...', 2000);
        const topic = `How ${niche} can improve your daily life`;
        logProcessOutput(`Generated topic: "${topic}"`);

        await updateProgressWithDelay('Creating script...', 3000);
        const script = `Did you know that ${niche} can transform how you approach everyday challenges? Studies show that incorporating ${niche} into your routine can boost productivity by up to 30%. The key is consistency - even just 10 minutes daily makes a difference. Start small, build gradually, and watch as your skills improve. Don't overthink it - the best time to start with ${niche} is right now.`;
        logProcessOutput(`Generated script (${script.length} chars)`);

        await updateProgressWithDelay('Creating title and description...', 2000);
        const title = `Transform Your Life With ${niche} - Simple Daily Hack! #productivity #lifestyle #growth`;
        const description = `Learn how incorporating ${niche} into your daily routine can dramatically improve your productivity and quality of life. Try these simple techniques today!`;
        logProcessOutput(`Generated title: "${title}"`);
        logProcessOutput(`Generated description`);

        await updateProgressWithDelay('Creating image prompts...', 2000);
        const imagePrompts = [
            `A person happily implementing ${niche} in their daily routine, vibrant colors`,
            `Before and after comparison showing the benefits of ${niche}, professional look`,
            `Closeup of tools or resources needed for ${niche}, detailed view`,
            `Person explaining ${niche} to others, teaching moment`,
            `Beautiful results of consistently practicing ${niche}, inspiring scene`
        ];
        imagePrompts.forEach((prompt, index) => {
            logProcessOutput(`Image prompt ${index + 1}: "${prompt}"`);
        });

        await updateProgressWithDelay('Generating images...', 4000);
        // The image count follows the prompt list instead of a separate literal,
        // so the two cannot drift apart if prompts are added or removed.
        const imageCount = imagePrompts.length;
        for (let imageNumber = 1; imageNumber <= imageCount; imageNumber++) {
            await updateProgressWithDelay(`Generating image ${imageNumber}/${imageCount}...`, 800);
            logProcessOutput(`Image ${imageNumber} generated successfully`);
        }

        await updateProgressWithDelay('Creating voiceover...', 3000);
        logProcessOutput(`Speech generated successfully`);

        await updateProgressWithDelay('Generating subtitles...', 2000);
        // Word count is a plain split on single spaces.
        logProcessOutput(`Generated word-level timing for ${script.split(' ').length} words`);
        logProcessOutput(`Generated subtitle lines`);

        await updateProgressWithDelay('Combining elements into final video...', 3000);
        logProcessOutput(`Processing video with word highlighting`);
        logProcessOutput(`Adding background music at low volume`);
        logProcessOutput(`Video successfully created`);

        // Display results
        const loadingPanel = document.getElementById('loading');
        loadingPanel.classList.add('hidden');
        loadingPanel.classList.remove('flex');
        document.getElementById('results').classList.remove('hidden');

        // Set video player source
        const videoPlayer = document.getElementById('video_player');
        videoPlayer.src = 'https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_1mb.mp4';

        // Set metadata
        document.getElementById('video_title').textContent = title;
        document.getElementById('video_description').textContent = description;
    } catch (error) {
        console.error('Error:', error);
        document.getElementById('status_message').textContent = 'Error generating video';
        document.getElementById('progress_detail').textContent = error.message || 'An unexpected error occurred';
    }
}
// Helper function to update progress with delay | |
/**
 * Show `message` in the `progress_detail` element, then wait `delay` ms.
 *
 * The text is written synchronously, before the first suspension point; the
 * returned promise settles once the timer has fired.
 *
 * @param {string} message Text for the progress line.
 * @param {number} delay   Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
async function updateProgressWithDelay(message, delay) {
    const progressLine = document.getElementById('progress_detail');
    progressLine.textContent = message;

    const pause = new Promise(function (wake) {
        setTimeout(wake, delay);
    });
    await pause;
}
// Function to log process output | |
/**
 * Append one timestamped line to the `log_output` element and scroll to it.
 *
 * The line is added as a text node followed by a `<br>` element rather than
 * being concatenated into `innerHTML`: messages include text typed by the
 * user, which must be displayed literally and never parsed as markup. Adding
 * nodes also avoids re-parsing the whole existing log on every call.
 *
 * @param {string} message Text to log; rendered verbatim.
 * @returns {void}
 */
function logProcessOutput(message) {
    const logOutput = document.getElementById('log_output');
    // 24-hour clock, e.g. "14:03:27".
    const timestamp = new Date().toLocaleTimeString('en-US', { hour12: false });

    logOutput.appendChild(document.createTextNode(`[${timestamp}] ${message}`));
    logOutput.appendChild(document.createElement('br'));

    // Keep the newest entry in view.
    logOutput.scrollTop = logOutput.scrollHeight;
}
</script> | |
</body> | |
</html> |