# VeoFlux
#
# Running
#
# File size: 14,733 Bytes

# Import necessary libraries
import math
import os
import random
import re
import shutil
import tempfile

import gradio as gr
import moviepy.video.fx.all as vfx
import numpy as np
import requests
import soundfile as sf
from gtts import gTTS
from kokoro import KPipeline
from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    VideoFileClip,
    concatenate_audioclips,
    concatenate_videoclips,
)
from PIL import Image
from pydub import AudioSegment

# Initialize Kokoro TTS pipeline (using American English)
# Constructed once at import time; generate_tts() calls this shared instance
# for every narration line instead of building a new pipeline per request.
pipeline = KPipeline(lang_code='a')

# Global Configuration
# SECURITY: the API credentials are read from the environment first
# (PEXELS_API_KEY / OPENROUTER_API_KEY). The literals are retained only as
# backward-compatible fallbacks so existing deployments keep working; because
# they are committed in this file they should be treated as exposed, rotated,
# and then removed from the source.
PEXELS_API_KEY = os.environ.get('PEXELS_API_KEY') or 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY') or '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'
# Model identifier sent in the OpenRouter chat-completions request body.
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
# The rendered video is written to this path, relative to the working directory.
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# Helper Functions
def generate_script(user_input):
    """Request a short documentary script from the OpenRouter chat API.

    Args:
        user_input: Topic (or ready-made script) substituted into the prompt
            template after "Topic:".

    Returns:
        The message content of the first choice in the response, or None when
        the request or the response parsing raises (the error is printed).
    """
    prompt = f"""Short Documentary Script GeneratorInstructions:
If I say "use this," output the script exactly as given.
If I give topics, generate a script based on them.
If I provide a full script, rewrite it unchanged. Keep it short, simple, humorous, and serious but funny. Use normal conversational text.
Formatting Rules:
- Title in square brackets: [Title]
- Each section starts with a one-word title in [ ] (max two words).
- Narration: 5-10 words, casual, funny, unpredictable.
- No special formatting, just script text.
- Generalized search terms for Pexels.
- End with a funny subscribe statement.
Example:
[North Korea]
Top 5 unknown facts about North Korea.
[Invisibility]
North Korea’s internet speed doesn’t exist.
[Leadership]
Kim Jong-un won 100% votes… against himself.
[Subscribe]
Subscribe, or Kim sends you a ticket to nowhere.
Topic: {user_input}
"""
    request_headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-domain.com',
        'X-Title': 'AI Documentary Maker'
    }
    payload = {
        'model': OPENROUTER_MODEL,
        'messages': [{'role': 'user', 'content': prompt}],
        'temperature': 0.4,
        'max_tokens': 5000
    }
    try:
        reply = requests.post(
            'https://openrouter.ai/api/v1/chat/completions',
            headers=request_headers,
            json=payload,
            timeout=30,
        )
        reply.raise_for_status()
        # The script text sits in the first choice's message content.
        first_choice = reply.json()['choices'][0]
        return first_choice['message']['content']
    except Exception as e:
        print(f"Script generation failed: {e}")
        return None

def parse_script(script_text):
    """Parse a script into a flat list of media-prompt / TTS-text elements.

    A section is a bracketed title line (``[Title]``) followed by its narration
    line. Lines are stripped and blank lines are ignored, so the parser no
    longer depends on titles sitting at even line offsets: leading preamble
    text, blank separators and padded lines are all tolerated. A narration
    written on the same line as its title (``[Title] some text``) is accepted
    as well. Only the first narration line after a title is used.

    Args:
        script_text: Raw script text; ``None`` or an empty string yields ``[]``.

    Returns:
        A list where each section contributes
        ``{'type': 'media', 'prompt': title}`` followed by
        ``{'type': 'tts', 'text': narration, 'voice': 'en'}``.
    """
    elements = []
    if not script_text:
        return elements

    pending_title = None  # title still waiting for its narration line
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('[') and ']' in line:
            if line.endswith(']'):
                # Plain "[Title]" line: everything between the outer brackets.
                title, inline_text = line[1:-1].strip(), ''
            else:
                # "[Title] narration" written on a single line.
                title, _, inline_text = line[1:].partition(']')
                title, inline_text = title.strip(), inline_text.strip()
            if not inline_text:
                pending_title = title or None
                continue
            narration = inline_text
        elif pending_title is not None:
            title, narration = pending_title, line
        else:
            # Text with no preceding title (e.g. an LLM preamble) is skipped.
            continue

        pending_title = None
        if title and narration:
            elements.append({'type': 'media', 'prompt': title})
            elements.append({'type': 'tts', 'text': narration, 'voice': 'en'})
    return elements

def search_pexels_videos(query, api_key):
    """Search Pexels for videos and return the link of a random HD rendition.

    Every entry of each video's ``video_files`` list is inspected, and the first
    one whose ``quality`` is ``"hd"`` is used. Checking only the first file
    would discard videos whose HD rendition is not listed first.

    Args:
        query: Search phrase sent to the Pexels video search endpoint.
        api_key: Pexels API key, sent as the ``Authorization`` header.

    Returns:
        The download link of one randomly chosen HD file, or None when no HD
        file is found or the request/parsing fails (the error is printed).
    """
    headers = {'Authorization': api_key}
    params = {"query": query, "per_page": 15}
    try:
        response = requests.get("https://api.pexels.com/videos/search", headers=headers, params=params, timeout=10)
        response.raise_for_status()
        videos = response.json().get("videos", [])

        hd_videos = []
        for video in videos:
            # At most one link per video so each video is equally likely.
            for video_file in video.get("video_files") or []:
                if video_file.get("quality") == "hd" and video_file.get("link"):
                    hd_videos.append(video_file["link"])
                    break
        return random.choice(hd_videos) if hd_videos else None
    except Exception as e:
        print(f"Pexels video search failed: {e}")
        return None

def search_pexels_images(query, api_key):
    """Look up landscape photos on Pexels and pick one at random.

    Args:
        query: Search phrase sent to the Pexels photo search endpoint.
        api_key: Pexels API key, sent as the ``Authorization`` header.

    Returns:
        The ``src.original`` URL of a randomly chosen photo, or None when the
        search returns no photos or the request/parsing fails (error printed).
    """
    try:
        reply = requests.get(
            "https://api.pexels.com/v1/search",
            headers={'Authorization': api_key},
            params={"query": query, "per_page": 5, "orientation": "landscape"},
            timeout=10,
        )
        reply.raise_for_status()
        candidates = reply.json().get("photos", [])
        if not candidates:
            return None
        chosen = random.choice(candidates)
        return chosen["src"]["original"]
    except Exception as e:
        print(f"Pexels image search failed: {e}")
        return None

def download_file(url, filename):
    """Stream a URL to a local file.

    The streamed response is used as a context manager so the connection is
    released even when writing fails, and a partially written file is removed
    on failure so callers never pick up a truncated download.

    Args:
        url: Address to download.
        filename: Destination path; overwritten if it already exists.

    Returns:
        ``filename`` on success, or None on any failure (the error is printed).
    """
    file_opened = False  # only delete the destination if this call created/truncated it
    try:
        with requests.get(url, stream=True, timeout=15) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                file_opened = True
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return filename
    except Exception as e:
        print(f"Download failed: {e}")
        if file_opened:
            try:
                os.remove(filename)
            except OSError:
                # Best effort: the partial file may already be gone.
                pass
        return None

def generate_media(prompt, video_percentage, temp_folder):
    """Fetch one stock asset for a prompt, preferring video with a given probability.

    Args:
        prompt: Search phrase; also used (sanitised) as the file name stem.
        video_percentage: Chance, in percent, that a video is attempted first.
        temp_folder: Directory the downloaded asset is written to.

    Returns:
        ``{"path": ..., "asset_type": "video" | "image"}`` for the downloaded
        asset, or None when neither a video nor an image could be obtained.
    """
    # Drop everything except word characters, whitespace and hyphens, then
    # turn spaces into underscores to get a usable file name stem.
    stem = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    try_video = random.random() < video_percentage / 100
    if try_video:
        video_target = os.path.join(temp_folder, f"{stem}_video.mp4")
        video_link = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_link and download_file(video_link, video_target):
            return {"path": video_target, "asset_type": "video"}

    # Either video was not selected or it could not be fetched: use an image.
    image_target = os.path.join(temp_folder, f"{stem}.jpg")
    image_link = search_pexels_images(prompt, PEXELS_API_KEY)
    if not (image_link and download_file(image_link, image_target)):
        return None
    return {"path": image_target, "asset_type": "image"}

def generate_tts(text, voice, temp_folder):
    """Synthesise narration audio to a WAV file, with a gTTS fallback.

    The Kokoro pipeline is tried first; every audio segment it yields is
    concatenated, so text that the pipeline splits into several segments is no
    longer truncated to the first one. If Kokoro fails, gTTS produces an MP3
    that is converted to WAV with pydub.

    Each call writes to a unique file. The name still starts with the first
    characters of the text, but a unique suffix prevents two narrations that
    share a prefix from overwriting each other's audio.

    Args:
        text: Narration text to speak.
        voice: Accepted for interface compatibility; currently unused (Kokoro
            always uses ``'af_heart'`` and gTTS always uses ``'en'``).
        temp_folder: Directory the audio file is created in.

    Returns:
        Path of the generated WAV file, or None when both engines fail.
    """
    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    fd, file_path = tempfile.mkstemp(prefix=f"tts_{safe_text}_", suffix=".wav", dir=temp_folder)
    os.close(fd)  # only the reserved unique name is needed; writers reopen the path

    try:
        generator = pipeline(text, voice='af_heart', speed=0.9)
        # Index 2 of each yielded item is the audio for that segment.
        segments = [result[2] for result in generator]
        segments = [segment for segment in segments if segment is not None]
        if not segments:
            raise ValueError("Kokoro produced no audio")
        audio = segments[0] if len(segments) == 1 else np.concatenate(segments)
        sf.write(file_path, audio, 24000)
        return file_path
    except Exception as kokoro_error:
        print(f"Kokoro TTS failed, falling back to gTTS: {kokoro_error}")

    mp3_path = os.path.splitext(file_path)[0] + ".mp3"
    try:
        tts = gTTS(text=text, lang='en')
        tts.save(mp3_path)
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(file_path, format="wav")
        return file_path
    except Exception as e:
        print(f"TTS generation failed: {e}")
        try:
            os.remove(file_path)  # drop the empty/partial placeholder
        except OSError:
            pass
        return None
    finally:
        # The intermediate MP3 is never needed by callers.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)

def resize_to_fill(clip, target_resolution):
    """Scale a clip so it covers the target frame, then centre-crop the overflow.

    Args:
        clip: Object exposing ``w``, ``h``, ``resize(width=|height=)`` and
            ``crop(x1=, x2=, y1=, y2=)``.
        target_resolution: ``(width, height)`` of the frame to fill.

    Returns:
        The resized and cropped clip.
    """
    want_w, want_h = target_resolution
    wider_than_target = (clip.w / clip.h) > (want_w / want_h)

    if not wider_than_target:
        # Same aspect or taller: match the width, trim top and bottom equally.
        scaled = clip.resize(width=want_w)
        trim = (scaled.h - want_h) / 2
        return scaled.crop(y1=trim, y2=scaled.h - trim)

    # Wider than the target: match the height, trim left and right equally.
    scaled = clip.resize(height=want_h)
    trim = (scaled.w - want_w) / 2
    return scaled.crop(x1=trim, x2=scaled.w - trim)

def create_clip(media_path, asset_type, tts_path, duration, narration_text, text_color, text_size, caption_bg, target_resolution):
    """Build one narrated clip from a media file, its TTS audio and optional captions.

    The clip lasts as long as the narration audio plus 0.2 s. Videos are
    looped or trimmed to that length; images are held for it with a fade in
    and out. Both asset types go through ``resize_to_fill`` so images are
    scaled and cropped like videos instead of being stretched to the frame.

    Args:
        media_path: Path of the video or image file.
        asset_type: ``"video"`` for a video file; anything else is treated as
            an image.
        tts_path: Path of the narration audio file.
        duration: Unused; the clip length is derived from the audio.
        narration_text: Caption text, shown in chunks of five words spread
            evenly over the narration.
        text_color: Caption text colour.
        text_size: Caption font size.
        caption_bg: Caption background colour. The exact value
            ``"transparent"`` disables captions entirely.
        target_resolution: ``(width, height)`` of the output frame.

    Returns:
        The assembled clip with the narration attached, or None if anything
        raises (the error is printed).
    """
    try:
        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        target_duration = audio_clip.duration + 0.2
        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, target_resolution)
            clip = clip.loop(duration=target_duration) if clip.duration < target_duration else clip.subclip(0, target_duration)
        else:  # image
            clip = ImageClip(media_path).set_duration(target_duration)
            # Fill the frame while keeping the aspect ratio, as for videos.
            clip = resize_to_fill(clip, target_resolution)
            clip = clip.fadein(0.3).fadeout(0.3)

        # NOTE(review): "transparent" doubles as the "no captions" sentinel, so
        # captions with a genuinely transparent background cannot be requested.
        words = narration_text.split() if narration_text else []
        if words and caption_bg != "transparent":
            chunks = [' '.join(words[i:i+5]) for i in range(0, len(words), 5)]
            chunk_duration = audio_clip.duration / len(chunks)
            # Pixel sizes must be whole numbers.
            caption_width = int(target_resolution[0] * 0.8)
            subtitle_clips = [
                TextClip(
                    chunk,
                    fontsize=text_size,
                    color=text_color,
                    bg_color=caption_bg,
                    size=(caption_width, None),
                    method='caption',
                    align='center'
                ).set_position(('center', target_resolution[1] * 0.7)).set_start(i * chunk_duration).set_end((i + 1) * chunk_duration)
                for i, chunk in enumerate(chunks)
            ]
            clip = CompositeVideoClip([clip] + subtitle_clips)

        clip = clip.set_audio(audio_clip)
        return clip
    except Exception as e:
        print(f"Clip creation failed: {e}")
        return None

def add_background_music(final_video, custom_music_path, music_volume):
    """Mix looping background music under the video's existing audio.

    Uses the custom track when it exists, otherwise ``default_music.mp3`` from
    the working directory. The track is repeated to cover the video, trimmed
    to the video's duration and scaled by ``music_volume`` before mixing.

    Args:
        final_video: Video clip to add music to.
        custom_music_path: Path of a custom track, an object with a ``name``
            attribute holding that path (e.g. an uploaded temp file), or None.
        music_volume: Volume multiplier applied to the music.

    Returns:
        The video with the mixed audio, or the unchanged ``final_video`` when
        no music file is available or mixing fails (a message is printed).
    """
    try:
        # Uploaded files may arrive as file-like objects; use their path.
        music_path = getattr(custom_music_path, "name", custom_music_path)
        if not (music_path and os.path.exists(music_path)):
            music_path = "default_music.mp3"
            if not os.path.exists(music_path):
                print("Background music skipped: no custom track and default_music.mp3 not found")
                return final_video

        bg_music = AudioFileClip(music_path)
        if bg_music.duration < final_video.duration:
            repeats = math.ceil(final_video.duration / bg_music.duration)
            bg_music = concatenate_audioclips([bg_music] * repeats)
        bg_music = bg_music.subclip(0, final_video.duration).volumex(music_volume)

        # A video without narration audio simply gets the music track alone.
        tracks = [bg_music] if final_video.audio is None else [final_video.audio, bg_music]
        final_video = final_video.set_audio(CompositeAudioClip(tracks))
        return final_video
    except Exception as e:
        print(f"Background music failed: {e}")
        return final_video

# Gradio Interface
with gr.Blocks(title="AI Documentary Video Generator") as app:
    ### Initial Inputs
    # Settings collected up front. Only `concept` feeds script generation; the
    # remaining controls are passed to generate_video_fn when the video is built.
    with gr.Column():
        concept = gr.Textbox(label="Video Concept", placeholder="Enter your video concept...")
        # "Full" maps to 1920x1080 and "Short" to 1080x1920 in generate_video_fn.
        resolution = gr.Radio(["Full", "Short"], label="Resolution", value="Full")
        captions = gr.Radio(["Yes", "No"], label="Captions", value="Yes")
        # Percent chance that a stock video (rather than an image) is tried first.
        video_percentage = gr.Slider(0, 100, label="Video Percentage", value=50)
        text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
        text_size = gr.Slider(20, 60, label="Text Size", value=28)
        # NOTE(review): create_clip skips captions whenever this value is exactly
        # "transparent", so with this default no captions are drawn even when
        # Captions is "Yes" - confirm whether that is intended.
        caption_bg = gr.ColorPicker(label="Caption Background Color", value="transparent")
        music_volume = gr.Slider(0, 1, label="Music Volume", value=0.08)
        # NOTE(review): type="file" support depends on the installed Gradio version - confirm.
        custom_music = gr.File(label="Upload Custom Background Music", type="file")
        generate_script_btn = gr.Button("Generate Script")

    ### States
    # Values carried from script generation to video generation:
    # number of parsed sections, their titles (media search prompts) and the
    # narration texts used to pre-fill the clip text boxes.
    num_clips = gr.State(value=0)
    titles_state = gr.State(value=[])
    initial_texts_state = gr.State(value=[])

    ### Clip Editing Section
    # A fixed pool of 10 hidden text/upload rows is created up front; the
    # update_textboxes / update_files callbacks reveal as many rows as the
    # generated script has sections.
    with gr.Column(visible=False) as clip_section:
        clip_textboxes = []
        clip_files = []
        for i in range(10):  # Max 10 clips
            with gr.Row():
                text_box = gr.Textbox(label=f"Clip {i+1} Text", visible=False)
                # NOTE(review): type="file" support depends on the installed Gradio version - confirm.
                file_upload = gr.File(label=f"Upload Media for Clip {i+1}", type="file", visible=False)
                clip_textboxes.append(text_box)
                clip_files.append(file_upload)
        generate_video_btn = gr.Button("Generate Video", visible=False)

    ### Output
    # Receives the file path returned by generate_video_fn.
    video_output = gr.Video(label="Generated Video")

    ### Script Generation Logic
    def generate_script_fn(concept):
        """Generate and parse a script, returning the values for the state components.

        The result is capped at the number of clip editor rows that exist in
        the UI. Without the cap, a script with more sections would report a
        clip count larger than the number of text/file inputs and make video
        generation index past them.

        Args:
            concept: Text entered in the "Video Concept" box.

        Returns:
            ``(clip_count, titles, texts)``; ``(0, [], [])`` when script
            generation fails.
        """
        script = generate_script(concept)
        if not script:
            return 0, [], []
        elements = parse_script(script)
        titles = [e['prompt'] for e in elements if e['type'] == 'media']
        texts = [e['text'] for e in elements if e['type'] == 'tts']

        # Keep titles and texts paired and within the available editor rows.
        clip_count = min(len(titles), len(texts), len(clip_textboxes))
        return clip_count, titles[:clip_count], texts[:clip_count]

    def update_textboxes(texts):
        """Build one update per clip text box: fill and show used rows, blank and hide the rest."""
        updates = []
        for slot in range(10):
            in_use = slot < len(texts)
            content = texts[slot] if in_use else ""
            updates.append(gr.update(value=content, visible=in_use))
        return updates

    def update_files(n):
        """Build one update per clip upload widget, showing only the first ``n``."""
        updates = []
        for slot in range(10):
            updates.append(gr.update(visible=slot < n))
        return updates

    # Event chain for "Generate Script"; each step runs after the previous one:
    #   1. generate the script and store clip count, titles and texts in state,
    #   2. fill and reveal the clip text boxes,
    #   3. reveal the matching upload widgets,
    #   4. show the clip editing section,
    #   5. show the "Generate Video" button.
    generate_script_btn.click(
        fn=generate_script_fn,
        inputs=[concept],
        outputs=[num_clips, titles_state, initial_texts_state]
    ).then(
        fn=update_textboxes,
        inputs=[initial_texts_state],
        outputs=clip_textboxes
    ).then(
        fn=update_files,
        inputs=[num_clips],
        outputs=clip_files
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[clip_section]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[generate_video_btn]
    )

    ### Video Generation Logic
    def generate_video_fn(resolution, captions, video_percentage, text_color, text_size, caption_bg, music_volume, custom_music, num_clips, titles, *clip_data):
        """Render the final video from the edited clip texts and optional uploads.

        For each clip, an uploaded media file is used when present; otherwise
        stock media is fetched for the clip's title. Narration audio is
        generated, the clips are concatenated, background music is mixed in,
        and the result is written to ``OUTPUT_VIDEO_FILENAME``.

        Args:
            resolution: ``"Full"`` for 1920x1080, anything else for 1080x1920.
            captions: ``"Yes"`` to pass ``caption_bg`` through; otherwise
                captions are disabled.
            video_percentage: Percent chance of trying a stock video first.
            text_color: Caption text colour.
            text_size: Caption font size.
            caption_bg: Caption background colour.
            music_volume: Volume multiplier for the background music.
            custom_music: Uploaded music, as a path or an object with a
                ``name`` attribute holding the path; may be None.
            num_clips: Number of clips to build.
            titles: Section titles used as stock-media search prompts.
            *clip_data: The 10 clip texts followed by the clip uploads.

        Returns:
            ``OUTPUT_VIDEO_FILENAME`` on success, or None when no clip could
            be created.
        """
        texts = clip_data[:10]
        files = clip_data[10:]
        titles = titles or []
        target_resolution = (1920, 1080) if resolution == "Full" else (1080, 1920)
        # NOTE(review): create_clip draws no captions for "transparent", which
        # is also the colour picker's default - confirm that is intended.
        effective_caption_bg = caption_bg if captions == "Yes" else "transparent"
        # Uploads may arrive as file-like objects; downstream code needs paths.
        music_source = getattr(custom_music, "name", custom_music)
        # Never index past the inputs actually received.
        clip_count = min(int(num_clips or 0), len(texts), len(files), len(titles))

        temp_folder = tempfile.mkdtemp()
        clips = []
        final_video = None
        try:
            for i in range(clip_count):
                text = (texts[i] or "").strip()
                if not text:
                    continue  # nothing to narrate for this clip

                media_file = files[i]
                if media_file:
                    source_path = getattr(media_file, "name", media_file)
                    ext = os.path.splitext(source_path)[1].lower()
                    media_path = os.path.join(temp_folder, f"clip_{i}{ext}")
                    shutil.copy(source_path, media_path)
                    asset_type = "video" if ext in ['.mp4', '.avi', '.mov'] else "image"
                else:
                    media_asset = generate_media(titles[i], video_percentage, temp_folder)
                    if not media_asset:
                        continue
                    media_path = media_asset['path']
                    asset_type = media_asset['asset_type']

                tts_path = generate_tts(text, 'en', temp_folder)
                if not tts_path:
                    continue

                duration = max(3, len(text.split()) * 0.5)
                clip = create_clip(
                    media_path, asset_type, tts_path, duration, text,
                    text_color, text_size, effective_caption_bg, target_resolution
                )
                if clip:
                    clips.append(clip)

            if not clips:
                return None

            final_video = concatenate_videoclips(clips, method="compose")
            final_video = add_background_music(final_video, music_source, music_volume)
            final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24)
            return OUTPUT_VIDEO_FILENAME
        finally:
            # Release clip resources before deleting the files they read from,
            # and clean up even when rendering raised.
            for opened in [final_video, *clips]:
                if opened is None:
                    continue
                try:
                    opened.close()
                except Exception as e:
                    print(f"Clip cleanup failed: {e}")
            shutil.rmtree(temp_folder, ignore_errors=True)

    # Input order must match generate_video_fn's signature: the ten named
    # settings/state values first, then all clip text boxes followed by all
    # clip upload widgets (received as *clip_data).
    generate_video_btn.click(
        fn=generate_video_fn,
        inputs=[resolution, captions, video_percentage, text_color, text_size, caption_bg, music_volume, custom_music, num_clips, titles_state] + clip_textboxes + clip_files,
        outputs=[video_output]
    )

# Starts the Gradio server at import time; share=True requests a public share link.
app.launch(share=True)