File size: 14,733 Bytes
48a6754
 
 
 
ab66965
 
48a6754
 
372c71b
48a6754
372c71b
48a6754
 
ab66965
48a6754
 
 
 
9577f14
372c71b
48a6754
ab66965
48a6754
79e0864
 
48a6754
372c71b
ab66965
 
48a6754
 
 
372c71b
48a6754
 
ab66965
372c71b
48a6754
ab66965
48a6754
 
ab66965
48a6754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab66965
372c71b
48a6754
372c71b
48a6754
 
372c71b
a7ea9a7
48a6754
 
 
372c71b
48a6754
 
a7ea9a7
 
48a6754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372c71b
48a6754
 
 
 
 
372c71b
48a6754
ab66965
 
48a6754
 
 
 
9d0e100
48a6754
ab66965
48a6754
 
9d0e100
48a6754
9d0e100
 
48a6754
 
372c71b
48a6754
ab66965
 
 
 
48a6754
 
 
ab66965
 
48a6754
 
ab66965
48a6754
 
 
 
 
 
 
 
 
ab66965
 
48a6754
 
 
 
ab66965
48a6754
 
 
ab66965
48a6754
ab66965
48a6754
 
 
 
 
 
 
ab66965
48a6754
 
ab66965
48a6754
 
ab66965
 
 
 
 
48a6754
 
ab66965
 
48a6754
 
ab66965
 
48a6754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab66965
48a6754
 
 
 
1942951
9d0e100
48a6754
 
ab66965
48a6754
 
ab66965
48a6754
 
 
 
 
ab66965
 
48a6754
ab66965
 
48a6754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab66965
48a6754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab66965
48a6754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab66965
48a6754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab66965
 
48a6754
 
 
 
9d0e100
372c71b
48a6754
 
 
372c71b
1125cdd
48a6754
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# Import necessary libraries
import gradio as gr
import os
import shutil
import tempfile
import random
import requests
import soundfile as sf
from moviepy.editor import (
    VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip, CompositeVideoClip, TextClip
)
import moviepy.video.fx.all as vfx
from kokoro import KPipeline
from gtts import gTTS
from pydub import AudioSegment
import math
import re
from PIL import Image

# Initialize Kokoro TTS pipeline (using American English).
# Created once at import time and shared by generate_tts(), which calls it
# with the narration text, a voice name and a speed for every clip.
pipeline = KPipeline(lang_code='a')

# Global Configuration
# API credentials are read from the environment so that secrets never live in
# the source file. Set PEXELS_API_KEY and OPENROUTER_API_KEY before launching
# the app. When a key is missing the value is an empty string; requests made
# with it are expected to be rejected by the remote service, which the helper
# functions below report and turn into a None result.
# NOTE: any key that was ever committed in this file should be treated as
# leaked and rotated.
PEXELS_API_KEY = os.environ.get('PEXELS_API_KEY', '')
OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '')
# Model identifier sent in the OpenRouter chat-completions request.
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
# File the rendered video is written to (relative to the working directory).
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# Helper Functions
def generate_script(user_input):
    """Ask the OpenRouter chat-completions API for a short documentary script.

    Args:
        user_input: Topic (or ready-made script) inserted at the end of the
            prompt template.

    Returns:
        The text content of the first returned choice, or None when the
        request or the decoding of its response fails (the error is printed).
    """
    endpoint = 'https://openrouter.ai/api/v1/chat/completions'
    request_headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-domain.com',
        'X-Title': 'AI Documentary Maker'
    }
    prompt = f"""Short Documentary Script GeneratorInstructions:
If I say "use this," output the script exactly as given.
If I give topics, generate a script based on them.
If I provide a full script, rewrite it unchanged. Keep it short, simple, humorous, and serious but funny. Use normal conversational text.
Formatting Rules:
- Title in square brackets: [Title]
- Each section starts with a one-word title in [ ] (max two words).
- Narration: 5-10 words, casual, funny, unpredictable.
- No special formatting, just script text.
- Generalized search terms for Pexels.
- End with a funny subscribe statement.
Example:
[North Korea]
Top 5 unknown facts about North Korea.
[Invisibility]
North Korea’s internet speed doesn’t exist.
[Leadership]
Kim Jong-un won 100% votes… against himself.
[Subscribe]
Subscribe, or Kim sends you a ticket to nowhere.
Topic: {user_input}
"""
    conversation = [{'role': 'user', 'content': prompt}]
    payload = {
        'model': OPENROUTER_MODEL,
        'messages': conversation,
        'temperature': 0.4,
        'max_tokens': 5000
    }
    try:
        reply = requests.post(endpoint, headers=request_headers, json=payload, timeout=30)
        reply.raise_for_status()
        first_choice = reply.json()['choices'][0]
        return first_choice['message']['content']
    except Exception as e:
        # Best effort: any network, HTTP or decoding problem yields None.
        print(f"Script generation failed: {e}")
        return None

def parse_script(script_text):
    """Parse a generated script into alternating media and TTS elements.

    The script is expected to consist of section titles written as
    ``[Title]`` on their own line, each followed by one line of narration.
    The text is scanned line by line rather than in fixed pairs, so blank
    lines, surrounding whitespace or introductory text produced by the model
    do not shift the title/narration pairing.

    Args:
        script_text: Raw script text; None or an empty string gives [].

    Returns:
        A flat list in which every section contributes
        ``{'type': 'media', 'prompt': title}`` followed by
        ``{'type': 'tts', 'text': narration, 'voice': 'en'}``.
        Titles without narration, empty titles and narration lines that do
        not follow a title are ignored.
    """
    elements = []
    if not script_text:
        return elements

    pending_title = None
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('[') and line.endswith(']'):
            # A new title replaces any earlier title that never got narration.
            pending_title = line[1:-1].strip() or None
            continue
        if pending_title is None:
            # Stray text (preamble, extra narration lines) has no section.
            continue
        elements.append({'type': 'media', 'prompt': pending_title})
        elements.append({'type': 'tts', 'text': line, 'voice': 'en'})
        pending_title = None
    return elements

def search_pexels_videos(query, api_key):
    """Search Pexels and return the link of a randomly chosen HD video.

    Every rendition listed for a result is inspected, so a video still
    qualifies when its HD file is not the first entry of ``video_files``.
    At most one link (the first HD rendition) is collected per video.

    Args:
        query: Search terms sent to the Pexels video search endpoint.
        api_key: Value sent in the ``Authorization`` header.

    Returns:
        A download URL string, or None when nothing suitable was found or the
        request failed (the error is printed).
    """
    headers = {'Authorization': api_key}
    params = {"query": query, "per_page": 15}
    try:
        response = requests.get("https://api.pexels.com/videos/search", headers=headers, params=params, timeout=10)
        response.raise_for_status()
        videos = response.json().get("videos", [])
        hd_videos = []
        for video in videos:
            # .get() keeps one incomplete entry from discarding all results.
            for rendition in video.get("video_files") or []:
                if rendition.get("quality") == "hd" and rendition.get("link"):
                    hd_videos.append(rendition["link"])
                    break
        return random.choice(hd_videos) if hd_videos else None
    except Exception as e:
        print(f"Pexels video search failed: {e}")
        return None

def search_pexels_images(query, api_key):
    """Look up landscape photos on Pexels and pick one at random.

    Args:
        query: Search terms sent to the Pexels photo search endpoint.
        api_key: Value sent in the ``Authorization`` header.

    Returns:
        The ``src.original`` URL of the chosen photo, or None when the search
        returned no photos or failed (the error is printed).
    """
    auth_headers = {'Authorization': api_key}
    search_params = {"query": query, "per_page": 5, "orientation": "landscape"}
    try:
        reply = requests.get("https://api.pexels.com/v1/search", headers=auth_headers, params=search_params, timeout=10)
        reply.raise_for_status()
        photos = reply.json().get("photos", [])
        if not photos:
            return None
        chosen = random.choice(photos)
        return chosen["src"]["original"]
    except Exception as e:
        print(f"Pexels image search failed: {e}")
        return None

def download_file(url, filename):
    """Download ``url`` to ``filename`` in 8 KiB chunks.

    The streamed response is used as a context manager so the connection is
    released even when the transfer fails part-way, and a file that was only
    partially written is removed so later steps never read truncated media.

    Args:
        url: Address to fetch.
        filename: Destination path; it is created or overwritten.

    Returns:
        ``filename`` on success, or None on any failure (the error is printed).
    """
    file_opened = False
    try:
        with requests.get(url, stream=True, timeout=15) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                file_opened = True
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return filename
    except Exception as e:
        print(f"Download failed: {e}")
        if file_opened:
            # Only delete what this call created; cleanup itself is best effort.
            try:
                os.remove(filename)
            except OSError:
                pass
        return None

def generate_media(prompt, video_percentage, temp_folder):
    """Fetch a stock video or image for ``prompt`` into ``temp_folder``.

    A video is attempted with a probability of ``video_percentage`` percent.
    When no video is attempted, or the attempt fails, an image is tried.

    Args:
        prompt: Search terms; also used (sanitised) as the file name stem.
        video_percentage: Chance, from 0 to 100, of trying a video first.
        temp_folder: Directory the downloaded file is stored in.

    Returns:
        ``{"path": ..., "asset_type": "video" | "image"}`` or None when
        nothing could be downloaded.
    """
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    try_video = random.random() < video_percentage / 100
    if try_video:
        video_file = os.path.join(temp_folder, f"{safe_prompt}_video.mp4")
        video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_url and download_file(video_url, video_file):
            return {"path": video_file, "asset_type": "video"}

    # Image path: used directly or as the fallback after a failed video.
    image_file = os.path.join(temp_folder, f"{safe_prompt}.jpg")
    image_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if not image_url:
        return None
    if not download_file(image_url, image_file):
        return None
    return {"path": image_file, "asset_type": "image"}

def generate_tts(text, voice, temp_folder):
    """Synthesise ``text`` to a WAV file, using Kokoro with a gTTS fallback.

    The Kokoro pipeline is a generator that may yield several results for one
    text; the audio of all of them is joined so the narration is not cut off
    after the first result. Each call writes to a uniquely named file, so two
    narrations starting with the same characters cannot overwrite each
    other's audio.

    Args:
        text: Narration to speak.
        voice: Accepted for interface compatibility; currently unused (Kokoro
            always uses 'af_heart', the fallback always uses English).
        temp_folder: Directory the audio file is created in.

    Returns:
        Path of the generated WAV file, or None when both engines fail.
    """
    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    # mkstemp guarantees a fresh name; only the path is needed afterwards.
    handle, file_path = tempfile.mkstemp(prefix=f"tts_{safe_text}_", suffix=".wav", dir=temp_folder)
    os.close(handle)
    try:
        import numpy as np

        segments = [
            np.asarray(result[2])
            for result in pipeline(text, voice='af_heart', speed=0.9)
            if result[2] is not None
        ]
        if not segments:
            raise ValueError("Kokoro produced no audio")
        sf.write(file_path, np.concatenate(segments), 24000)
        return file_path
    except Exception as kokoro_error:
        print(f"Kokoro TTS failed, falling back to gTTS: {kokoro_error}")
        try:
            tts = gTTS(text=text, lang='en')
            mp3_path = os.path.splitext(file_path)[0] + ".mp3"
            tts.save(mp3_path)
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(file_path, format="wav")
            os.remove(mp3_path)
            return file_path
        except Exception as e:
            print(f"TTS generation failed: {e}")
            return None

def resize_to_fill(clip, target_resolution):
    """Scale ``clip`` to cover ``target_resolution`` and crop the overflow.

    Args:
        clip: Object exposing ``w``, ``h``, ``resize()`` and ``crop()``.
        target_resolution: ``(width, height)`` to fill.

    Returns:
        The resized clip with the excess trimmed equally from both sides
        (wider clips) or from top and bottom (all other clips).
    """
    want_w, want_h = target_resolution
    wider_than_target = (clip.w / clip.h) > (want_w / want_h)

    if wider_than_target:
        # Match the height, then trim the surplus width symmetrically.
        scaled = clip.resize(height=want_h)
        margin = (scaled.w - want_w) / 2
        return scaled.crop(x1=margin, x2=scaled.w - margin)

    # Match the width, then trim the surplus height symmetrically.
    scaled = clip.resize(width=want_w)
    margin = (scaled.h - want_h) / 2
    return scaled.crop(y1=margin, y2=scaled.h - margin)

def create_clip(media_path, asset_type, tts_path, duration, narration_text, text_color, text_size, caption_bg, target_resolution):
    """Create a video clip with media, TTS audio and optional subtitles.

    The clip lasts as long as the narration audio plus 0.2 seconds. Videos
    are looped or trimmed to that length; images are shown for that length
    with a short fade in and out. Both kinds of media are scaled and cropped
    with resize_to_fill(), so images keep their aspect ratio instead of being
    stretched to the target size.

    Args:
        media_path: Path of the video or image file.
        asset_type: "video" for video files; anything else is treated as an
            image.
        tts_path: Path of the narration audio file.
        duration: Accepted for interface compatibility; currently unused,
            because the length is derived from the audio.
        narration_text: Text shown as subtitles, five words at a time.
        text_color: Subtitle text colour.
        text_size: Subtitle font size.
        caption_bg: Subtitle background colour. The value "transparent"
            disables subtitles entirely.
        target_resolution: ``(width, height)`` of the output.

    Returns:
        The assembled clip, or None when anything fails (the error is
        printed).
    """
    try:
        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        target_duration = audio_clip.duration + 0.2
        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, target_resolution)
            if clip.duration < target_duration:
                clip = clip.loop(duration=target_duration)
            else:
                clip = clip.subclip(0, target_duration)
        else:  # image
            clip = resize_to_fill(ImageClip(media_path), target_resolution)
            clip = clip.set_duration(target_duration).fadein(0.3).fadeout(0.3)

        # Whitespace-only narration yields no words; skip subtitles then
        # instead of dividing by zero and losing the whole clip.
        words = narration_text.split() if narration_text else []
        if words and caption_bg != "transparent":
            chunks = [' '.join(words[i:i+5]) for i in range(0, len(words), 5)]
            chunk_duration = audio_clip.duration / len(chunks)
            # Pixel sizes must be whole numbers.
            caption_width = int(target_resolution[0] * 0.8)
            subtitle_clips = [
                TextClip(
                    chunk,
                    fontsize=text_size,
                    color=text_color,
                    bg_color=caption_bg,
                    size=(caption_width, None),
                    method='caption',
                    align='center'
                ).set_position(('center', target_resolution[1] * 0.7)).set_start(i * chunk_duration).set_end((i + 1) * chunk_duration)
                for i, chunk in enumerate(chunks)
            ]
            clip = CompositeVideoClip([clip] + subtitle_clips)

        clip = clip.set_audio(audio_clip)
        return clip
    except Exception as e:
        print(f"Clip creation failed: {e}")
        return None

def add_background_music(final_video, custom_music_path, music_volume):
    """Mix looping background music under the video's existing audio.

    Args:
        final_video: Clip whose audio track is extended with music.
        custom_music_path: Path of a music file, or an upload object exposing
            the path as ``.name``. When empty or missing on disk,
            "default_music.mp3" is used instead.
        music_volume: Volume factor applied to the music.

    Returns:
        The video with the mixed audio. When anything fails the error is
        printed and ``final_video`` is returned unchanged.
    """
    try:
        # These two helpers are not part of the module-level imports.
        from moviepy.editor import CompositeAudioClip, concatenate_audioclips

        music_path = getattr(custom_music_path, "name", custom_music_path)
        if music_path and os.path.exists(music_path):
            bg_music = AudioFileClip(music_path)
        else:
            bg_music = AudioFileClip("default_music.mp3")  # Assume a default music file exists
        if bg_music.duration < final_video.duration:
            # Repeat the track often enough to cover the whole video.
            repeats = math.ceil(final_video.duration / bg_music.duration)
            bg_music = concatenate_audioclips([bg_music] * repeats)
        bg_music = bg_music.subclip(0, final_video.duration).volumex(music_volume)
        return final_video.set_audio(CompositeAudioClip([final_video.audio, bg_music]))
    except Exception as e:
        print(f"Background music failed: {e}")
        return final_video

# Gradio Interface
with gr.Blocks(title="AI Documentary Video Generator") as app:
    ### Initial Inputs
    # Settings collected before the script is generated. All of them except
    # `concept` are passed to generate_video_fn when the video is rendered.
    with gr.Column():
        concept = gr.Textbox(label="Video Concept", placeholder="Enter your video concept...")
        # "Full" renders 1920x1080, any other choice renders 1080x1920.
        resolution = gr.Radio(["Full", "Short"], label="Resolution", value="Full")
        # Any value other than "Yes" makes generate_video_fn pass "transparent"
        # as caption background, which create_clip treats as "no subtitles".
        captions = gr.Radio(["Yes", "No"], label="Captions", value="Yes")
        # Chance (in percent) that generate_media tries a video before an image.
        video_percentage = gr.Slider(0, 100, label="Video Percentage", value=50)
        text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
        text_size = gr.Slider(20, 60, label="Text Size", value=28)
        # NOTE(review): create_clip skips subtitles whenever this value equals
        # "transparent", so with this default no captions are drawn until a
        # colour is picked - confirm that this is intended.
        caption_bg = gr.ColorPicker(label="Caption Background Color", value="transparent")
        # Volume factor applied to the background music track.
        music_volume = gr.Slider(0, 1, label="Music Volume", value=0.08)
        # NOTE(review): the uploaded value is later handed to os.path helpers;
        # verify what type="file" delivers in the installed Gradio version.
        custom_music = gr.File(label="Upload Custom Background Music", type="file")
        generate_script_btn = gr.Button("Generate Script")

    ### States
    # Values produced by generate_script_fn and kept between the two buttons:
    # the number of parsed sections, their titles (used as media search
    # prompts) and the narration texts used to pre-fill the clip editors.
    num_clips = gr.State(value=0)
    titles_state = gr.State(value=[])
    initial_texts_state = gr.State(value=[])

    ### Clip Editing Section
    # Ten hidden rows, each with a narration textbox and a media upload, are
    # created up front. After script generation the rows that are needed are
    # made visible by update_textboxes and update_files.
    with gr.Column(visible=False) as clip_section:
        clip_textboxes = []
        clip_files = []
        for i in range(10):  # Max 10 clips
            with gr.Row():
                text_box = gr.Textbox(label=f"Clip {i+1} Text", visible=False)
                file_upload = gr.File(label=f"Upload Media for Clip {i+1}", type="file", visible=False)
                clip_textboxes.append(text_box)
                clip_files.append(file_upload)
        # Revealed by the last step of the script button's event chain.
        generate_video_btn = gr.Button("Generate Video", visible=False)

    ### Output
    # Receives the value returned by generate_video_fn: the output file name,
    # or None when no clip could be produced.
    video_output = gr.Video(label="Generated Video")

    ### Script Generation Logic
    def generate_script_fn(concept):
        """Generate and parse a script for ``concept``.

        The result is limited to the number of clip editor rows that exist in
        the interface, so the stored clip count can never exceed the text and
        file inputs later handed to the video generation step.

        Returns:
            A tuple ``(count, titles, texts)``; ``(0, [], [])`` when script
            generation fails.
        """
        script = generate_script(concept)
        if not script:
            return 0, [], []
        elements = parse_script(script)
        max_clips = len(clip_textboxes)
        titles = [e['prompt'] for e in elements if e['type'] == 'media'][:max_clips]
        texts = [e['text'] for e in elements if e['type'] == 'tts'][:max_clips]
        return len(titles), titles, texts

    def update_textboxes(texts):
        """Build updates for the 10 clip textboxes.

        Slots that have a narration text are filled and shown; the remaining
        slots are cleared and hidden.
        """
        available = len(texts)
        updates = []
        for slot in range(10):
            if slot < available:
                updates.append(gr.update(value=texts[slot], visible=True))
            else:
                updates.append(gr.update(value="", visible=False))
        return updates

    def update_files(n):
        """Build updates that show the first ``n`` of the 10 upload widgets."""
        updates = []
        for slot in range(10):
            shown = slot < n
            updates.append(gr.update(visible=shown))
        return updates

    # Event chain for the "Generate Script" button. Each step runs after the
    # previous one:
    #   1. generate and parse the script into the three state values,
    #   2. fill and reveal the needed narration textboxes,
    #   3. reveal the matching upload widgets,
    #   4. show the clip editing column,
    #   5. show the "Generate Video" button.
    generate_script_btn.click(
        fn=generate_script_fn,
        inputs=[concept],
        outputs=[num_clips, titles_state, initial_texts_state]
    ).then(
        fn=update_textboxes,
        inputs=[initial_texts_state],
        outputs=clip_textboxes
    ).then(
        fn=update_files,
        inputs=[num_clips],
        outputs=clip_files
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[clip_section]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[generate_video_btn]
    )

    ### Video Generation Logic
    def generate_video_fn(resolution, captions, video_percentage, text_color, text_size, caption_bg, music_volume, custom_music, num_clips, titles, *clip_data):
        """Render the final video from the edited texts and optional uploads.

        Args:
            resolution: "Full" for 1920x1080, anything else for 1080x1920.
            captions: "Yes" to pass ``caption_bg`` on; any other value passes
                "transparent", which create_clip() treats as "no subtitles".
            video_percentage: Chance (0-100) of using a stock video rather
                than an image when no media was uploaded for a clip.
            text_color, text_size, caption_bg: Subtitle styling.
            music_volume: Volume factor for the background music.
            custom_music: Uploaded music, as a path or an object exposing the
                path as ``.name``; may be None.
            num_clips: Number of clips to render.
            titles: Section titles, used as media search prompts.
            *clip_data: The 10 narration texts followed by the 10 uploads.

        Returns:
            The output file name, or None when no clip could be created.
        """
        texts = clip_data[:10]
        files = clip_data[10:]
        titles = titles or []
        target_resolution = (1920, 1080) if resolution == "Full" else (1080, 1920)
        effective_caption_bg = caption_bg if captions == "Yes" else "transparent"
        # Never index past the available inputs, whatever count was stored.
        clip_count = min(num_clips or 0, len(texts), len(files), len(titles))

        temp_folder = tempfile.mkdtemp()
        try:
            clips = []
            for i in range(clip_count):
                text = texts[i]
                if not text or not text.strip():
                    # Nothing to narrate: skip before fetching any media.
                    continue

                media_file = files[i]
                if media_file:
                    # Uploads may arrive as a path or as a file object whose
                    # path is stored in `.name`.
                    source_path = getattr(media_file, "name", media_file)
                    ext = os.path.splitext(source_path)[1].lower()
                    media_path = os.path.join(temp_folder, f"clip_{i}{ext}")
                    shutil.copy(source_path, media_path)
                    asset_type = "video" if ext in ['.mp4', '.avi', '.mov'] else "image"
                else:
                    media_asset = generate_media(titles[i], video_percentage, temp_folder)
                    if not media_asset:
                        continue
                    media_path = media_asset['path']
                    asset_type = media_asset['asset_type']

                tts_path = generate_tts(text, 'en', temp_folder)
                if not tts_path:
                    continue

                duration = max(3, len(text.split()) * 0.5)
                clip = create_clip(
                    media_path, asset_type, tts_path, duration, text,
                    text_color, text_size, effective_caption_bg, target_resolution
                )
                if clip:
                    clips.append(clip)

            if not clips:
                return None

            final_video = concatenate_videoclips(clips, method="compose")
            music_path = getattr(custom_music, "name", custom_music)
            final_video = add_background_music(final_video, music_path, music_volume)
            final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24)
            return OUTPUT_VIDEO_FILENAME
        finally:
            # Remove the working files even when rendering raised.
            shutil.rmtree(temp_folder, ignore_errors=True)

    # The input order must match generate_video_fn's signature: the ten named
    # settings/states first, then all clip textboxes, then all clip uploads
    # (received together as *clip_data).
    generate_video_btn.click(
        fn=generate_video_fn,
        inputs=[resolution, captions, video_percentage, text_color, text_size, caption_bg, music_volume, custom_music, num_clips, titles_state] + clip_textboxes + clip_files,
        outputs=[video_output]
    )

# Start the Gradio server; share=True additionally requests a public share link.
app.launch(share=True)