Priyanshi Saxena committed · 9451ca9
Parent(s): 8f482c1
initial commit
Files changed:
- .gitignore +46 -0
- Dockerfile +26 -0
- README.md +42 -0
- app.py +151 -0
- config/__init__.py +0 -0
- config/settings.py +36 -0
- requirements.txt +16 -0
- src/__init__.py +1 -0
- src/pipeline.py +74 -0
- src/prompt_generator.py +67 -0
- src/video_analyzer.py +75 -0
- src/video_generator.py +55 -0
- src/video_processor.py +110 -0
- utils/__init__.py +0 -0
- utils/effects.py +33 -0
- utils/helpers.py +21 -0
.gitignore
ADDED
@@ -0,0 +1,46 @@
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

.vscode/
.idea/
*.swp
*.swo

.DS_Store
Thumbs.db

/tmp/
*.tmp
*.temp

/models/
/cache/

.env.local
secrets.py
Dockerfile
ADDED
@@ -0,0 +1,26 @@
FROM python:3.10-slim

RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

RUN mkdir -p /tmp/ad_processing /tmp/ad_cache

EXPOSE 7860

ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

CMD ["python", "app.py"]
README.md
CHANGED
@@ -11,3 +11,45 @@ license: mit
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

# 🎬 Enhanced AI Ad Completion Tool

Transform your videos into professional commercials using AI! This tool combines multiple videos, generates new content with Stable Video Diffusion, and creates polished ads with intelligent prompt generation powered by Google's Gemini API.

## Features

- 🎥 **Dual Video Upload**: Combine one or two videos seamlessly
- 🤖 **AI Video Generation**: Create new content using Stable Video Diffusion
- 🧠 **Gemini Integration**: Intelligent prompt generation based on video analysis
- 🎨 **Professional Processing**: Color grading, smooth transitions, and effects
- ⚡ **Smart Fallbacks**: Reliable processing even when AI models fail
- 🎯 **Flexible Duration**: Extend videos to your target length

## How to Use

1. **Upload Videos**: Add your primary video (required) and, optionally, a secondary video
2. **Set Duration**: Choose your target duration (10-45 seconds)
3. **Configure AI**: Enable or disable AI video generation
4. **Process**: Click "Create Enhanced Ad" and wait for processing
5. **Download**: Get your professionally enhanced commercial!

## API Requirements

- **Gemini API Key**: Get your free API key from [Google AI Studio](https://makersuite.google.com/app/apikey)
- **GPU Access**: HF Spaces provides free GPU access for video generation

## Technical Details

- **Video Analysis**: OpenCV + Whisper
- **AI Generation**: Stable Video Diffusion XL
- **Prompt Enhancement**: Google Gemini 1.5 Flash
- **Processing**: MoviePy + FFmpeg
- **Interface**: Gradio 4.44.0

## Development

Built with modern AI technologies for quality and reliability. The tool automatically handles a range of video formats and resolutions, and provides intelligent fallbacks.

## License

Apache 2.0 - feel free to use and modify!
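
For a scripted run outside the Space UI, here is a minimal sketch assuming the repository layout in this commit; the video paths and API key are placeholders:

# Hypothetical headless run of the pipeline (paths and key are placeholders).
from src.pipeline import EnhancedAdPipeline

pipeline = EnhancedAdPipeline()
pipeline.set_gemini_key("YOUR_GEMINI_API_KEY")  # optional; enables Gemini-generated prompts
result = pipeline.process_videos("primary.mp4", "secondary.mp4", target_duration=20, use_ai=True)
if result['success']:
    print("Enhanced ad written to", result['output_video'])
else:
    print("Processing failed:", result.get('error'))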
app.py
ADDED
@@ -0,0 +1,151 @@
import gradio as gr
import os
import warnings
warnings.filterwarnings('ignore')

from src.pipeline import EnhancedAdPipeline
from config.settings import UI_CONFIG

def create_interface():
    pipeline = EnhancedAdPipeline()

    def process_videos_ui(video1, video2, target_duration, use_ai, gemini_key, progress=gr.Progress()):
        if video1 is None:
            return None, "❌ Please upload at least the first video"

        try:
            if gemini_key and gemini_key.strip():
                pipeline.set_gemini_key(gemini_key.strip())

            progress(0.1, desc="🔍 Analyzing videos...")

            result = pipeline.process_videos(video1, video2, target_duration, use_ai, progress)

            if result['success']:
                progress(1.0, desc="🎉 Complete!")

                status = "✅ Video processing successful!\n"
                status += f"📊 Original duration: {result.get('original_duration', 0):.1f}s\n"
                if result.get('second_video'):
                    status += "📊 Second video included\n"
                if result.get('ai_generated'):
                    status += "🤖 AI-generated extension added\n"
                status += f"🎯 Final duration: ~{target_duration}s"

                return result['output_video'], status
            else:
                return None, f"❌ Error: {result.get('error', 'Unknown error')}"

        except Exception as e:
            return None, f"❌ Error: {str(e)}"

    with gr.Blocks(**UI_CONFIG) as app:
        gr.Markdown("""
        # 🎬 Enhanced AI Ad Completion Tool

        **Transform your videos into professional commercials using AI!**

        ✨ **Features:** Dual video upload • AI video generation • Gemini-powered prompts • Professional processing
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📹 Video Inputs")

                video1_input = gr.Video(
                    label="🎬 Primary Video (Required)",
                    format="mp4"
                )

                video2_input = gr.Video(
                    label="🎬 Secondary Video (Optional)",
                    format="mp4"
                )

                gr.Markdown("### ⚙️ Settings")

                duration_slider = gr.Slider(
                    minimum=10,
                    maximum=45,
                    value=20,
                    step=1,
                    label="🎯 Target Duration (seconds)"
                )

                ai_toggle = gr.Checkbox(
                    value=True,
                    label="🤖 Use AI Video Generation",
                    info="Generate new content using AI (requires GPU)"
                )

                gemini_key_input = gr.Textbox(
                    label="🔑 Gemini API Key (Optional)",
                    placeholder="Enter your Gemini API key for better prompts",
                    type="password"
                )

                process_btn = gr.Button(
                    "🚀 Create Enhanced Ad",
                    variant="primary",
                    size="lg"
                )

                gr.Markdown("""
                ### 📋 How to Use:
                1. **Upload primary video** (your main content)
                2. **Optional**: Upload second video to combine
                3. **Set target duration** for final output
                4. **Optional**: Add Gemini API key for better prompts
                5. **Enable AI generation** for new content
                6. **Click "Create Enhanced Ad"**

                **Get Gemini API Key:** [Google AI Studio](https://makersuite.google.com/app/apikey) (Free)
                """)

            with gr.Column(scale=2):
                gr.Markdown("### 🎉 Output")

                video_output = gr.Video(
                    label="Enhanced Ad Video",
                    format="mp4"
                )

                status_output = gr.Textbox(
                    label="📋 Processing Status",
                    interactive=False,
                    lines=6
                )

                gr.Markdown("""
                ### 🎨 What This Tool Does:

                **🔍 Video Analysis:**
                - Extracts key frames and visual elements
                - Transcribes audio content using Whisper
                - Analyzes color palette and scene classification

                **🤖 AI Enhancement:**
                - Uses Gemini API for intelligent prompt generation
                - Generates new video content with Stable Video Diffusion
                - Applies professional color grading and effects

                **🎬 Smart Processing:**
                - Combines multiple videos with smooth transitions
                - Extends to target duration intelligently
                - Preserves audio quality throughout
                """)

        process_btn.click(
            fn=process_videos_ui,
            inputs=[video1_input, video2_input, duration_slider, ai_toggle, gemini_key_input],
            outputs=[video_output, status_output]
        )

    return app

if __name__ == "__main__":
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860, share=True)
config/__init__.py
ADDED
File without changes
config/settings.py
ADDED
@@ -0,0 +1,36 @@
import os

DEVICE = "cuda" if os.environ.get('CUDA_AVAILABLE') == 'true' else "cpu"

MODEL_CONFIG = {
    'whisper_model': 'tiny',
    'svd_model': 'stabilityai/stable-video-diffusion-img2vid-xt',
    'torch_dtype': 'float16' if DEVICE == 'cuda' else 'float32',
}

PROCESSING_CONFIG = {
    'max_frames': 25,
    'generation_steps': 8,
    'guidance_scale': 3.0,
    'fps': 24,
    'default_resolution': (1024, 576),
    'fallback_resolution': (512, 320),
}

UI_CONFIG = {
    'title': "Enhanced AI Ad Tool",
    'theme': "soft",
    'css': """
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    """
}

PATHS = {
    'temp_dir': '/tmp/ad_processing',
    'cache_dir': '/tmp/ad_cache',
}

os.makedirs(PATHS['temp_dir'], exist_ok=True)
os.makedirs(PATHS['cache_dir'], exist_ok=True)
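
One gotcha worth noting: DEVICE is resolved once, at import time, from the CUDA_AVAILABLE environment variable, so that variable must be set before config.settings is first imported. A small sketch:

# Sketch: set the env var before the first import of config.settings.
import os
os.environ["CUDA_AVAILABLE"] = "true"  # pretend a GPU is present

from config.settings import DEVICE, MODEL_CONFIG
print(DEVICE)                         # -> "cuda"
print(MODEL_CONFIG['torch_dtype'])    # -> "float16"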
requirements.txt
ADDED
@@ -0,0 +1,16 @@
gradio==4.44.0
torch==2.1.0
torchvision==0.16.0
torchaudio==2.1.0
diffusers==0.24.0
transformers==4.36.0
accelerate==0.25.0
opencv-python==4.8.1.78
Pillow==10.1.0
moviepy==1.0.3
requests==2.31.0
openai-whisper==20231117
google-generativeai==0.3.2
imageio[ffmpeg]==2.33.1
scikit-learn==1.3.2
numpy==1.24.3
src/__init__.py
ADDED
@@ -0,0 +1 @@
__version__ = "1.0.0"
src/pipeline.py
ADDED
@@ -0,0 +1,74 @@
from .video_analyzer import VideoAnalyzer
from .prompt_generator import PromptGenerator
from .video_generator import VideoGenerator
from .video_processor import VideoProcessor
import os

class EnhancedAdPipeline:
    def __init__(self):
        self.analyzer = VideoAnalyzer()
        self.prompt_gen = PromptGenerator()
        self.video_gen = VideoGenerator()
        self.processor = VideoProcessor()

    def set_gemini_key(self, key):
        self.prompt_gen.set_api_key(key)

    def process_videos(self, video1_path, video2_path, target_duration, use_ai, progress=None):
        if progress:
            progress(0.1, desc="Analyzing first video...")

        analysis1 = self.analyzer.analyze_video(video1_path)
        if not analysis1:
            return {'success': False, 'error': 'Failed to analyze first video'}

        if progress:
            progress(0.3, desc="Merging videos...")

        merged_video = self.processor.merge_videos(video1_path, video2_path, target_duration)
        if not merged_video:
            return {'success': False, 'error': 'Failed to merge videos'}

        current_duration = analysis1['duration']
        if video2_path and os.path.exists(video2_path):
            analysis2 = self.analyzer.analyze_video(video2_path)
            if analysis2:
                current_duration += analysis2['duration']

        final_video = merged_video
        ai_generated = False

        if use_ai and current_duration < target_duration and analysis1.get('last_frame') is not None:
            if progress:
                progress(0.5, desc="Generating AI prompt...")

            prompt = self.prompt_gen.generate_video_prompt(analysis1, target_duration)

            if progress:
                progress(0.6, desc="Generating AI video...")

            ai_frames = self.video_gen.generate_video(
                analysis1['last_frame'],
                prompt,
                duration=min(3, target_duration - current_duration)
            )

            if ai_frames:
                if progress:
                    progress(0.8, desc="Adding AI extension...")

                extended_video = self.processor.add_ai_extension(merged_video, ai_frames, target_duration)
                if extended_video:
                    final_video = extended_video
                    ai_generated = True

        if progress:
            progress(0.95, desc="Finalizing...")

        return {
            'success': True,
            'output_video': final_video,
            'original_duration': analysis1['duration'],
            'second_video': video2_path is not None,
            'ai_generated': ai_generated
        }
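
The progress argument only needs to be a callable accepting (fraction, desc=...), matching how gr.Progress is invoked in app.py, so a plain logger works for scripted runs. A sketch with a placeholder path:

# Sketch: any (fraction, desc=...) callable can stand in for gr.Progress.
from src.pipeline import EnhancedAdPipeline

def log_progress(fraction, desc=""):
    print(f"[{fraction:4.0%}] {desc}")

result = EnhancedAdPipeline().process_videos("primary.mp4", None, 20, use_ai=False, progress=log_progress)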
src/prompt_generator.py
ADDED
@@ -0,0 +1,67 @@
import google.generativeai as genai
from config.settings import PROCESSING_CONFIG

class PromptGenerator:
    def __init__(self):
        self.model = None
        self.gemini_key = None

    def set_api_key(self, key):
        if key and key.strip():
            try:
                genai.configure(api_key=key.strip())
                self.model = genai.GenerativeModel('gemini-1.5-flash')
                self.gemini_key = key.strip()
            except:
                self.model = None

    def generate_video_prompt(self, analysis, target_duration):
        if not self.model or not analysis:
            return self._fallback_prompt(analysis)

        try:
            context = f"""
            Video Analysis:
            - Duration: {analysis.get('duration', 0):.1f}s
            - Transcript: {analysis.get('transcript', 'No audio')}
            - Scene type: {analysis.get('scene_type', 'unknown')}
            - Colors: {', '.join(analysis.get('dominant_colors', [])[:3])}

            Target: {target_duration}s commercial

            Generate a creative prompt for Stable Video Diffusion to extend this commercial.
            Keep it under 100 words, focus on visual elements, camera movement, and commercial appeal.
            """

            response = self.model.generate_content(context)
            prompt = response.text.strip()

            if len(prompt) > 20:
                return prompt
            else:
                return self._fallback_prompt(analysis)

        except Exception as e:
            return self._fallback_prompt(analysis)

    def _fallback_prompt(self, analysis):
        if not analysis:
            return "professional commercial product showcase, smooth camera movement, high quality, cinematic lighting"

        scene_type = analysis.get('scene_type', 'product')
        transcript = analysis.get('transcript', '').lower()

        prompts = {
            'lifestyle': "vibrant lifestyle commercial, people enjoying, dynamic movement, warm lighting",
            'tech': "sleek tech commercial, modern interface, clean design, professional lighting",
            'product': "premium product showcase, elegant presentation, studio lighting"
        }

        base_prompt = prompts.get(scene_type, prompts['product'])

        if 'outdoor' in transcript:
            base_prompt += ", outdoor setting"
        elif 'indoor' in transcript:
            base_prompt += ", indoor environment"

        return base_prompt + ", commercial quality, smooth transitions"
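
Without an API key (or when Gemini fails), the class degrades to the template prompts above. A quick sketch of that fallback path, using a made-up analysis dict:

# Sketch: no API key set, so _fallback_prompt supplies the template prompt.
from src.prompt_generator import PromptGenerator

gen = PromptGenerator()
analysis = {'scene_type': 'tech', 'transcript': 'An indoor demo of our digital product.'}
print(gen.generate_video_prompt(analysis, target_duration=20))
# -> sleek tech commercial, modern interface, clean design, professional lighting,
#    indoor environment, commercial quality, smooth transitions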
src/video_analyzer.py
ADDED
@@ -0,0 +1,75 @@
import cv2
import numpy as np
import whisper
from sklearn.cluster import KMeans

class VideoAnalyzer:
    def __init__(self):
        self.whisper_model = None
        try:
            self.whisper_model = whisper.load_model("tiny")
        except:
            pass

    def analyze_video(self, video_path):
        try:
            cap = cv2.VideoCapture(video_path)
            fps = cap.get(cv2.CAP_PROP_FPS) or 24
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            duration = frame_count / fps

            frames = []
            for idx in [0, frame_count//4, frame_count//2, 3*frame_count//4, frame_count-1]:
                cap.set(cv2.CAP_PROP_POS_FRAMES, min(idx, frame_count-1))
                ret, frame = cap.read()
                if ret:
                    frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            cap.release()

            transcript = ""
            if self.whisper_model:
                try:
                    result = self.whisper_model.transcribe(video_path)
                    transcript = result["text"].strip()
                except:
                    pass

            colors = ['#808080']
            if frames:
                try:
                    all_pixels = []
                    for frame in frames[:3]:
                        resized = cv2.resize(frame, (50, 50))
                        pixels = resized.reshape(-1, 3)
                        all_pixels.extend(pixels[::5])

                    if all_pixels:
                        kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
                        kmeans.fit(all_pixels)
                        colors = [f"#{r:02x}{g:02x}{b:02x}" for r, g, b in kmeans.cluster_centers_.astype(int)]
                except:
                    pass

            scene_type = "product"
            if transcript:
                transcript_lower = transcript.lower()
                if any(word in transcript_lower for word in ['happy', 'fun', 'joy', 'celebration']):
                    scene_type = "lifestyle"
                elif any(word in transcript_lower for word in ['tech', 'innovation', 'digital']):
                    scene_type = "tech"

            return {
                'duration': duration,
                'fps': fps,
                'resolution': (width, height),
                'frames': frames,
                'last_frame': frames[-1] if frames else None,
                'transcript': transcript,
                'dominant_colors': colors,
                'scene_type': scene_type
            }
        except Exception as e:
            return None
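
A short usage sketch (the path is a placeholder); analyze_video returns None on failure, so callers should check before indexing:

from src.video_analyzer import VideoAnalyzer

info = VideoAnalyzer().analyze_video("primary.mp4")
if info:
    print(f"{info['duration']:.1f}s @ {info['fps']:.0f} fps, {info['resolution'][0]}x{info['resolution'][1]}")
    print("scene:", info['scene_type'], "| palette:", info['dominant_colors'])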
src/video_generator.py
ADDED
@@ -0,0 +1,55 @@
import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image
import numpy as np
from config.settings import DEVICE, MODEL_CONFIG, PROCESSING_CONFIG

class VideoGenerator:
    def __init__(self):
        self.pipeline = None
        self._load_models()

    def _load_models(self):
        if DEVICE == "cuda":
            try:
                self.pipeline = StableVideoDiffusionPipeline.from_pretrained(
                    MODEL_CONFIG['svd_model'],
                    torch_dtype=torch.float16,
                    variant="fp16"
                )
                self.pipeline.to("cuda")
                self.pipeline.enable_model_cpu_offload()
            except Exception as e:
                print(f"Failed to load SVD: {e}")

    def generate_video(self, image, prompt, duration=2):
        if not self.pipeline:
            return None

        try:
            if isinstance(image, np.ndarray):
                if image.dtype == np.float32:
                    image = (image * 255).astype(np.uint8)
                image = Image.fromarray(image)

            image = image.resize(PROCESSING_CONFIG['default_resolution'])

            num_frames = min(PROCESSING_CONFIG['max_frames'], int(duration * PROCESSING_CONFIG['fps']))

            frames = self.pipeline(
                image,
                height=PROCESSING_CONFIG['default_resolution'][1],
                width=PROCESSING_CONFIG['default_resolution'][0],
                num_frames=num_frames,
                decode_chunk_size=8,
                motion_bucket_id=127,
                fps=7,
                noise_aug_strength=0.02,
                num_inference_steps=PROCESSING_CONFIG['generation_steps']
            ).frames[0]

            return [np.array(frame) for frame in frames]

        except Exception as e:
            print(f"Video generation failed: {e}")
            return None
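
A sketch of calling the generator (placeholder image path). On CPU, _load_models never loads the pipeline, so generate_video returns None; note also that SVD is image-to-video, so the prompt parameter is accepted but never passed to the model:

# Sketch: requires DEVICE == "cuda"; returns None otherwise.
from PIL import Image
from src.video_generator import VideoGenerator

gen = VideoGenerator()
frames = gen.generate_video(Image.open("last_frame.png"), prompt="unused by SVD", duration=2)
if frames is not None:
    print(len(frames), "frames, each", frames[0].shape)  # uint8 arrays, e.g. (576, 1024, 3)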
src/video_processor.py
ADDED
@@ -0,0 +1,110 @@
import moviepy.editor as mp
import numpy as np
import os
from config.settings import PATHS, PROCESSING_CONFIG

class VideoProcessor:
    def __init__(self):
        pass

    def merge_videos(self, video1_path, video2_path=None, target_duration=20):
        try:
            clips = []
            total_duration = 0

            clip1 = mp.VideoFileClip(video1_path)
            clips.append(clip1)
            total_duration += clip1.duration

            if video2_path and os.path.exists(video2_path):
                clip2 = mp.VideoFileClip(video2_path)
                clips.append(clip2.crossfadein(0.5))
                total_duration += clip2.duration

            if len(clips) > 1:
                merged = mp.concatenate_videoclips(clips, method="compose")
            else:
                merged = clips[0]

            if total_duration < target_duration:
                extend_time = target_duration - total_duration
                if extend_time > 0:
                    loop_times = int(extend_time / merged.duration) + 1
                    extended = mp.concatenate_videoclips([merged] * (loop_times + 1))
                    merged = extended.subclip(0, target_duration)
            elif total_duration > target_duration:
                merged = merged.subclip(0, target_duration)

            merged = self._apply_effects(merged)

            output_path = os.path.join(PATHS['temp_dir'], f"merged_{hash(video1_path)}.mp4")
            merged.write_videofile(
                output_path,
                fps=PROCESSING_CONFIG['fps'],
                codec='libx264',
                audio_codec='aac',
                temp_audiofile=os.path.join(PATHS['temp_dir'], 'temp_audio.m4a'),
                remove_temp=True,
                logger=None,
                verbose=False
            )

            for clip in clips:
                clip.close()
            merged.close()

            return output_path

        except Exception as e:
            print(f"Video merge failed: {e}")
            return None

    def add_ai_extension(self, video_path, ai_frames, target_duration):
        if not ai_frames or not video_path:
            return video_path

        try:
            main_clip = mp.VideoFileClip(video_path)

            frame_array = np.array(ai_frames)
            ai_clip = mp.ImageSequenceClip([frame for frame in frame_array], fps=PROCESSING_CONFIG['fps'])

            ai_clip = ai_clip.resize(main_clip.size)

            combined = mp.concatenate_videoclips([main_clip, ai_clip.crossfadein(0.3)])

            if combined.duration > target_duration:
                combined = combined.subclip(0, target_duration)

            output_path = os.path.join(PATHS['temp_dir'], f"extended_{hash(video_path)}.mp4")
            combined.write_videofile(
                output_path,
                fps=PROCESSING_CONFIG['fps'],
                codec='libx264',
                temp_audiofile=os.path.join(PATHS['temp_dir'], 'temp_audio2.m4a'),
                remove_temp=True,
                logger=None,
                verbose=False
            )

            main_clip.close()
            ai_clip.close()
            combined.close()

            return output_path

        except Exception as e:
            print(f"AI extension failed: {e}")
            return video_path

    def _apply_effects(self, clip):
        try:
            def color_correct(get_frame, t):
                frame = get_frame(t)
                enhanced = np.clip(frame * 1.05 + 2, 0, 255)
                enhanced[:, :, 0] = np.clip(enhanced[:, :, 0] * 1.02, 0, 255)
                return enhanced.astype(np.uint8)

            return clip.fl(color_correct)
        except:
            return clip
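
A minimal sketch of the merge path (placeholder paths); merge_videos returns the output file path under PATHS['temp_dir'], or None on failure:

from src.video_processor import VideoProcessor

proc = VideoProcessor()
out = proc.merge_videos("primary.mp4", "secondary.mp4", target_duration=20)
print("merged clip:", out)  # e.g. /tmp/ad_processing/merged_<hash>.mp4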
utils/__init__.py
ADDED
File without changes
utils/effects.py
ADDED
@@ -0,0 +1,33 @@
import numpy as np
import cv2

class ColorGrading:
    def apply_commercial_grade(self, clip):
        def color_grade(get_frame, t):
            frame = get_frame(t)
            enhanced = np.clip(frame * 1.05 + 3, 0, 255)
            enhanced = self._temperature_adjust(enhanced)
            return enhanced.astype(np.uint8)
        return clip.fl(color_grade)

    def _temperature_adjust(self, frame):
        frame[:, :, 0] = np.clip(frame[:, :, 0] * 1.02, 0, 255)
        frame[:, :, 2] = np.clip(frame[:, :, 2] * 0.98, 0, 255)
        return frame

class TransitionEffects:
    def crossfade(self, clip, duration):
        return clip.crossfadein(duration)

    def slow_zoom(self, clip, zoom_factor=0.05):
        def zoom_effect(get_frame, t):
            frame = get_frame(t)
            progress = t / clip.duration
            current_zoom = 1 + progress * zoom_factor
            h, w = frame.shape[:2]
            new_h, new_w = int(h / current_zoom), int(w / current_zoom)
            start_y = (h - new_h) // 2
            start_x = (w - new_w) // 2
            cropped = frame[start_y:start_y + new_h, start_x:start_x + new_w]
            return cv2.resize(cropped, (w, h))
        return clip.fl(zoom_effect)
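
These helpers plug straight into MoviePy clips. A sketch applying the slow zoom, assuming the moviepy 1.x API pinned in requirements.txt (placeholder paths):

import moviepy.editor as mp
from utils.effects import TransitionEffects

clip = mp.VideoFileClip("primary.mp4")
zoomed = TransitionEffects().slow_zoom(clip, zoom_factor=0.08)  # ~8% push-in over the clip
zoomed.write_videofile("zoomed.mp4", logger=None)
clip.close()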
utils/helpers.py
ADDED
@@ -0,0 +1,21 @@
import cv2
import os

def get_video_info(video_path):
    try:
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS) or 24
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        duration = frame_count / fps if fps > 0 else 0
        cap.release()
        return {'fps': fps, 'duration': duration, 'resolution': (width, height), 'frame_count': frame_count}
    except:
        return None

def validate_video_file(video_path):
    if not video_path or not os.path.exists(video_path):
        return False
    info = get_video_info(video_path)
    return info is not None and info['duration'] > 0
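
And a sketch of the validation helpers guarding a processing run (placeholder path):

from utils.helpers import get_video_info, validate_video_file

path = "primary.mp4"
if validate_video_file(path):
    info = get_video_info(path)
    print(f"{info['frame_count']} frames, {info['duration']:.1f}s at {info['fps']:.0f} fps")
else:
    print("Not a readable video:", path)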