# videoxity / app.py
# zamalali · commit 9a14671: "Clean push: only core files" · 8.89 kB
# (hosting-page links carried over from the file viewer: raw / history / blame)
import os
import cv2
import gradio as gr
from dotenv import load_dotenv
import spaces
from main import (
run,
detect_scenes,
extract_keyframes,
generate_scene_caption,
generate_video_summary,
generate_video_summary_groq,
vqa_matches,
semantic_matches,
remove_scenes,
)
# Let python-dotenv populate the process environment before anything reads it.
load_dotenv()

# Fail fast at import time when the Hugging Face token is missing or empty.
if not os.environ.get("HF_TOKEN"):
    raise ValueError("❌ Error: HF_TOKEN not found in .env file")
@spaces.GPU
def process_video(video_path, query, progress=gr.Progress()):
    """Scene-filtering tab: remove scenes matching the query.

    Runs the pipeline detect scenes -> extract keyframes -> caption ->
    VQA/semantic matching -> trim, reporting progress along the way.

    Args:
        video_path: Path of the uploaded video, as passed in by the UI.
        query: Free-text description of the scenes to remove.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        A ``(video, message)`` tuple. ``video`` is the path of the trimmed
        file on success and ``None`` otherwise; ``message`` is either the
        statistics report, a "nothing matched" notice, or an error string.
        Exceptions are never propagated; they are rendered into ``message``.
    """
    try:
        os.makedirs("outputs", exist_ok=True)
        output_path = os.path.join("outputs", "trimmed_video.mp4")

        # 1) Detect scenes
        progress(0.0, desc="Detecting scenes...")
        scenes = detect_scenes(video_path)

        # 2) Extract keyframes
        progress(0.2, desc="Extracting keyframes...")
        keyframes = extract_keyframes(video_path, scenes)

        # 3) Caption each keyframe (the second item of each pair is the frame)
        progress(0.4, desc="Generating captions...")
        captions = [generate_scene_caption(frame) for _, frame in keyframes]

        # 4) VQA + semantic filtering
        progress(0.6, desc="Analyzing scenes...")
        vqa_mask = vqa_matches(keyframes, query)
        sem_idxs, _ = semantic_matches(captions, query)

        # 5) Build removal list: union of both matchers, de-duplicated, sorted
        vqa_idxs = {i for i, flag in enumerate(vqa_mask) if flag}
        to_remove = sorted(vqa_idxs | set(sem_idxs))

        # 6) Trim via ffmpeg
        progress(0.8, desc="Processing video...")
        if not to_remove:
            return None, "⚠️ No matching scenes found; no trimming done."

        # The output path is fixed, so a file left by an earlier run would
        # pass the existence check below even if this run wrote nothing.
        # Drop it first so only a freshly written file counts as success.
        if os.path.exists(output_path):
            os.remove(output_path)

        remove_scenes(video_path, scenes, to_remove, output_path)

        # Verify the output video
        if not os.path.exists(output_path):
            return None, "❌ Error: Failed to create output video"

        # Check if video is valid; release the handle on every path.
        cap = cv2.VideoCapture(output_path)
        try:
            is_valid = cap.isOpened()
        finally:
            cap.release()
        if not is_valid:
            return None, "❌ Error: Generated video is invalid"

        stats = [
            "✅ Processing complete!",
            f"📊 Total scenes: {len(scenes)}",
            f"🗑️ Scenes removed: {len(to_remove)}",
            f"🎬 Scenes kept: {len(scenes)-len(to_remove)}",
            "\n🔍 Scene captions:",
            *[f"[Scene {i}]: {caption}" for i, caption in enumerate(captions)],
        ]
        return output_path, "\n".join(stats)
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the tab.
        return None, f"❌ Error: {e}"
@spaces.GPU
def generate_video_description(video_path, progress=gr.Progress()):
    """Video-description tab: summarise the whole video from scene captions.

    Detects scenes, extracts keyframes for them, captions every keyframe and
    hands the captions to ``generate_video_summary``.

    Args:
        video_path: Path of the uploaded video, as passed in by the UI.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        The value produced by ``generate_video_summary``, or an error string
        prefixed with "❌ Error:" if any stage raises.
    """
    try:
        progress(0.0, desc="Detecting scenes...")
        scene_list = detect_scenes(video_path)

        progress(0.3, desc="Extracting keyframes...")
        scene_keyframes = extract_keyframes(video_path, scene_list)

        progress(0.6, desc="Captioning scenes...")
        scene_captions = []
        # Only the second item of each keyframe pair (the frame) is captioned.
        for _, keyframe in scene_keyframes:
            scene_captions.append(generate_scene_caption(keyframe))

        # Condense the per-scene captions into the final description.
        return generate_video_summary(scene_captions)
    except Exception as err:
        return f"❌ Error: {err}"
@spaces.GPU
def get_frame_description(video_path, frame_number):
    """Frame-analysis tab: caption a single frame.

    Args:
        video_path: Path of the uploaded video, as passed in by the UI.
        frame_number: Index of the frame to caption; converted with ``int()``
            before seeking.

    Returns:
        ``"Frame <n>:\\n<caption>"`` on success, or a "❌"-prefixed message
        when the video cannot be opened, the frame cannot be read, or any
        other error occurs. Exceptions are never propagated.
    """
    try:
        cap = cv2.VideoCapture(video_path)
        try:
            # Report an unopenable file as such instead of blaming the
            # frame number.
            if not cap.isOpened():
                return "❌ Error: Could not open video"
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_number))
            ret, frame = cap.read()
        finally:
            # Release the capture even if seeking or reading raised.
            cap.release()

        if not ret:
            return "❌ Invalid frame number"
        return f"Frame {frame_number}:\n{generate_scene_caption(frame)}"
    except Exception as e:
        return f"❌ Error: {e}"
# ─── Gradio UI ────────────────────────────────────────────────────────────────
# Builds the `demo` Blocks app: an intro section, three tabs (scene removal,
# video description, single-frame analysis) and a footer. Each tab wires one
# button to one of the handler functions defined above.
# The CSS below hides the page's <footer> element (presumably Gradio's default
# footer — confirm) and styles the classes used by the Markdown snippets.
with gr.Blocks(theme=gr.themes.Soft(), css="""
footer {visibility: hidden}
.custom-footer {
text-align: center;
margin-top: 2em;
margin-bottom: 1em;
color: #666;
}
.description {
color: #666;
font-size: 0.9em;
line-height: 1.5;
}
.tech-stack {
background: #f5f5f5;
padding: 1em;
border-radius: 8px;
margin: 1em 0;
}
""") as demo:
    # Intro: title, short description and the technical-stack box.
    gr.Markdown("""
# Videoxity
A powerful playground for video analysis and manipulation using state-of-the-art Vision-Language models.
<div class="description">
This application demonstrates the capabilities of modern AI in video processing, offering a foundation for developers to build upon and optimize.
Whether you're exploring scene detection, content filtering, or video summarization, Videoxity provides the tools to experiment with and enhance video understanding.
</div>
<div class="tech-stack">
<strong>Technical Stack:</strong>
- Scene Detection: PySceneDetect with ContentDetector
- Vision Models: BLIP (Image Captioning & VQA)
- Language Models: Groq LLM (Llama 3.1)
- Video Processing: OpenCV & FFmpeg
- Embeddings: BGE-Small for semantic search
</div>
""")
    with gr.Tabs():
        # 1) Scene Filtering
        with gr.TabItem("Frames to Cut"):
            gr.Markdown("""
### Remove specific scenes from your video
Upload a video and describe which scenes you want to remove. The AI will analyze each scene and cut out the matching ones.
Examples:
- "Remove the part where there is a cat in the video"
- "Cut out the scene where people are dancing"
""")
            with gr.Row():
                # Left column: inputs (video + query) and the trigger button.
                with gr.Column():
                    vid1 = gr.Video(
                        label="Upload Video",
                        format="mp4",
                        interactive=True
                    )
                    qry1 = gr.Textbox(
                        label="Scenes to Remove",
                        placeholder="e.g., 'Remove the part where there is a cat in the video'",
                        lines=2
                    )
                    btn1 = gr.Button("Process Video", variant="primary")
                # Right column: trimmed video and the textual report.
                with gr.Column():
                    # NOTE(review): this output component is also marked
                    # interactive=True — confirm that is intended.
                    outVid = gr.Video(
                        label="Processed Video",
                        format="mp4",
                        interactive=True
                    )
                    outTxt = gr.Textbox(label="Results", lines=10)
            # process_video returns (video path or None, message), matching
            # the two outputs in order.
            btn1.click(
                fn=process_video,
                inputs=[vid1, qry1],
                outputs=[outVid, outTxt]
            )
        # 2) Video Description
        with gr.TabItem("Video Description"):
            gr.Markdown("""
### Generate a comprehensive description of your video
Get AI-generated descriptions for all scenes in your video.
""")
            with gr.Row():
                with gr.Column():
                    vid2 = gr.Video(label="Upload Video")
                    btn2 = gr.Button("Generate Description", variant="primary")
                with gr.Column():
                    outDesc = gr.Textbox(
                        label="Video Description",
                        lines=15,
                        show_copy_button=True
                    )
            # generate_video_description returns a single value for outDesc.
            btn2.click(
                fn=generate_video_description,
                inputs=[vid2],
                outputs=[outDesc]
            )
        # 3) Frame Analysis
        with gr.TabItem("Frame Analysis"):
            gr.Markdown("""
### Analyze specific frames in your video
Get detailed descriptions for individual frames.
""")
            with gr.Row():
                with gr.Column():
                    vid3 = gr.Video(label="Upload Video")
                    # Non-negative whole-number input, starting at frame 0.
                    fn3 = gr.Number(
                        label="Frame Number",
                        value=0,
                        precision=0,
                        minimum=0
                    )
                    btn3 = gr.Button("Analyze Frame", variant="primary")
                with gr.Column():
                    outFrm = gr.Textbox(
                        label="Frame Description",
                        lines=5,
                        show_copy_button=True
                    )
            # get_frame_description returns a single string for outFrm.
            btn3.click(
                fn=get_frame_description,
                inputs=[vid3, fn3],
                outputs=[outFrm]
            )
    # Add custom centered footer
    # NOTE(review): "custom-footer" is set both on the inner <div> and via
    # elem_classes, so the CSS rule applies to two nested elements — confirm
    # the doubled margins are intended.
    gr.Markdown("""
<div class="custom-footer">
Made with ❤️
</div>
""", elem_classes=["custom-footer"])
if __name__ == "__main__":
    # Start the app only when run as a script: request a share link, show
    # errors in the UI, and turn off the API view.
    launch_options = {
        "share": True,
        "show_error": True,
        "show_api": False,
    }
    demo.launch(**launch_options)