import streamlit as st
import os
import cv2
import torch
from PIL import Image
from gtts import gTTS
from scenedetect import open_video, SceneManager, ContentDetector
from transformers import BlipProcessor, BlipForConditionalGeneration
from openai import OpenAI
import base64
import moviepy.editor as mp
# Load AI models
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
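
# Note: the BLIP weights are pulled from the Hugging Face Hub on first run, so the initial
# startup of the Space can take noticeably longer than later restarts.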
# Read the OpenAI API key from the environment instead of hard-coding a secret in the source
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
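
# A minimal way to supply the key when running locally (assuming this file is saved as app.py):
#   export OPENAI_API_KEY="sk-..."
#   streamlit run app.py
# On Hugging Face Spaces the key can instead be added as a repository secret, which is exposed
# to the app as an environment variable of the same name.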
# Streamlit App UI
st.title("AI-Powered Video Summarization")
# Define custom CSS
def set_background(image_file):
    with open(image_file, "rb") as image:
        encoded_string = base64.b64encode(image.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url("data:image/jpg;base64,{encoded_string}");
            background-size: cover;
            background-position: center;
            background-repeat: no-repeat;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
# Set background
set_background("background.jpg")
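
# Note: set_background assumes a background.jpg file sits next to the script; if it is missing,
# the open() call above raises FileNotFoundError and the app stops with an error.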
uploaded_file = st.file_uploader("Upload a Video File", type=["mp4"])

if uploaded_file:
    # Save the upload to disk so OpenCV and PySceneDetect can read it
    video_path = "input_video.mp4"
    with open(video_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    st.video(video_path)

    # Scene Detection & Frame Extraction
    st.write("Detecting scene changes and extracting key frames...")
    def extract_key_frames(video_path, output_folder="frames"):
        os.makedirs(output_folder, exist_ok=True)
        video = open_video(video_path)
        scene_manager = SceneManager()
        scene_manager.add_detector(ContentDetector(threshold=27.0))
        scene_manager.detect_scenes(video)
        scenes = scene_manager.get_scene_list()

        cap = cv2.VideoCapture(video_path)
        for i, (start, end) in enumerate(scenes):
            frame_number = start.get_frames()  # Grab the frame at the start of each scene
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = cap.read()
            if ret:
                frame_path = os.path.join(output_folder, f"scene_{i+1}.jpg")
                cv2.imwrite(frame_path, frame)
                print(f"Saved: {frame_path}")
        cap.release()

    extract_key_frames(video_path)
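
    # ContentDetector's threshold controls sensitivity: lower values split the video into more
    # scenes (more key frames), higher values into fewer. If scene-start frames tend to land on
    # transition blur, a hedged alternative is to grab each scene's midpoint instead:
    #   middle = (start.get_frames() + end.get_frames()) // 2
    #   cap.set(cv2.CAP_PROP_POS_FRAMES, middle)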

    # Caption Generation
    st.write("Generating captions for extracted frames...")

    def generate_caption(image_path):
        image = Image.open(image_path).convert("RGB")
        inputs = caption_processor(image, return_tensors="pt")
        caption_ids = caption_model.generate(**inputs)
        return caption_processor.decode(caption_ids[0], skip_special_tokens=True)
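
    # BLIP inference runs on CPU here. Since torch is already imported, an optional tweak
    # (not part of the original flow) is to move the model and inputs to a GPU when available:
    #   device = "cuda" if torch.cuda.is_available() else "cpu"
    #   caption_model.to(device)
    #   inputs = inputs.to(device)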

    captions = []
    for filename in sorted(os.listdir("frames")):
        if filename.endswith(".jpg"):
            image_path = os.path.join("frames", filename)
            captions.append(generate_caption(image_path))
    # st.write("Generated Captions:", captions)

    # Summarization
    st.write("Summarizing captions using AI...")

    def summarize_captions(captions):
        prompt = f"Summarize the following sequence of video frames into a meaningful story under 800 characters:\n\n{captions}"
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an AI that summarizes video content."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=200,
        )
        return completion.choices[0].message.content

    summary = summarize_captions(captions)
    st.write(summary)
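
    # The chat.completions call raises if the API key is missing or the request fails; a hedged
    # guard (not in the original) could wrap it like:
    #   try:
    #       summary = summarize_captions(captions)
    #   except Exception as e:
    #       st.error(f"Summarization failed: {e}")
    #       st.stop()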

    # Text-to-Speech
    st.write("Generating voice narration...")

    def text_to_speech(text, output_audio="summary_audio.mp3"):
        tts = gTTS(text, lang="en")
        tts.save(output_audio)

    text_to_speech(summary)
    st.audio("summary_audio.mp3")
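
    # gTTS synthesizes speech by calling Google's online TTS endpoint, so this step needs
    # outbound network access from wherever the app is running.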

    # Video Summary
    st.write("Video Summary:")

    def create_summary_video(image_folder, output_video):
        images = sorted([os.path.join(image_folder, img) for img in os.listdir(image_folder) if img.endswith(".jpg")])
        clips = [mp.ImageClip(img).set_duration(2) for img in images]  # 2 seconds per frame
        video = mp.concatenate_videoclips(clips, method="compose")
        video.write_videofile(output_video, fps=24)

    create_summary_video("frames", "summary_video.mp4")
    st.video("summary_video.mp4")
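
    # write_videofile re-encodes the frame sequence on every rerun and can take a while for
    # many scenes; wrapping the render in st.spinner("Rendering summary video...") is an
    # optional way (not in the original) to show progress in the UI.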

    # # Combine Audio & Video
    # st.write("Merging audio with the video...")
    # def add_audio_to_video(video_path, audio_path, output_video="final_video.mp4"):
    #     video = mp.VideoFileClip(video_path)
    #     audio = mp.AudioFileClip(audio_path)
    #     if audio.duration > video.duration:
    #         audio = audio.subclip(0, video.duration)
    #     final_video = video.set_audio(audio)
    #     final_video.write_videofile(output_video, codec="libx264", audio_codec="aac")
    # add_audio_to_video("summary_video.mp4", "summary_audio.mp3")
    # st.video("final_video.mp4")