# NOTE: the following lines are residue from the hosting site's file-listing
# page (author/commit metadata), not Python code; kept as comments so the
# module parses.
# hina19's picture
# Update app.py
# 398f54d verified
# raw
# history blame
# 5.31 kB
import streamlit as st
import os
import cv2
import torch
from PIL import Image
from gtts import gTTS
from scenedetect import open_video, SceneManager, ContentDetector
from transformers import BlipProcessor, BlipForConditionalGeneration
from openai import OpenAI
import base64
import moviepy
# Load AI models
# BLIP processor + model pair used to caption the extracted key frames.
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
# SECURITY: never commit API keys in source. Read the key from the
# environment (set OPENAI_API_KEY in the deployment's secret store);
# the previously hard-coded key must be revoked.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY")
)
# Streamlit App UI
# Page heading rendered at the top of the app.
st.title("πŸŽ₯ AI-Powered Video Summarization")
# Define custom CSS
def set_background(image_file):
    """Set a full-page background image for the Streamlit app.

    Reads *image_file* from disk, base64-encodes it, and injects CSS
    targeting the ``.stApp`` container via ``st.markdown``.

    Args:
        image_file: Path to the background image on disk.

    A missing file produces a warning instead of crashing the app.
    """
    try:
        with open(image_file, "rb") as image:
            encoded_string = base64.b64encode(image.read()).decode()
    except FileNotFoundError:
        # Degrade gracefully: keep the default background rather than
        # aborting the whole script run.
        st.warning(f"Background image '{image_file}' not found; using default background.")
        return
    st.markdown(
        f"""
<style>
.stApp {{
background-image: url("data:image/jpg;base64,{encoded_string}");
background-size: cover;
background-position: center;
background-repeat: no-repeat;
}}
</style>
""",
        unsafe_allow_html=True
    )
# Set background
set_background("background.jpg")

uploaded_file = st.file_uploader("πŸ“€ Upload a Video File", type=["mp4"])
if uploaded_file:
    # Persist the upload to disk so OpenCV/PySceneDetect can open it by path.
    video_path = "input_video.mp4"
    video_bytes = uploaded_file.getbuffer()
    with open(video_path, "wb") as dst:
        dst.write(video_bytes)
    # Echo the uploaded clip back to the user.
    st.video(video_path)
    # Scene Detection & Frame Extraction
    st.write("πŸ” Detecting scene changes and extracting key frames...")
def extract_key_frames(video_path, output_folder="frames", threshold=27.0):
    """Detect scene changes in *video_path* and save one JPEG per scene.

    Runs PySceneDetect's ContentDetector to find scene boundaries, then
    seeks to the first frame of each scene with OpenCV and writes it to
    *output_folder* as ``scene_<i>.jpg``.

    Args:
        video_path: Path of the video file to analyze.
        output_folder: Directory (created if missing) to write frames into.
        threshold: ContentDetector sensitivity; lower finds more scenes.

    Returns:
        List of saved frame paths (empty if no scenes were detected).
    """
    os.makedirs(output_folder, exist_ok=True)
    video = open_video(video_path)
    scene_manager = SceneManager()
    scene_manager.add_detector(ContentDetector(threshold=threshold))
    scene_manager.detect_scenes(video)
    scenes = scene_manager.get_scene_list()

    saved_paths = []
    cap = cv2.VideoCapture(video_path)
    try:
        for i, (start, end) in enumerate(scenes):
            # Seek to the scene's first frame and grab it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, start.get_frames())
            ret, frame = cap.read()
            if ret:
                frame_path = os.path.join(output_folder, f"scene_{i+1}.jpg")
                cv2.imwrite(frame_path, frame)
                saved_paths.append(frame_path)
    finally:
        # Always release the capture handle, even if a read raises.
        cap.release()
    return saved_paths
# Run scene detection on the uploaded video; key frames land in ./frames.
extract_key_frames(video_path)
# Caption Generation
st.write("πŸ“ Generating captions for extracted frames...")
def generate_caption(image_path):
    """Generate a BLIP caption for one image file.

    Args:
        image_path: Path of an image readable by PIL.

    Returns:
        Decoded caption string with special tokens stripped.
    """
    image = Image.open(image_path).convert("RGB")
    inputs = caption_processor(image, return_tensors="pt")
    # Inference only: disabling gradient tracking saves memory and time.
    with torch.no_grad():
        caption_ids = caption_model.generate(**inputs)
    return caption_processor.decode(caption_ids[0], skip_special_tokens=True)
def _scene_number(filename):
    """Sort key: numeric index n from 'scene_<n>.jpg' (0 if unparsable)."""
    try:
        return int(filename.rsplit("_", 1)[1].split(".")[0])
    except (IndexError, ValueError):
        return 0

# Lexicographic sort would order scene_10 before scene_2 and scramble the
# story sequence, so sort frames by their numeric scene index instead.
frame_files = sorted(
    (f for f in os.listdir("frames") if f.endswith(".jpg")),
    key=_scene_number,
)
captions = [generate_caption(os.path.join("frames", f)) for f in frame_files]
# st.write("πŸ“„ Generated Captions:", captions)
# Summarization
st.write("πŸ“– Summarizing captions using AI...")
def summarize_captions(captions):
    """Summarize per-frame captions into a short narrative via OpenAI.

    Args:
        captions: Sequence of caption strings in scene order.

    Returns:
        The model's summary text.
    """
    # Join captions one per line rather than interpolating the Python list
    # repr (brackets/quotes) into the prompt.
    caption_text = "\n".join(captions)
    prompt = (
        "Summarize the following sequence of video frames into a meaningful "
        f"story under 800 characters:\n\n{caption_text}"
    )
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an AI that summarizes video content."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=200,
    )
    return completion.choices[0].message.content
# Summarize the captions and display the result.
summary = summarize_captions(captions)
st.write("πŸ“Œ Video Summary:", summary)
# Text-to-Speech
st.write("πŸ”Š Generating voice narration...")
def text_to_speech(text, output_audio="summary_audio.mp3"):
    """Convert *text* to spoken English with gTTS and save it as an MP3.

    Args:
        text: Text to narrate (gTTS rejects empty input).
        output_audio: Destination MP3 path.

    Returns:
        The path of the written audio file, for convenient chaining.
    """
    tts = gTTS(text, lang="en")
    tts.save(output_audio)
    return output_audio
# Narrate the summary and embed an audio player for the result.
text_to_speech(summary)
st.audio('summary_audio.mp3')
def create_summary_video(image_folder, output_video):
    """Build a slideshow MP4 from the JPEG frames in *image_folder*.

    Frames are ordered by their numeric scene index (scene_2 before
    scene_10), each shown for 2 seconds, and written to *output_video*.

    Args:
        image_folder: Directory containing scene_<n>.jpg frames.
        output_video: Output MP4 path.
    """
    # BUG FIX: `import moviepy` alone does not load the `editor` submodule,
    # so `moviepy.editor.ImageClip` raised AttributeError; import it here.
    from moviepy.editor import ImageClip, concatenate_videoclips

    def scene_index(name):
        # 'scene_<n>.jpg' -> n; unparsable names sort first.
        try:
            return int(name.rsplit("_", 1)[1].split(".")[0])
        except (IndexError, ValueError):
            return 0

    # Numeric sort keeps the slideshow in true scene order (lexicographic
    # sort would place scene_10 before scene_2).
    names = sorted(
        (f for f in os.listdir(image_folder) if f.endswith(".jpg")),
        key=scene_index,
    )
    images = [os.path.join(image_folder, f) for f in names]
    clips = [ImageClip(img).set_duration(2) for img in images]  # 2 sec per frame
    video = concatenate_videoclips(clips, method="compose")
    video.write_videofile(output_video, fps=24)
# Example usage
# BUG FIX: key frames are written to ./frames by extract_key_frames; the
# original call passed "scenes", a folder that is never created, so the
# slideshow could never find any images.
create_summary_video("frames", "summary_video.mp4")
st.video('summary_video.mp4')
# # Combine Audio & Video
# st.write("🎬 Merging audio with the video...")
# def add_audio_to_video(video_path, audio_path, output_video="final_video.mp4"):
# video = moviepy.editor.VideoFileClip(video_path)
# audio = moviepy.editor.AudioFileClip(audio_path)  # was `mp.` — `mp` is undefined; match VideoFileClip above
# if audio.duration > video.duration:
# audio = audio.subclip(0, video.duration)
# final_video = video.set_audio(audio)
# final_video.write_videofile(output_video, codec="libx264", audio_codec="aac")
# add_audio_to_video("summary_video.mp4", "summary_audio.mp3")
# st.video("final_video.mp4")