import streamlit as st
import os
import cv2
import torch
from PIL import Image
from gtts import gTTS
from scenedetect import open_video, SceneManager, ContentDetector
from transformers import BlipProcessor, BlipForConditionalGeneration
from openai import OpenAI
import base64
import moviepy.editor as mp
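# NOTE: `moviepy.editor` exists only in moviepy < 2.0; moviepy 2.x removed that
# module, so pin moviepy<2 (e.g. in requirements.txt) or import from `moviepy`
# directly.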
# Load AI models
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Read the OpenAI key from the environment rather than hardcoding it in source
# (a key committed to the repo is exposed and should be revoked).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
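# Optional sketch: caching the BLIP load (assumes Streamlit's st.cache_resource,
# available since 1.18) would keep reruns from reloading the weights on every
# widget interaction:
# @st.cache_resource
# def load_blip():
#     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
#     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
#     return processor, model
# caption_processor, caption_model = load_blip()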
# Streamlit App UI
st.title("🎥 AI-Powered Video Summarization")
# Define custom CSS
def set_background(image_file):
    with open(image_file, "rb") as image:
        encoded_string = base64.b64encode(image.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url("data:image/jpg;base64,{encoded_string}");
            background-size: cover;
            background-position: center;
            background-repeat: no-repeat;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )
# Set background
set_background("background.jpg")
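# NOTE: assumes a background.jpg sits next to app.py; set_background raises
# FileNotFoundError if the image is missing.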
uploaded_file = st.file_uploader("📤 Upload a Video File", type=["mp4"])
if uploaded_file:
    # Save the upload to disk so OpenCV and MoviePy can read it by path
    video_path = "input_video.mp4"
    with open(video_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    st.video(video_path)
    # Scene Detection & Frame Extraction
    st.write("🔍 Detecting scene changes and extracting key frames...")

    def extract_key_frames(video_path, output_folder="frames"):
        os.makedirs(output_folder, exist_ok=True)
        video = open_video(video_path)
        scene_manager = SceneManager()
        scene_manager.add_detector(ContentDetector(threshold=27.0))
        scene_manager.detect_scenes(video)
        scenes = scene_manager.get_scene_list()
        cap = cv2.VideoCapture(video_path)
        for i, (start, end) in enumerate(scenes):
            frame_number = start.get_frames()  # grab the frame at each scene's start
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = cap.read()
            if ret:
                frame_path = os.path.join(output_folder, f"scene_{i+1}.jpg")
                cv2.imwrite(frame_path, frame)
                print(f"Saved: {frame_path}")
        cap.release()

    extract_key_frames(video_path)
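    # The 27.0 threshold is PySceneDetect's ContentDetector default; lower
    # values flag subtler scene changes and extract more frames.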
    # Caption Generation
    st.write("📝 Generating captions for extracted frames...")

    def generate_caption(image_path):
        image = Image.open(image_path).convert("RGB")
        inputs = caption_processor(image, return_tensors="pt")
        caption_ids = caption_model.generate(**inputs)
        return caption_processor.decode(caption_ids[0], skip_special_tokens=True)

    # Sort frames numerically (scene_2 before scene_10) so captions stay in scene order
    frame_files = sorted(
        (f for f in os.listdir("frames") if f.endswith(".jpg")),
        key=lambda name: int(name.split("_")[1].split(".")[0]),
    )
    captions = [generate_caption(os.path.join("frames", f)) for f in frame_files]
    # st.write("📄 Generated Captions:", captions)
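    # NOTE: BLIP inference runs on CPU as written; to use a GPU, move
    # caption_model and the processor inputs to "cuda" when
    # torch.cuda.is_available() (the torch import above is otherwise unused).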
    # Summarization
    st.write("📖 Summarizing captions using AI...")

    def summarize_captions(captions):
        prompt = (
            "Summarize the following sequence of video frames into a "
            f"meaningful story under 800 characters:\n\n{captions}"
        )
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an AI that summarizes video content."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=200,
        )
        return completion.choices[0].message.content

    summary = summarize_captions(captions)
    st.write(summary)
    # Text-to-Speech
    st.write("🔊 Generating voice narration...")

    def text_to_speech(text, output_audio="summary_audio.mp3"):
        tts = gTTS(text, lang="en")
        tts.save(output_audio)

    text_to_speech(summary)
    st.audio("summary_audio.mp3")
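    # NOTE: gTTS synthesizes speech via Google's online TTS endpoint, so this
    # step needs outbound network access from the host.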
    # Video Summary
    st.write("📌 Video Summary:")

    def create_summary_video(image_folder, output_video):
        # Same numeric sort as above so the slideshow follows scene order
        images = sorted(
            (os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(".jpg")),
            key=lambda p: int(os.path.basename(p).split("_")[1].split(".")[0]),
        )
        clips = [mp.ImageClip(img).set_duration(2) for img in images]  # 2 sec per frame
        video = mp.concatenate_videoclips(clips, method="compose")
        video.write_videofile(output_video, fps=24)

    create_summary_video("frames", "summary_video.mp4")
    st.video("summary_video.mp4")
    # # Combine Audio & Video
    # st.write("🎬 Merging audio with the video...")
    # def add_audio_to_video(video_path, audio_path, output_video="final_video.mp4"):
    #     # mp.VideoFileClip, not moviepy.editor.VideoFileClip: only `mp` is imported
    #     video = mp.VideoFileClip(video_path)
    #     audio = mp.AudioFileClip(audio_path)
    #     # Trim the narration if it outlasts the slideshow
    #     if audio.duration > video.duration:
    #         audio = audio.subclip(0, video.duration)
    #     final_video = video.set_audio(audio)
    #     final_video.write_videofile(output_video, codec="libx264", audio_codec="aac")
    # add_audio_to_video("summary_video.mp4", "summary_audio.mp3")
    # st.video("final_video.mp4")