import streamlit as st
import os
import cv2
import torch
from PIL import Image
from gtts import gTTS
from scenedetect import open_video, SceneManager, ContentDetector
from transformers import BlipProcessor, BlipForConditionalGeneration
from openai import OpenAI
import base64
import moviepy.editor as mp
# Load AI models
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
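# Optional speed-up: run BLIP on a GPU when one is available (a sketch
# assuming a CUDA-enabled PyTorch build; on CPU-only machines it is a no-op).
device = "cuda" if torch.cuda.is_available() else "cpu"
caption_model.to(device)
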
client = OpenAI(
    # Never hard-code API keys; read the key from the environment instead
    api_key=os.environ["OPENAI_API_KEY"]
)


# Streamlit App UI
st.title("πŸŽ₯ AI-Powered Video Summarization")
# Define custom CSS
def set_background(image_file):
    with open(image_file, "rb") as image:
        encoded_string = base64.b64encode(image.read()).decode()

    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url("data:image/jpg;base64,{encoded_string}");
            background-size: cover;
            background-position: center;
            background-repeat: no-repeat;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )

# Set background (skipped gracefully if background.jpg is missing)
if os.path.exists("background.jpg"):
    set_background("background.jpg")

uploaded_file = st.file_uploader("πŸ“€ Upload a Video File", type=["mp4"])

if uploaded_file:
    video_path = "input_video.mp4"
    with open(video_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    st.video(video_path)

    # Scene Detection & Frame Extraction
    st.write("πŸ” Detecting scene changes and extracting key frames...")

    def extract_key_frames(video_path, output_folder="frames"):
        os.makedirs(output_folder, exist_ok=True)
        # Clear frames left over from a previous upload so later steps
        # only see this video's scenes
        for old in os.listdir(output_folder):
            if old.endswith(".jpg"):
                os.remove(os.path.join(output_folder, old))

        video = open_video(video_path)
        scene_manager = SceneManager()
        scene_manager.add_detector(ContentDetector(threshold=27.0))  # 27.0 is PySceneDetect's default; lower values detect more cuts
        scene_manager.detect_scenes(video)
        scenes = scene_manager.get_scene_list()

        cap = cv2.VideoCapture(video_path)
        for i, (start, end) in enumerate(scenes):
            frame_num = start.get_frames()  # frame index at the start of the scene
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = cap.read()

            if ret:
                frame_path = os.path.join(output_folder, f"scene_{i+1}.jpg")
                cv2.imwrite(frame_path, frame)

        cap.release()
    extract_key_frames(video_path)
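
    # Lightweight progress feedback; assumes frames/ holds only this run's
    # output (the extractor above clears stale frames first).
    num_frames = len([f for f in os.listdir("frames") if f.endswith(".jpg")])
    st.write(f"βœ… Extracted {num_frames} key frames.")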

    # Caption Generation
    st.write("πŸ“ Generating captions for extracted frames...")

    def generate_caption(image_path):
        image = Image.open(image_path).convert("RGB")
        inputs = caption_processor(image, return_tensors="pt").to(device)  # keep tensors on the model's device
        caption_ids = caption_model.generate(**inputs)
        return caption_processor.decode(caption_ids[0], skip_special_tokens=True)

    captions = []
    # Sort numerically so scene_2 comes before scene_10 (plain sorted() is lexicographic)
    frame_files = sorted(
        [f for f in os.listdir("frames") if f.endswith(".jpg")],
        key=lambda name: int(name.split("_")[1].split(".")[0])
    )
    for filename in frame_files:
        captions.append(generate_caption(os.path.join("frames", filename)))

    # st.write("πŸ“„ Generated Captions:", captions)

    # Summarization
    st.write("πŸ“– Summarizing captions using AI...")

    def summarize_captions(captions):
        caption_text = "\n".join(captions)
        prompt = f"Summarize the following sequence of video frames into a meaningful story under 800 characters:\n\n{caption_text}"

        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "system", "content": "You are an AI that summarizes video content."},
                      {"role": "user", "content": prompt}],
            max_tokens=200
        )
        return completion.choices[0].message.content

    summary = summarize_captions(captions)
    st.write(summary)
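    # Optional: let the user save the text summary (st.download_button is
    # part of the standard Streamlit API)
    st.download_button("⬇️ Download Summary", summary, file_name="summary.txt")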
 

    # Text-to-Speech
    st.write("πŸ”Š Generating voice narration...")

    def text_to_speech(text, output_audio="summary_audio.mp3"):
        tts = gTTS(text, lang="en")
        tts.save(output_audio)
    
    text_to_speech(summary)
    st.audio('summary_audio.mp3')
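    # Note: gTTS synthesizes speech via Google's online TTS endpoint, so this
    # step needs network access; an offline engine such as pyttsx3 could be
    # swapped in here if that becomes a constraint.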
    
    # Video summary
    st.write("πŸ“Œ Video Summary:")

    def create_summary_video(image_folder, output_video):
        # Assumes moviepy 1.x, where moviepy.editor and set_duration still
        # exist (both were removed in moviepy 2.x)
        images = sorted(
            [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(".jpg")],
            key=lambda p: int(os.path.basename(p).split("_")[1].split(".")[0])  # numeric scene order
        )
        clips = [mp.ImageClip(img).set_duration(2) for img in images]  # 2 sec per frame

        video = mp.concatenate_videoclips(clips, method="compose")
        video.write_videofile(output_video, fps=24)

    create_summary_video("frames", "summary_video.mp4")
    st.video('summary_video.mp4')

    # # Combine Audio & Video
    # st.write("🎬 Merging audio with the video...")

    # def add_audio_to_video(video_path, audio_path, output_video="final_video.mp4"):
    #     video = mp.VideoFileClip(video_path)
    #     audio = mp.AudioFileClip(audio_path)
    #     if audio.duration > video.duration:
    #         audio = audio.subclip(0, video.duration)
    #     final_video = video.set_audio(audio)
    #     final_video.write_videofile(output_video, codec="libx264", audio_codec="aac")

    # add_audio_to_video("summary_video.mp4", "summary_audio.mp3")

    # st.video("final_video.mp4")