hina19 committed on
Commit 3f5355b · verified · 1 Parent(s): c95214f

Create app.py

Files changed (1): app.py +114 -0
app.py ADDED
@@ -0,0 +1,114 @@
+ import os
+
+ import cv2
+ import streamlit as st
+ import moviepy.editor as mp
+ from PIL import Image
+ from gtts import gTTS
+ from scenedetect import open_video, SceneManager, ContentDetector
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ from openai import OpenAI
+
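+ # Load AI models: the BLIP captioning weights are downloaded from the
+ # Hugging Face Hub and cached locally on first run.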
+ caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+ # Read the OpenAI API key from the environment; never hard-code secrets in source
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+
+ # Streamlit App UI
+ st.title("🎥 AI-Powered Video Summarization")
+
+ uploaded_file = st.file_uploader("📤 Upload a Video File", type=["mp4"])
+
+ if uploaded_file:
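+     # Persist the upload to disk so OpenCV, PySceneDetect, and MoviePy can read it by path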
+     video_path = "input_video.mp4"
+     with open(video_path, "wb") as f:
+         f.write(uploaded_file.getbuffer())
+
+     st.video(video_path)
+
+     # Scene Detection & Frame Extraction
+     st.write("🔍 Detecting scene changes and extracting key frames...")
+
+     def extract_key_frames(video_path, output_folder="frames", frames_per_scene=3):
+         os.makedirs(output_folder, exist_ok=True)
+         video = open_video(video_path)
+         scene_manager = SceneManager()
+         scene_manager.add_detector(ContentDetector(threshold=27.0))
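+         # threshold=27.0 is ContentDetector's default; lower values split scenes more aggressively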
+         # SceneManager downscales frames automatically during detection
+         scene_manager.detect_scenes(video)
+         scenes = scene_manager.get_scene_list()
+         cap = cv2.VideoCapture(video_path)
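+         # Grab frames_per_scene evenly spaced frames from the interior of each scene,
+         # skipping the scene boundaries themselves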
+         for i, (start, end) in enumerate(scenes):
+             start_frame = start.get_frames()
+             end_frame = end.get_frames()
+             # Guard against very short scenes, where the integer step would be zero
+             step = max(1, (end_frame - start_frame) // (frames_per_scene + 1))
+             for j in range(frames_per_scene):
+                 frame_index = start_frame + step * (j + 1)
+                 cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
+                 ret, frame = cap.read()
+                 if ret:
+                     # Zero-pad the scene index so lexicographic sorting keeps scene order
+                     frame_path = os.path.join(output_folder, f"scene_{i+1:03d}_frame{j+1}.jpg")
+                     cv2.imwrite(frame_path, frame)
+         cap.release()
+
+     extract_key_frames(video_path)
+
+     # Caption Generation
+     st.write("📝 Generating captions for extracted frames...")
+
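+     # BLIP runs on CPU by default here, so captioning many frames can be slow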
+     def generate_caption(image_path):
+         image = Image.open(image_path).convert("RGB")
+         inputs = caption_processor(image, return_tensors="pt")
+         caption_ids = caption_model.generate(**inputs)
+         return caption_processor.decode(caption_ids[0], skip_special_tokens=True)
+
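+     # Walk the frames directory in sorted order so captions follow the video's scene order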
+     captions = []
+     for filename in sorted(os.listdir("frames")):
+         if filename.endswith(".jpg"):
+             image_path = os.path.join("frames", filename)
+             captions.append(generate_caption(image_path))
+
+     st.write("📄 Generated Captions:", captions)
+
+     # Summarization
+     st.write("📖 Summarizing captions using AI...")
+
+     def summarize_captions(captions):
+         prompt = f"Summarize the following sequence of video frames into a meaningful story:\n\n{captions}"
+         completion = client.chat.completions.create(
+             model="gpt-4o-mini",
+             messages=[
+                 {"role": "system", "content": "You are an AI that summarizes video content."},
+                 {"role": "user", "content": prompt},
+             ],
+         )
+         return completion.choices[0].message.content
+
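+     # One chat-completion call turns the per-frame captions into a single narrative summary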
+     summary = summarize_captions(captions)
+     st.write("📌 Video Summary:", summary)
+
+     # Text-to-Speech
+     st.write("🔊 Generating voice narration...")
+
+     def text_to_speech(text, output_audio="summary_audio.mp3"):
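+         # gTTS calls Google's online TTS service, so this step needs network access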
+         tts = gTTS(text, lang="en")
+         tts.save(output_audio)
+
+     text_to_speech(summary)
+
+     # Combine Audio & Video
+     st.write("🎬 Merging audio with the video...")
+
+     def add_audio_to_video(video_path, audio_path, output_video="final_video.mp4"):
+         video = mp.VideoFileClip(video_path)
+         audio = mp.AudioFileClip(audio_path)
+         # Trim the narration if it runs longer than the video
+         if audio.duration > video.duration:
+             audio = audio.subclip(0, video.duration)
+         final_video = video.set_audio(audio)
+         final_video.write_videofile(output_video, codec="libx264", audio_codec="aac")
+
+     add_audio_to_video(video_path, "summary_audio.mp3")
+
+     st.video("final_video.mp4")