Spaces:
Runtime error
Runtime error
Commit
·
a590feb
1
Parent(s):
56e52b4
Upload 7 files
Browse files- 01_🎥_Input_YouTube_Link.py +84 -82
- LICENSE.txt +21 -0
- README.md +11 -21
01_🎥_Input_YouTube_Link.py
CHANGED
@@ -11,39 +11,17 @@ from io import StringIO
|
|
11 |
from utils import write_vtt, write_srt
|
12 |
import ffmpeg
|
13 |
from languages import LANGUAGES
|
14 |
-
import torch
|
15 |
-
from zipfile import ZipFile
|
16 |
-
from io import BytesIO
|
17 |
-
import base64
|
18 |
-
import pathlib
|
19 |
-
import re
|
20 |
|
21 |
st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
|
22 |
|
23 |
-
torch.cuda.is_available()
|
24 |
-
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
25 |
-
# Model options: tiny, base, small, medium, large
|
26 |
-
loaded_model = whisper.load_model("small", device=DEVICE)
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
# Define a function that we can use to load lottie files from a link.
|
31 |
-
@st.cache(
|
32 |
def load_lottieurl(url: str):
|
33 |
r = requests.get(url)
|
34 |
if r.status_code != 200:
|
35 |
return None
|
36 |
return r.json()
|
37 |
|
38 |
-
APP_DIR = pathlib.Path(__file__).parent.absolute()
|
39 |
-
|
40 |
-
LOCAL_DIR = APP_DIR / "local_youtube"
|
41 |
-
LOCAL_DIR.mkdir(exist_ok=True)
|
42 |
-
save_dir = LOCAL_DIR / "output"
|
43 |
-
save_dir.mkdir(exist_ok=True)
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
col1, col2 = st.columns([1, 3])
|
48 |
with col1:
|
49 |
lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
|
@@ -58,6 +36,7 @@ with col2:
|
|
58 |
###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
|
59 |
|
60 |
|
|
|
61 |
def populate_metadata(link):
|
62 |
yt = YouTube(link)
|
63 |
author = yt.author
|
@@ -69,6 +48,7 @@ def populate_metadata(link):
|
|
69 |
return author, title, description, thumbnail, length, views
|
70 |
|
71 |
|
|
|
72 |
def download_video(link):
|
73 |
yt = YouTube(link)
|
74 |
video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
|
@@ -79,9 +59,23 @@ def convert(seconds):
|
|
79 |
return time.strftime("%H:%M:%S", time.gmtime(seconds))
|
80 |
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
def inference(link, loaded_model, task):
|
83 |
yt = YouTube(link)
|
84 |
-
path = yt.streams.filter(only_audio=True)[0].download(filename=
|
85 |
if task == "Transcribe":
|
86 |
options = dict(task="transcribe", best_of=5)
|
87 |
results = loaded_model.transcribe(path, **options)
|
@@ -100,6 +94,7 @@ def inference(link, loaded_model, task):
|
|
100 |
raise ValueError("Task not supported")
|
101 |
|
102 |
|
|
|
103 |
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
|
104 |
segmentStream = StringIO()
|
105 |
|
@@ -125,34 +120,35 @@ def get_language_code(language):
|
|
125 |
def generate_subtitled_video(video, audio, transcript):
|
126 |
video_file = ffmpeg.input(video)
|
127 |
audio_file = ffmpeg.input(audio)
|
128 |
-
ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("
|
129 |
-
video_with_subs = open("
|
130 |
return video_with_subs
|
131 |
|
132 |
|
133 |
def main():
|
134 |
-
|
|
|
|
|
|
|
|
|
135 |
task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
|
136 |
if task == "Transcribe":
|
137 |
if st.button("Transcribe"):
|
138 |
author, title, description, thumbnail, length, views = populate_metadata(link)
|
139 |
-
|
140 |
-
results = inference(link, loaded_model, task)
|
141 |
video = download_video(link)
|
142 |
lang = results[3]
|
143 |
detected_language = get_language_code(lang)
|
144 |
|
145 |
col3, col4 = st.columns(2)
|
|
|
|
|
146 |
with col3:
|
147 |
st.video(video)
|
148 |
-
|
149 |
-
#
|
150 |
-
sentences = re.split("([!?.])", results[0])
|
151 |
-
# Join the punctuation back to the sentences
|
152 |
-
sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
|
153 |
-
text = "\n\n".join(sentences)
|
154 |
with open("transcript.txt", "w+", encoding='utf8') as f:
|
155 |
-
f.writelines(
|
156 |
f.close()
|
157 |
with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
|
158 |
datatxt = f.read()
|
@@ -168,48 +164,50 @@ def main():
|
|
168 |
f.close()
|
169 |
with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
|
170 |
datasrt = f.read()
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
with col4:
|
173 |
with st.spinner("Generating Subtitled Video"):
|
174 |
-
video_with_subs = generate_subtitled_video(video,
|
175 |
st.video(video_with_subs)
|
176 |
st.balloons()
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
zipObj.write("transcript.srt")
|
182 |
-
zipObj.write("youtube_sub.mp4")
|
183 |
-
zipObj.close()
|
184 |
-
ZipfileDotZip = "YouTube_transcripts_and_video.zip"
|
185 |
-
with open(ZipfileDotZip, "rb") as f:
|
186 |
-
datazip = f.read()
|
187 |
-
b64 = base64.b64encode(datazip).decode()
|
188 |
-
href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
|
189 |
-
Download Transcripts and Video\
|
190 |
-
</a>"
|
191 |
-
st.markdown(href, unsafe_allow_html=True)
|
192 |
-
|
193 |
elif task == "Translate":
|
194 |
if st.button("Translate to English"):
|
195 |
author, title, description, thumbnail, length, views = populate_metadata(link)
|
196 |
-
|
197 |
-
results = inference(link, loaded_model, task)
|
198 |
video = download_video(link)
|
199 |
lang = results[3]
|
200 |
detected_language = get_language_code(lang)
|
201 |
|
202 |
col3, col4 = st.columns(2)
|
|
|
|
|
203 |
with col3:
|
204 |
st.video(video)
|
205 |
|
206 |
-
#
|
207 |
-
sentences = re.split("([!?.])", results[0])
|
208 |
-
# Join the punctuation back to the sentences
|
209 |
-
sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
|
210 |
-
text = "\n\n".join(sentences)
|
211 |
with open("transcript.txt", "w+", encoding='utf8') as f:
|
212 |
-
f.writelines(
|
213 |
f.close()
|
214 |
with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
|
215 |
datatxt = f.read()
|
@@ -225,32 +223,36 @@ def main():
|
|
225 |
f.close()
|
226 |
with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
|
227 |
datasrt = f.read()
|
228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
with col4:
|
230 |
with st.spinner("Generating Subtitled Video"):
|
231 |
-
video_with_subs = generate_subtitled_video(video,
|
232 |
st.video(video_with_subs)
|
233 |
st.balloons()
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
zipObj.write("transcript.srt")
|
239 |
-
zipObj.write("youtube_sub.mp4")
|
240 |
-
zipObj.close()
|
241 |
-
ZipfileDotZip = "YouTube_transcripts_and_video.zip"
|
242 |
-
with open(ZipfileDotZip, "rb") as f:
|
243 |
-
datazip = f.read()
|
244 |
-
b64 = base64.b64encode(datazip).decode()
|
245 |
-
href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
|
246 |
-
Download Transcripts and Video\
|
247 |
-
</a>"
|
248 |
-
st.markdown(href, unsafe_allow_html=True)
|
249 |
-
|
250 |
else:
|
251 |
-
st.
|
252 |
|
253 |
|
254 |
if __name__ == "__main__":
|
255 |
main()
|
256 |
-
|
|
|
11 |
from utils import write_vtt, write_srt
|
12 |
import ffmpeg
|
13 |
from languages import LANGUAGES
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
# Define a function that we can use to load lottie files from a link.
|
18 |
+
@st.cache()
|
19 |
def load_lottieurl(url: str):
|
20 |
r = requests.get(url)
|
21 |
if r.status_code != 200:
|
22 |
return None
|
23 |
return r.json()
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
col1, col2 = st.columns([1, 3])
|
26 |
with col1:
|
27 |
lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
|
|
|
36 |
###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
|
37 |
|
38 |
|
39 |
+
@st.cache(allow_output_mutation=True)
|
40 |
def populate_metadata(link):
|
41 |
yt = YouTube(link)
|
42 |
author = yt.author
|
|
|
48 |
return author, title, description, thumbnail, length, views
|
49 |
|
50 |
|
51 |
+
@st.cache(allow_output_mutation=True)
|
52 |
def download_video(link):
|
53 |
yt = YouTube(link)
|
54 |
video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
|
|
|
59 |
return time.strftime("%H:%M:%S", time.gmtime(seconds))
|
60 |
|
61 |
|
62 |
+
loaded_model = whisper.load_model("base")
|
63 |
+
current_size = "None"
|
64 |
+
|
65 |
+
|
66 |
+
@st.cache(allow_output_mutation=True)
|
67 |
+
def change_model(current_size, size):
|
68 |
+
if current_size != size:
|
69 |
+
loaded_model = whisper.load_model(size)
|
70 |
+
return loaded_model
|
71 |
+
else:
|
72 |
+
raise Exception("Model size is the same as the current size.")
|
73 |
+
|
74 |
+
|
75 |
+
@st.cache(allow_output_mutation=True)
|
76 |
def inference(link, loaded_model, task):
|
77 |
yt = YouTube(link)
|
78 |
+
path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
|
79 |
if task == "Transcribe":
|
80 |
options = dict(task="transcribe", best_of=5)
|
81 |
results = loaded_model.transcribe(path, **options)
|
|
|
94 |
raise ValueError("Task not supported")
|
95 |
|
96 |
|
97 |
+
@st.cache(allow_output_mutation=True)
|
98 |
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
|
99 |
segmentStream = StringIO()
|
100 |
|
|
|
120 |
def generate_subtitled_video(video, audio, transcript):
|
121 |
video_file = ffmpeg.input(video)
|
122 |
audio_file = ffmpeg.input(audio)
|
123 |
+
ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
|
124 |
+
video_with_subs = open("final.mp4", "rb")
|
125 |
return video_with_subs
|
126 |
|
127 |
|
128 |
def main():
|
129 |
+
size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
|
130 |
+
loaded_model = change_model(current_size, size)
|
131 |
+
st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
|
132 |
+
f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
|
133 |
+
link = st.text_input("YouTube Link (The longer the video, the longer the processing time)")
|
134 |
task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
|
135 |
if task == "Transcribe":
|
136 |
if st.button("Transcribe"):
|
137 |
author, title, description, thumbnail, length, views = populate_metadata(link)
|
138 |
+
results = inference(link, loaded_model, task)
|
|
|
139 |
video = download_video(link)
|
140 |
lang = results[3]
|
141 |
detected_language = get_language_code(lang)
|
142 |
|
143 |
col3, col4 = st.columns(2)
|
144 |
+
col5, col6, col7, col8 = st.columns(4)
|
145 |
+
col9, col10 = st.columns(2)
|
146 |
with col3:
|
147 |
st.video(video)
|
148 |
+
|
149 |
+
# Write the results to a .txt file and download it.
|
|
|
|
|
|
|
|
|
150 |
with open("transcript.txt", "w+", encoding='utf8') as f:
|
151 |
+
f.writelines(results[0])
|
152 |
f.close()
|
153 |
with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
|
154 |
datatxt = f.read()
|
|
|
164 |
f.close()
|
165 |
with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
|
166 |
datasrt = f.read()
|
167 |
+
|
168 |
+
with col5:
|
169 |
+
st.download_button(label="Download Transcript (.txt)",
|
170 |
+
data=datatxt,
|
171 |
+
file_name="transcript.txt")
|
172 |
+
with col6:
|
173 |
+
st.download_button(label="Download Transcript (.vtt)",
|
174 |
+
data=datavtt,
|
175 |
+
file_name="transcript.vtt")
|
176 |
+
with col7:
|
177 |
+
st.download_button(label="Download Transcript (.srt)",
|
178 |
+
data=datasrt,
|
179 |
+
file_name="transcript.srt")
|
180 |
+
with col9:
|
181 |
+
st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
|
182 |
+
with col10:
|
183 |
+
st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
|
184 |
+
|
185 |
with col4:
|
186 |
with st.spinner("Generating Subtitled Video"):
|
187 |
+
video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
|
188 |
st.video(video_with_subs)
|
189 |
st.balloons()
|
190 |
+
with col8:
|
191 |
+
st.download_button(label="Download Subtitled Video",
|
192 |
+
data=video_with_subs,
|
193 |
+
file_name=f"{title} with subtitles.mp4")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
elif task == "Translate":
|
195 |
if st.button("Translate to English"):
|
196 |
author, title, description, thumbnail, length, views = populate_metadata(link)
|
197 |
+
results = inference(link, loaded_model, task)
|
|
|
198 |
video = download_video(link)
|
199 |
lang = results[3]
|
200 |
detected_language = get_language_code(lang)
|
201 |
|
202 |
col3, col4 = st.columns(2)
|
203 |
+
col5, col6, col7, col8 = st.columns(4)
|
204 |
+
col9, col10 = st.columns(2)
|
205 |
with col3:
|
206 |
st.video(video)
|
207 |
|
208 |
+
# Write the results to a .txt file and download it.
|
|
|
|
|
|
|
|
|
209 |
with open("transcript.txt", "w+", encoding='utf8') as f:
|
210 |
+
f.writelines(results[0])
|
211 |
f.close()
|
212 |
with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
|
213 |
datatxt = f.read()
|
|
|
223 |
f.close()
|
224 |
with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
|
225 |
datasrt = f.read()
|
226 |
+
with col5:
|
227 |
+
st.download_button(label="Download Transcript (.txt)",
|
228 |
+
data=datatxt,
|
229 |
+
file_name="transcript.txt")
|
230 |
+
with col6:
|
231 |
+
st.download_button(label="Download Transcript (.vtt)",
|
232 |
+
data=datavtt,
|
233 |
+
file_name="transcript.vtt")
|
234 |
+
with col7:
|
235 |
+
st.download_button(label="Download Transcript (.srt)",
|
236 |
+
data=datasrt,
|
237 |
+
file_name="transcript.srt")
|
238 |
+
with col9:
|
239 |
+
st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
|
240 |
+
with col10:
|
241 |
+
st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
|
242 |
+
|
243 |
with col4:
|
244 |
with st.spinner("Generating Subtitled Video"):
|
245 |
+
video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
|
246 |
st.video(video_with_subs)
|
247 |
st.balloons()
|
248 |
+
with col8:
|
249 |
+
st.download_button(label="Download Subtitled Video",
|
250 |
+
data=video_with_subs,
|
251 |
+
file_name=f"{title} with subtitles.mp4")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
else:
|
253 |
+
st.error("Please select a task.")
|
254 |
|
255 |
|
256 |
if __name__ == "__main__":
|
257 |
main()
|
258 |
+
st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [](https://www.buymeacoffee.com/batuhanylmz)")
|
LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2022 Batuhan Yılmaz
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,22 +1,12 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-

|
5 |
-

|
6 |
-
|
7 |
-
#### About this project
|
8 |
-
- This project is an automatic speech recognition application that takes a YouTube video link or a video file as input to generate a video with subtitles.
|
9 |
-
- You can also upload an audio file to generate a transcript as .txt, .vtt, .srt files.
|
10 |
-
- The application performs 2 tasks:
|
11 |
-
- Detects the language, transcribes the input video in its original language.
|
12 |
-
- Detects the language, translates it into English and then transcribes.
|
13 |
-
- Downloaded the video of the input link using [pytube](https://github.com/pytube/pytube).
|
14 |
-
- Generated a transcription of the video using the [OpenAI Whisper](https://openai.com/blog/whisper) model.
|
15 |
-
- Saved the transcriptions as .txt, .vtt and .srt files.
|
16 |
-
- Generated a subtitled version of the input video using [ffmpeg](https://github.com/FFmpeg).
|
17 |
-
- Displayed the original video and the subtitled video side by side.
|
18 |
-
- Built a multipage web app using [Streamlit](https://streamlit.io) and hosted on [HuggingFace Spaces](https://huggingface.co/spaces).
|
19 |
-
- You can download the generated .txt, .vtt, .srt files and the subtitled video.
|
20 |
-
- You can use the app via this [link](https://huggingface.co/spaces/BatuhanYilmaz/Auto-Subtitled-Video-Generator).
|
21 |
-
|
22 |
-

|
|
|
1 |
+
---
|
2 |
+
title: Whisper-Auto-Subtitled-Video-Generator
|
3 |
+
emoji: 🎥
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: purple
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.10.0
|
8 |
+
app_file: 01_🎥_Input_YouTube_Link.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|