whisper-webui-translate

Runtime error

App Files Files Community

aadnk committed on Sep 26, 2022

Commit

8d120bf

1 Parent(s): 3fadc6e

Add support for downloading files from YouTube using yt-dlp

Browse files

Files changed (3) hide show

app.py +29 -10
download.py +38 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,14 +1,19 @@
 from io import StringIO
 import os
 import tempfile
-from typing import Iterator
 import gradio as gr
 from utils import slugify, write_srt, write_vtt
-import whisper
-import ffmpeg
 #import os
 #os.system("pip install git+https://github.com/openai/whisper.git")
@@ -42,9 +47,8 @@ class UI:
     def __init__(self, inputAudioMaxDuration):
         self.inputAudioMaxDuration = inputAudioMaxDuration
-    def transcribeFile(self, modelName, languageName, uploadFile, microphoneData, task):
-        source = uploadFile if uploadFile is not None else microphoneData
-        sourceName = os.path.basename(source)
         selectedLanguage = languageName.lower() if len(languageName) > 0 else None
         selectedModel = modelName if modelName is not None else "base"
@@ -78,7 +82,20 @@ class UI:
         download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
         download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
-        return text, vtt, download
 def createFile(text: str, directory: str, fileName: str) -> str:
     # Write the text to a file
@@ -99,6 +116,7 @@ def getSubs(segments: Iterator[dict], format: str) -> str:
     segmentStream.seek(0)
     return segmentStream.read()
 def createUi(inputAudioMaxDuration, share=False):
     ui = UI(inputAudioMaxDuration)
@@ -113,13 +131,14 @@ def createUi(inputAudioMaxDuration, share=False):
     demo = gr.Interface(fn=ui.transcribeFile, description=ui_description, inputs=[
         gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
         gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
         gr.Audio(source="upload", type="filepath", label="Upload Audio"),
         gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
         gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
     ], outputs=[
         gr.Text(label="Transcription"),
-        gr.Text(label="Segments"),
-        gr.File(label="Download")
     ])
     demo.launch(share=share)

+from typing import Iterator
 from io import StringIO
 import os
+import pathlib
 import tempfile
+# External programs
+import whisper
+import ffmpeg
+# UI
 import gradio as gr
+from download import downloadUrl
 from utils import slugify, write_srt, write_vtt
 #import os
 #os.system("pip install git+https://github.com/openai/whisper.git")
     def __init__(self, inputAudioMaxDuration):
         self.inputAudioMaxDuration = inputAudioMaxDuration
+    def transcribeFile(self, modelName, languageName, urlData, uploadFile, microphoneData, task):
+        source, sourceName = getSource(urlData, uploadFile, microphoneData)
         selectedLanguage = languageName.lower() if len(languageName) > 0 else None
         selectedModel = modelName if modelName is not None else "base"
         download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
         download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
+        return download, text, vtt
+def getSource(urlData, uploadFile, microphoneData):
+    if urlData:
+        # Download from YouTube
+        source = downloadUrl(urlData)
+    else:
+        # File input
+        source = uploadFile if uploadFile is not None else microphoneData
+    file_path = pathlib.Path(source)
+    sourceName = file_path.stem[:18] + file_path.suffix
+    return source, sourceName
 def createFile(text: str, directory: str, fileName: str) -> str:
     # Write the text to a file
     segmentStream.seek(0)
     return segmentStream.read()
 def createUi(inputAudioMaxDuration, share=False):
     ui = UI(inputAudioMaxDuration)
     demo = gr.Interface(fn=ui.transcribeFile, description=ui_description, inputs=[
         gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
         gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
+        gr.Text(label="URL (YouTube, etc.)"),
         gr.Audio(source="upload", type="filepath", label="Upload Audio"),
         gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
         gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
     ], outputs=[
+        gr.File(label="Download"),
         gr.Text(label="Transcription"),
+        gr.Text(label="Segments")
     ])
     demo.launch(share=share)

download.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import os
+from tempfile import mkdtemp
+from yt_dlp import YoutubeDL
+from yt_dlp.postprocessor import PostProcessor
+class FilenameCollectorPP(PostProcessor):
+    def __init__(self):
+        super(FilenameCollectorPP, self).__init__(None)
+        self.filenames = []
+    def run(self, information):
+        self.filenames.append(information["filepath"])
+        return [], information
+def downloadUrl(url: str):
+    destinationDirectory = mkdtemp()
+    ydl_opts = {
+        "format": "bestaudio/best",
+        'playlist_items': '1',
+        'paths': {
+            'home': destinationDirectory
+        }
+    }
+    filename_collector = FilenameCollectorPP()
+    with YoutubeDL(ydl_opts) as ydl:
+        ydl.add_post_processor(filename_collector)
+        ydl.download([url])
+    if len(filename_collector.filenames) <= 0:
+        raise Exception("Cannot download " + url)
+    result = filename_collector.filenames[0]
+    print("Downloaded " + result)
+    return result

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 git+https://github.com/openai/whisper.git
 transformers
 ffmpeg-python==0.2.0
-gradio

 git+https://github.com/openai/whisper.git
 transformers
 ffmpeg-python==0.2.0
+gradio
+yt-dlp