Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import torch
|
|
4 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
5 |
import requests
|
6 |
from bs4 import BeautifulSoup
|
7 |
-
import tempfile
|
8 |
import os
|
9 |
import soundfile as sf
|
10 |
from spellchecker import SpellChecker
|
@@ -12,9 +12,12 @@ from pydub import AudioSegment
|
|
12 |
import librosa
|
13 |
import numpy as np
|
14 |
from pyannote.audio import Pipeline
|
15 |
-
|
16 |
-
from
|
17 |
-
|
|
|
|
|
|
|
18 |
|
19 |
# Initialize the speaker diarization pipeline
|
20 |
try:
|
@@ -171,18 +174,57 @@ def transcribe_video(url):
|
|
171 |
print(error_message)
|
172 |
return error_message
|
173 |
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
|
186 |
if __name__ == '__main__':
|
187 |
-
|
188 |
-
start_server(video_transcription, port=7860, debug=True)
|
|
|
4 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
5 |
import requests
|
6 |
from bs4 import BeautifulSoup
|
7 |
+
import tempfile
|
8 |
import os
|
9 |
import soundfile as sf
|
10 |
from spellchecker import SpellChecker
|
|
|
12 |
import librosa
|
13 |
import numpy as np
|
14 |
from pyannote.audio import Pipeline
|
15 |
+
import dash
|
16 |
+
from dash import dcc, html, Input, Output, State
|
17 |
+
import dash_bootstrap_components as dbc
|
18 |
+
from dash.exceptions import PreventUpdate
|
19 |
+
import base64
|
20 |
+
import threading
|
21 |
|
22 |
# Initialize the speaker diarization pipeline
|
23 |
try:
|
|
|
174 |
print(error_message)
|
175 |
return error_message
|
176 |
|
177 |
+
def _build_layout():
    """Assemble the single-page layout of the transcription UI.

    The page is one full-width column holding a heading and a card. The card
    contains the URL input ("video-url"), the trigger button
    ("transcribe-button"), a spinner-wrapped output area
    ("transcription-output") and the download component
    ("download-transcript").
    """
    url_field = dbc.Input(id="video-url", type="text", placeholder="Enter video URL")
    submit_button = dbc.Button(
        "Transcribe", id="transcribe-button", color="primary", className="mt-3"
    )
    # The spinner wraps the output area that the callback fills in.
    output_area = dbc.Spinner(html.Div(id="transcription-output", className="mt-3"))
    download_sink = dcc.Download(id="download-transcript")

    form_card = dbc.Card([
        dbc.CardBody([url_field, submit_button, output_area, download_sink]),
    ])
    heading = html.H1("Video Transcription", className="text-center mb-4")
    main_column = dbc.Col([heading, form_card], width=12)

    return dbc.Container([dbc.Row([main_column])], fluid=True)


# Dash application styled with the Bootstrap theme from dash-bootstrap-components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.layout = _build_layout()
|
194 |
+
|
195 |
+
@app.callback(
    Output("transcription-output", "children"),
    Output("download-transcript", "data"),
    Input("transcribe-button", "n_clicks"),
    State("video-url", "value"),
    prevent_initial_call=True,
)
def update_transcription(n_clicks, url):
    """Transcribe the video at ``url`` and render the result.

    Triggered by a click on the "Transcribe" button; the URL is read from the
    "video-url" input as state.

    Args:
        n_clicks: Click count of the trigger button. Only used as the trigger;
            its value is not read.
        url: Current content of the URL input, or ``None`` if nothing was typed.

    Returns:
        A ``(children, download_data)`` pair. On success ``children`` is a card
        showing the transcript and ``download_data`` is the payload handed to
        the ``dcc.Download`` component. If ``transcribe_video`` returns a falsy
        value, the pair is ``("Failed to transcribe video.", None)``.

    Raises:
        PreventUpdate: If the URL is missing, empty, or only whitespace.
    """
    # Treat a whitespace-only entry the same as an empty one.
    if isinstance(url, str):
        url = url.strip()
    if not url:
        raise PreventUpdate

    # Run the transcription exactly once. The previous implementation started
    # a worker thread, joined it immediately (so the request blocked anyway),
    # threw away the thread's result and then ran the whole transcription a
    # second time synchronously.
    transcript = transcribe_video(url)

    if not transcript:
        return "Failed to transcribe video.", None

    # NOTE(review): the visible tail of transcribe_video returns its error
    # message as a string, so a failure presumably arrives here as a truthy
    # value and is shown as a transcript -- confirm and distinguish if needed.
    #
    # NOTE(review): returning data to dcc.Download starts the file download as
    # soon as the transcription finishes. No callback for "btn-download" is
    # visible in this part of the file, so the button below appears to be
    # inert -- confirm whether it should drive the download instead.
    download_data = dict(content=transcript, filename="transcript.txt")
    result_card = dbc.Card([
        dbc.CardBody([
            html.H5("Transcription Result"),
            html.Pre(transcript),
            dbc.Button(
                "Download Transcript",
                id="btn-download",
                color="secondary",
                className="mt-3",
            ),
        ]),
    ])
    return result_card, download_data
|
228 |
|
229 |
if __name__ == '__main__':
    # Listen on all interfaces on port 7860, as before.
    #
    # Debug mode is opt-in via the DASH_DEBUG environment variable rather than
    # hard-coded: with host 0.0.0.0 a permanently enabled debugger and dev
    # tools would be reachable by anyone who can reach the port.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
|
|