Update app.py
Browse files
app.py
CHANGED
@@ -12,10 +12,9 @@ from pydub import AudioSegment
|
|
12 |
import librosa
|
13 |
import numpy as np
|
14 |
from pyannote.audio import Pipeline
|
15 |
-
from pywebio import start_server, config
|
16 |
-
from pywebio.input import input
|
17 |
from pywebio.output import put_text, put_markdown, put_file
|
18 |
-
from pywebio.session import run_js
|
19 |
|
20 |
# Initialize the speaker diarization pipeline
|
21 |
try:
|
@@ -65,21 +64,21 @@ def correct_spelling(text):
|
|
65 |
corrected_words = [spell.correction(word) or word for word in words]
|
66 |
return ' '.join(corrected_words)
|
67 |
|
68 |
-
def
|
69 |
-
sentences = transcript.split('.')
|
70 |
formatted_transcript = []
|
71 |
current_speaker = None
|
72 |
-
for
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
83 |
|
84 |
def transcribe_audio(audio_file):
|
85 |
try:
|
@@ -93,6 +92,8 @@ def transcribe_audio(audio_file):
|
|
93 |
print("Applying speaker diarization...")
|
94 |
diarization = pipeline(audio_file)
|
95 |
print("Speaker diarization complete.")
|
|
|
|
|
96 |
|
97 |
chunk_length = 30 * sr
|
98 |
overlap = 5 * sr
|
@@ -110,8 +111,12 @@ def transcribe_audio(audio_file):
|
|
110 |
full_transcription = " ".join(transcriptions)
|
111 |
print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
|
112 |
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
115 |
|
116 |
return formatted_transcription
|
117 |
except Exception as e:
|
@@ -119,7 +124,6 @@ def transcribe_audio(audio_file):
|
|
119 |
raise
|
120 |
|
121 |
def format_transcript_with_breaks(transcript):
|
122 |
-
# Split into sentences
|
123 |
sentences = re.split('(?<=[.!?]) +', transcript)
|
124 |
paragraphs = []
|
125 |
current_paragraph = []
|
@@ -158,9 +162,8 @@ def transcribe_video(url):
|
|
158 |
# Clean up the temporary file
|
159 |
os.unlink(temp_audio_path)
|
160 |
|
161 |
-
# Apply spelling correction
|
162 |
transcript = correct_spelling(transcript)
|
163 |
-
transcript = format_transcript(transcript)
|
164 |
|
165 |
return transcript
|
166 |
except Exception as e:
|
@@ -168,13 +171,7 @@ def transcribe_video(url):
|
|
168 |
print(error_message)
|
169 |
return error_message
|
170 |
|
171 |
-
def
|
172 |
-
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as temp_file:
|
173 |
-
temp_file.write(transcript)
|
174 |
-
temp_file_path = temp_file.name
|
175 |
-
return temp_file_path
|
176 |
-
|
177 |
-
def pdf_compressor():
|
178 |
put_markdown("# Video Transcription")
|
179 |
video_url = input(label="Video URL")
|
180 |
if video_url:
|
@@ -182,13 +179,10 @@ def pdf_compressor():
|
|
182 |
transcript = transcribe_video(video_url)
|
183 |
if transcript:
|
184 |
put_text(transcript)
|
185 |
-
|
186 |
-
with open(download_link, 'r') as file:
|
187 |
-
file_content = file.read()
|
188 |
-
put_file(content=file_content, label="Download Transcript")
|
189 |
else:
|
190 |
put_text("Failed to transcribe video.")
|
191 |
|
192 |
if __name__ == '__main__':
|
193 |
config(title="Video Transcription", description="Transcribe audio from a video URL using Whisper and PyAnnote")
|
194 |
-
start_server(
|
|
|
12 |
import librosa
|
13 |
import numpy as np
|
14 |
from pyannote.audio import Pipeline
|
15 |
+
from pywebio import start_server, config
|
16 |
+
from pywebio.input import input
|
17 |
from pywebio.output import put_text, put_markdown, put_file
|
|
|
18 |
|
19 |
# Initialize the speaker diarization pipeline
|
20 |
try:
|
|
|
64 |
corrected_words = [spell.correction(word) or word for word in words]
|
65 |
return ' '.join(corrected_words)
|
66 |
|
67 |
+
def format_transcript_with_speakers(transcript, diarization):
    """Group a plain transcript under ``Speaker <label>:`` headings.

    The transcript carries no timing information, so text is assigned to
    speaker turns *approximately*: each turn receives the share of the
    transcript proportional to where the turn ends on the diarization
    timeline, with cuts moved forward to the next space so words are not
    split. This is a heuristic, not a word-level alignment.

    The previous implementation sliced ``transcript[start:end]`` with the
    segment boundaries directly. pyannote documents those boundaries as
    float timestamps, and Python rejects non-integer slice indices with
    ``TypeError``, so that version could not format any turn.

    Args:
        transcript: Full transcription text.
        diarization: Object exposing ``itertracks(yield_label=True)`` that
            yields ``(segment, track, speaker)`` tuples, where ``segment``
            has an ``end`` attribute (assumed to be a time offset on the
            same timeline for every turn -- confirm against pyannote).

    Returns:
        The formatted text: a ``Speaker <label>:`` line whenever the
        speaker changes (separated from the previous speaker by a blank
        line), followed by one line per non-empty turn. If the diarization
        yields no usable turns, the stripped transcript is returned
        unchanged rather than being discarded.
    """
    turns = [
        (segment.end, speaker)
        for segment, _, speaker in diarization.itertracks(yield_label=True)
    ]
    total_duration = max((end for end, _ in turns), default=0)
    if total_duration <= 0:
        # Nothing to align against; keep the text instead of returning "".
        return transcript.strip()

    text_length = len(transcript)
    chars_per_second = text_length / total_duration

    formatted_transcript = []
    current_speaker = None
    cursor = 0  # Index of the first character not yet assigned to a turn.
    for end, speaker in turns:
        cut = int(end * chars_per_second)
        if cut < text_length:
            # Move the cut to the next space so a word is never split.
            next_space = transcript.find(" ", cut)
            cut = text_length if next_space == -1 else next_space
        # Overlapping turns can end before the cursor; never move backwards,
        # so no text is duplicated.
        cut = max(cut, cursor)
        segment_text = transcript[cursor:cut].strip()
        cursor = cut
        if not segment_text:
            # Skip turns that received no text so no empty headings appear.
            continue
        if speaker != current_speaker:
            if current_speaker is not None:
                formatted_transcript.append("\n")  # Blank line between speakers
            formatted_transcript.append(f"Speaker {speaker}:\n")
            current_speaker = speaker
        formatted_transcript.append(f"{segment_text}\n")
    return "".join(formatted_transcript)
|
82 |
|
83 |
def transcribe_audio(audio_file):
|
84 |
try:
|
|
|
92 |
print("Applying speaker diarization...")
|
93 |
diarization = pipeline(audio_file)
|
94 |
print("Speaker diarization complete.")
|
95 |
+
else:
|
96 |
+
diarization = None
|
97 |
|
98 |
chunk_length = 30 * sr
|
99 |
overlap = 5 * sr
|
|
|
111 |
full_transcription = " ".join(transcriptions)
|
112 |
print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
|
113 |
|
114 |
+
if diarization:
|
115 |
+
print("Applying formatting with speaker diarization...")
|
116 |
+
formatted_transcription = format_transcript_with_speakers(full_transcription, diarization)
|
117 |
+
else:
|
118 |
+
print("Applying formatting without speaker diarization...")
|
119 |
+
formatted_transcription = format_transcript_with_breaks(full_transcription)
|
120 |
|
121 |
return formatted_transcription
|
122 |
except Exception as e:
|
|
|
124 |
raise
|
125 |
|
126 |
def format_transcript_with_breaks(transcript):
|
|
|
127 |
sentences = re.split('(?<=[.!?]) +', transcript)
|
128 |
paragraphs = []
|
129 |
current_paragraph = []
|
|
|
162 |
# Clean up the temporary file
|
163 |
os.unlink(temp_audio_path)
|
164 |
|
165 |
+
# Apply spelling correction
|
166 |
transcript = correct_spelling(transcript)
|
|
|
167 |
|
168 |
return transcript
|
169 |
except Exception as e:
|
|
|
171 |
print(error_message)
|
172 |
return error_message
|
173 |
|
174 |
+
def video_transcription():
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
put_markdown("# Video Transcription")
|
176 |
video_url = input(label="Video URL")
|
177 |
if video_url:
|
|
|
179 |
transcript = transcribe_video(video_url)
|
180 |
if transcript:
|
181 |
put_text(transcript)
|
182 |
+
put_file('transcript.txt', transcript.encode('utf-8'), 'Download Transcript')
|
|
|
|
|
|
|
183 |
else:
|
184 |
put_text("Failed to transcribe video.")
|
185 |
|
186 |
if __name__ == "__main__":
    # Script entry point: set the page metadata, then serve the
    # transcription task on port 7860 with debug mode enabled.
    config(
        title="Video Transcription",
        description="Transcribe audio from a video URL using Whisper and PyAnnote",
    )
    start_server(
        video_transcription,
        port=7860,
        debug=True,
    )
|