Update app.py
Browse files
app.py
CHANGED
|
@@ -12,10 +12,9 @@ from pydub import AudioSegment
|
|
| 12 |
import librosa
|
| 13 |
import numpy as np
|
| 14 |
from pyannote.audio import Pipeline
|
| 15 |
-
from pywebio import start_server, config
|
| 16 |
-
from pywebio.input import input
|
| 17 |
from pywebio.output import put_text, put_markdown, put_file
|
| 18 |
-
from pywebio.session import run_js
|
| 19 |
|
| 20 |
# Initialize the speaker diarization pipeline
|
| 21 |
try:
|
|
@@ -65,21 +64,21 @@ def correct_spelling(text):
|
|
| 65 |
corrected_words = [spell.correction(word) or word for word in words]
|
| 66 |
return ' '.join(corrected_words)
|
| 67 |
|
| 68 |
-
def
|
| 69 |
-
sentences = transcript.split('.')
|
| 70 |
formatted_transcript = []
|
| 71 |
current_speaker = None
|
| 72 |
-
for
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
| 83 |
|
| 84 |
def transcribe_audio(audio_file):
|
| 85 |
try:
|
|
@@ -93,6 +92,8 @@ def transcribe_audio(audio_file):
|
|
| 93 |
print("Applying speaker diarization...")
|
| 94 |
diarization = pipeline(audio_file)
|
| 95 |
print("Speaker diarization complete.")
|
|
|
|
|
|
|
| 96 |
|
| 97 |
chunk_length = 30 * sr
|
| 98 |
overlap = 5 * sr
|
|
@@ -110,8 +111,12 @@ def transcribe_audio(audio_file):
|
|
| 110 |
full_transcription = " ".join(transcriptions)
|
| 111 |
print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
return formatted_transcription
|
| 117 |
except Exception as e:
|
|
@@ -119,7 +124,6 @@ def transcribe_audio(audio_file):
|
|
| 119 |
raise
|
| 120 |
|
| 121 |
def format_transcript_with_breaks(transcript):
|
| 122 |
-
# Split into sentences
|
| 123 |
sentences = re.split('(?<=[.!?]) +', transcript)
|
| 124 |
paragraphs = []
|
| 125 |
current_paragraph = []
|
|
@@ -158,9 +162,8 @@ def transcribe_video(url):
|
|
| 158 |
# Clean up the temporary file
|
| 159 |
os.unlink(temp_audio_path)
|
| 160 |
|
| 161 |
-
# Apply spelling correction
|
| 162 |
transcript = correct_spelling(transcript)
|
| 163 |
-
transcript = format_transcript(transcript)
|
| 164 |
|
| 165 |
return transcript
|
| 166 |
except Exception as e:
|
|
@@ -168,13 +171,7 @@ def transcribe_video(url):
|
|
| 168 |
print(error_message)
|
| 169 |
return error_message
|
| 170 |
|
| 171 |
-
def
|
| 172 |
-
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as temp_file:
|
| 173 |
-
temp_file.write(transcript)
|
| 174 |
-
temp_file_path = temp_file.name
|
| 175 |
-
return temp_file_path
|
| 176 |
-
|
| 177 |
-
def pdf_compressor():
|
| 178 |
put_markdown("# Video Transcription")
|
| 179 |
video_url = input(label="Video URL")
|
| 180 |
if video_url:
|
|
@@ -182,13 +179,10 @@ def pdf_compressor():
|
|
| 182 |
transcript = transcribe_video(video_url)
|
| 183 |
if transcript:
|
| 184 |
put_text(transcript)
|
| 185 |
-
|
| 186 |
-
with open(download_link, 'r') as file:
|
| 187 |
-
file_content = file.read()
|
| 188 |
-
put_file(content=file_content, label="Download Transcript")
|
| 189 |
else:
|
| 190 |
put_text("Failed to transcribe video.")
|
| 191 |
|
| 192 |
if __name__ == '__main__':
|
| 193 |
config(title="Video Transcription", description="Transcribe audio from a video URL using Whisper and PyAnnote")
|
| 194 |
-
start_server(
|
|
|
|
| 12 |
import librosa
|
| 13 |
import numpy as np
|
| 14 |
from pyannote.audio import Pipeline
|
| 15 |
+
from pywebio import start_server, config
|
| 16 |
+
from pywebio.input import input
|
| 17 |
from pywebio.output import put_text, put_markdown, put_file
|
|
|
|
| 18 |
|
| 19 |
# Initialize the speaker diarization pipeline
|
| 20 |
try:
|
|
|
|
| 64 |
corrected_words = [spell.correction(word) or word for word in words]
|
| 65 |
return ' '.join(corrected_words)
|
| 66 |
|
| 67 |
+
def format_transcript_with_speakers(transcript, diarization):
    """Label a plain transcript with the speakers found by diarization.

    The transcript is a plain string without timestamps, so a speaker turn
    cannot be matched to its text exactly (and a string cannot be sliced
    with the float times of a segment). Instead, the words are handed out
    to the turns in order, each turn receiving a share of the words
    proportional to its duration. Every word is emitted exactly once.

    Args:
        transcript: Full transcription text.
        diarization: Object exposing ``itertracks(yield_label=True)`` that
            yields ``(segment, track, speaker)`` tuples, where ``segment``
            has numeric ``start`` and ``end`` attributes (assumed to be
            times in seconds, as produced by the diarization pipeline).

    Returns:
        The transcript with a ``Speaker <label>:`` header before each change
        of speaker and a blank line between speakers. If the diarization
        contains no turn with a positive duration, the transcript is
        returned unchanged because no speaker can be assigned.
    """
    # Lay the turns end to end on a "speech only" timeline. Accumulating the
    # boundaries here (instead of summing afterwards) guarantees that one
    # turn's end equals the next turn's start and that the last end equals
    # the total, so no word is lost or duplicated to rounding.
    turns = []
    elapsed = 0.0
    for segment, _, speaker in diarization.itertracks(yield_label=True):
        turn_start = elapsed
        elapsed += max(segment.end - segment.start, 0.0)
        turns.append((speaker, turn_start, elapsed))
    total_duration = elapsed

    if total_duration <= 0:
        return transcript

    words = transcript.split()
    word_count = len(words)
    formatted_transcript = []
    current_speaker = None
    for speaker, turn_start, turn_end in turns:
        if speaker != current_speaker:
            if current_speaker is not None:
                formatted_transcript.append("\n")  # Blank line between speakers
            formatted_transcript.append(f"Speaker {speaker}:\n")
            current_speaker = speaker
        # Map the turn's position on the timeline to integer word indices.
        first_word = int(turn_start / total_duration * word_count)
        last_word = int(turn_end / total_duration * word_count)
        segment_text = " ".join(words[first_word:last_word])
        if segment_text:
            formatted_transcript.append(f"{segment_text}\n")
    return "".join(formatted_transcript)
|
| 82 |
|
| 83 |
def transcribe_audio(audio_file):
|
| 84 |
try:
|
|
|
|
| 92 |
print("Applying speaker diarization...")
|
| 93 |
diarization = pipeline(audio_file)
|
| 94 |
print("Speaker diarization complete.")
|
| 95 |
+
else:
|
| 96 |
+
diarization = None
|
| 97 |
|
| 98 |
chunk_length = 30 * sr
|
| 99 |
overlap = 5 * sr
|
|
|
|
| 111 |
full_transcription = " ".join(transcriptions)
|
| 112 |
print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
|
| 113 |
|
| 114 |
+
if diarization:
|
| 115 |
+
print("Applying formatting with speaker diarization...")
|
| 116 |
+
formatted_transcription = format_transcript_with_speakers(full_transcription, diarization)
|
| 117 |
+
else:
|
| 118 |
+
print("Applying formatting without speaker diarization...")
|
| 119 |
+
formatted_transcription = format_transcript_with_breaks(full_transcription)
|
| 120 |
|
| 121 |
return formatted_transcription
|
| 122 |
except Exception as e:
|
|
|
|
| 124 |
raise
|
| 125 |
|
| 126 |
def format_transcript_with_breaks(transcript):
|
|
|
|
| 127 |
sentences = re.split('(?<=[.!?]) +', transcript)
|
| 128 |
paragraphs = []
|
| 129 |
current_paragraph = []
|
|
|
|
| 162 |
# Clean up the temporary file
|
| 163 |
os.unlink(temp_audio_path)
|
| 164 |
|
| 165 |
+
# Apply spelling correction
|
| 166 |
transcript = correct_spelling(transcript)
|
|
|
|
| 167 |
|
| 168 |
return transcript
|
| 169 |
except Exception as e:
|
|
|
|
| 171 |
print(error_message)
|
| 172 |
return error_message
|
| 173 |
|
| 174 |
+
def video_transcription():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
put_markdown("# Video Transcription")
|
| 176 |
video_url = input(label="Video URL")
|
| 177 |
if video_url:
|
|
|
|
| 179 |
transcript = transcribe_video(video_url)
|
| 180 |
if transcript:
|
| 181 |
put_text(transcript)
|
| 182 |
+
put_file('transcript.txt', transcript.encode('utf-8'), 'Download Transcript')
|
|
|
|
|
|
|
|
|
|
| 183 |
else:
|
| 184 |
put_text("Failed to transcribe video.")
|
| 185 |
|
| 186 |
# Script entry point: set the page title/description, then serve the
# video_transcription app on port 7860 with debug mode enabled.
if __name__ == '__main__':
    config(
        title="Video Transcription",
        description="Transcribe audio from a video URL using Whisper and PyAnnote",
    )
    start_server(video_transcription, port=7860, debug=True)
|