bluenevus committed on
Commit
59a6a31
·
verified ·
1 Parent(s): 532f762

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -33
app.py CHANGED
@@ -12,10 +12,9 @@ from pydub import AudioSegment
12
  import librosa
13
  import numpy as np
14
  from pyannote.audio import Pipeline
15
- from pywebio import start_server, config, session
16
- from pywebio.input import input, input_group
17
  from pywebio.output import put_text, put_markdown, put_file
18
- from pywebio.session import run_js
19
 
20
  # Initialize the speaker diarization pipeline
21
  try:
@@ -65,21 +64,21 @@ def correct_spelling(text):
65
  corrected_words = [spell.correction(word) or word for word in words]
66
  return ' '.join(corrected_words)
67
 
68
- def format_transcript(transcript):
69
- sentences = transcript.split('.')
70
  formatted_transcript = []
71
  current_speaker = None
72
- for sentence in sentences:
73
- if ':' in sentence:
74
- speaker, content = sentence.split(':', 1)
75
- if speaker != current_speaker:
76
- formatted_transcript.append(f"\n\n{speaker.strip()}:{content.strip()}.")
77
- current_speaker = speaker
78
- else:
79
- formatted_transcript.append(f"{content.strip()}.")
80
- else:
81
- formatted_transcript.append(sentence.strip() + '.')
82
- return ' '.join(formatted_transcript)
 
83
 
84
  def transcribe_audio(audio_file):
85
  try:
@@ -93,6 +92,8 @@ def transcribe_audio(audio_file):
93
  print("Applying speaker diarization...")
94
  diarization = pipeline(audio_file)
95
  print("Speaker diarization complete.")
 
 
96
 
97
  chunk_length = 30 * sr
98
  overlap = 5 * sr
@@ -110,8 +111,12 @@ def transcribe_audio(audio_file):
110
  full_transcription = " ".join(transcriptions)
111
  print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
112
 
113
- print("Applying formatting and paragraph breaks...")
114
- formatted_transcription = format_transcript_with_breaks(full_transcription)
 
 
 
 
115
 
116
  return formatted_transcription
117
  except Exception as e:
@@ -119,7 +124,6 @@ def transcribe_audio(audio_file):
119
  raise
120
 
121
  def format_transcript_with_breaks(transcript):
122
- # Split into sentences
123
  sentences = re.split('(?<=[.!?]) +', transcript)
124
  paragraphs = []
125
  current_paragraph = []
@@ -158,9 +162,8 @@ def transcribe_video(url):
158
  # Clean up the temporary file
159
  os.unlink(temp_audio_path)
160
 
161
- # Apply spelling correction and formatting
162
  transcript = correct_spelling(transcript)
163
- transcript = format_transcript(transcript)
164
 
165
  return transcript
166
  except Exception as e:
@@ -168,13 +171,7 @@ def transcribe_video(url):
168
  print(error_message)
169
  return error_message
170
 
171
- def download_transcript(transcript):
172
- with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as temp_file:
173
- temp_file.write(transcript)
174
- temp_file_path = temp_file.name
175
- return temp_file_path
176
-
177
- def pdf_compressor():
178
  put_markdown("# Video Transcription")
179
  video_url = input(label="Video URL")
180
  if video_url:
@@ -182,13 +179,10 @@ def pdf_compressor():
182
  transcript = transcribe_video(video_url)
183
  if transcript:
184
  put_text(transcript)
185
- download_link = download_transcript(transcript)
186
- with open(download_link, 'r') as file:
187
- file_content = file.read()
188
- put_file(content=file_content, label="Download Transcript")
189
  else:
190
  put_text("Failed to transcribe video.")
191
 
192
  if __name__ == '__main__':
193
  config(title="Video Transcription", description="Transcribe audio from a video URL using Whisper and PyAnnote")
194
- start_server(pdf_compressor, host='0.0.0.0', port=7860, debug=True, enable_rate_limit=True, max_payload_size='200M')
 
12
  import librosa
13
  import numpy as np
14
  from pyannote.audio import Pipeline
15
+ from pywebio import start_server, config
16
+ from pywebio.input import input
17
  from pywebio.output import put_text, put_markdown, put_file
 
18
 
19
  # Initialize the speaker diarization pipeline
20
  try:
 
64
  corrected_words = [spell.correction(word) or word for word in words]
65
  return ' '.join(corrected_words)
66
 
67
def format_transcript_with_speakers(transcript, diarization):
    """Lay out a plain transcript as speaker-labelled lines.

    The transcript carries no timing information, so words are attributed
    to speaker turns by *estimated* position: the words are assumed to be
    spread evenly over the span from 0 to the end of the last turn, and each
    word goes to the most recently started turn at its estimated time. This
    is an approximation, but every word is kept exactly once.

    (The previous implementation sliced ``transcript[start:end]`` with the
    turn's time offsets, which raises ``TypeError`` for float bounds and
    would confuse time offsets with character offsets in any case.)

    Args:
        transcript: The full transcription text.
        diarization: Object whose ``itertracks(yield_label=True)`` yields
            ``(segment, track, speaker)`` triples, where ``segment`` has
            numeric ``start`` and ``end`` attributes (presumably seconds,
            as produced by the pyannote pipeline -- confirm against caller).

    Returns:
        A string made of ``"Speaker <label>:\\n"`` headers, each followed by
        one line of text per turn, with a blank line between speakers.
        Returns ``""`` for an empty transcript, and the stripped transcript
        unchanged when the diarization contains no turns.
    """
    turns = sorted(
        (
            (segment.start, segment.end, speaker)
            for segment, _, speaker in diarization.itertracks(yield_label=True)
        ),
        key=lambda turn: (turn[0], turn[1]),
    )
    words = transcript.split()
    if not words:
        return ""
    if not turns:
        # Nothing to attribute the text to; keep it rather than drop it.
        return transcript.strip()

    total_duration = max(end for _, end, _ in turns)
    words_per_turn = [[] for _ in turns]
    turn_index = 0
    for position, word in enumerate(words):
        # Estimated time of this word, taken at the middle of its slot.
        estimated_time = (position + 0.5) / len(words) * total_duration
        # Advance to the latest turn that has started by that time.
        while (
            turn_index + 1 < len(turns)
            and turns[turn_index + 1][0] <= estimated_time
        ):
            turn_index += 1
        words_per_turn[turn_index].append(word)

    formatted_transcript = []
    current_speaker = None
    for (_, _, speaker), turn_words in zip(turns, words_per_turn):
        if not turn_words:
            continue
        if speaker != current_speaker:
            if current_speaker is not None:
                formatted_transcript.append("\n")  # blank line between speakers
            formatted_transcript.append(f"Speaker {speaker}:\n")
            current_speaker = speaker
        formatted_transcript.append(" ".join(turn_words) + "\n")
    return "".join(formatted_transcript)
82
 
83
  def transcribe_audio(audio_file):
84
  try:
 
92
  print("Applying speaker diarization...")
93
  diarization = pipeline(audio_file)
94
  print("Speaker diarization complete.")
95
+ else:
96
+ diarization = None
97
 
98
  chunk_length = 30 * sr
99
  overlap = 5 * sr
 
111
  full_transcription = " ".join(transcriptions)
112
  print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
113
 
114
+ if diarization:
115
+ print("Applying formatting with speaker diarization...")
116
+ formatted_transcription = format_transcript_with_speakers(full_transcription, diarization)
117
+ else:
118
+ print("Applying formatting without speaker diarization...")
119
+ formatted_transcription = format_transcript_with_breaks(full_transcription)
120
 
121
  return formatted_transcription
122
  except Exception as e:
 
124
  raise
125
 
126
  def format_transcript_with_breaks(transcript):
 
127
  sentences = re.split('(?<=[.!?]) +', transcript)
128
  paragraphs = []
129
  current_paragraph = []
 
162
  # Clean up the temporary file
163
  os.unlink(temp_audio_path)
164
 
165
+ # Apply spelling correction
166
  transcript = correct_spelling(transcript)
 
167
 
168
  return transcript
169
  except Exception as e:
 
171
  print(error_message)
172
  return error_message
173
 
174
+ def video_transcription():
 
 
 
 
 
 
175
  put_markdown("# Video Transcription")
176
  video_url = input(label="Video URL")
177
  if video_url:
 
179
  transcript = transcribe_video(video_url)
180
  if transcript:
181
  put_text(transcript)
182
+ put_file('transcript.txt', transcript.encode('utf-8'), 'Download Transcript')
 
 
 
183
  else:
184
  put_text("Failed to transcribe video.")
185
 
186
  if __name__ == '__main__':
187
  config(title="Video Transcription", description="Transcribe audio from a video URL using Whisper and PyAnnote")
188
+ start_server(video_transcription, port=7860, debug=True)