Spaces:

MicroHealth
/

AV-to-transcripts

Paused

App Files Community

bluenevus committed on Apr 22

Commit

59a6a31

verified ·

1 Parent(s): 532f762

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -33

app.py CHANGED Viewed

@@ -12,10 +12,9 @@ from pydub import AudioSegment
 import librosa
 import numpy as np
 from pyannote.audio import Pipeline
-from pywebio import start_server, config, session
-from pywebio.input import input, input_group
 from pywebio.output import put_text, put_markdown, put_file
-from pywebio.session import run_js
 # Initialize the speaker diarization pipeline
 try:
@@ -65,21 +64,21 @@ def correct_spelling(text):
     corrected_words = [spell.correction(word) or word for word in words]
     return ' '.join(corrected_words)
-def format_transcript(transcript):
-    sentences = transcript.split('.')
     formatted_transcript = []
     current_speaker = None
-    for sentence in sentences:
-        if ':' in sentence:
-            speaker, content = sentence.split(':', 1)
-            if speaker != current_speaker:
-                formatted_transcript.append(f"\n\n{speaker.strip()}:{content.strip()}.")
-                current_speaker = speaker
-            else:
-                formatted_transcript.append(f"{content.strip()}.")
-        else:
-            formatted_transcript.append(sentence.strip() + '.')
-    return ' '.join(formatted_transcript)
 def transcribe_audio(audio_file):
     try:
@@ -93,6 +92,8 @@ def transcribe_audio(audio_file):
             print("Applying speaker diarization...")
             diarization = pipeline(audio_file)
             print("Speaker diarization complete.")
         chunk_length = 30 * sr
         overlap = 5 * sr
@@ -110,8 +111,12 @@ def transcribe_audio(audio_file):
         full_transcription = " ".join(transcriptions)
         print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
-        print("Applying formatting and paragraph breaks...")
-        formatted_transcription = format_transcript_with_breaks(full_transcription)
         return formatted_transcription
     except Exception as e:
@@ -119,7 +124,6 @@ def transcribe_audio(audio_file):
         raise
 def format_transcript_with_breaks(transcript):
-    # Split into sentences
     sentences = re.split('(?<=[.!?]) +', transcript)
     paragraphs = []
     current_paragraph = []
@@ -158,9 +162,8 @@ def transcribe_video(url):
         # Clean up the temporary file
         os.unlink(temp_audio_path)
-        # Apply spelling correction and formatting
         transcript = correct_spelling(transcript)
-        transcript = format_transcript(transcript)
         return transcript
     except Exception as e:
@@ -168,13 +171,7 @@ def transcribe_video(url):
         print(error_message)
         return error_message
-def download_transcript(transcript):
-    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as temp_file:
-        temp_file.write(transcript)
-        temp_file_path = temp_file.name
-    return temp_file_path
-def pdf_compressor():
     put_markdown("# Video Transcription")
     video_url = input(label="Video URL")
     if video_url:
@@ -182,13 +179,10 @@ def pdf_compressor():
         transcript = transcribe_video(video_url)
         if transcript:
             put_text(transcript)
-            download_link = download_transcript(transcript)
-            with open(download_link, 'r') as file:
-                file_content = file.read()
-            put_file(content=file_content, label="Download Transcript")
         else:
             put_text("Failed to transcribe video.")
 if __name__ == '__main__':
     config(title="Video Transcription", description="Transcribe audio from a video URL using Whisper and PyAnnote")
-    start_server(pdf_compressor, host='0.0.0.0', port=7860, debug=True, enable_rate_limit=True, max_payload_size='200M')

 import librosa
 import numpy as np
 from pyannote.audio import Pipeline
+from pywebio import start_server, config
+from pywebio.input import input
 from pywebio.output import put_text, put_markdown, put_file
 # Initialize the speaker diarization pipeline
 try:
     corrected_words = [spell.correction(word) or word for word in words]
     return ' '.join(corrected_words)
def format_transcript_with_speakers(transcript, diarization):
    """Group transcript text under speaker headings using diarization turns.

    The previous implementation sliced ``transcript`` directly with each
    segment's ``start``/``end``.  Those values are times (floats), not
    character offsets, so the slice raised ``TypeError`` as soon as
    diarization was available.

    No per-word timestamps are passed in, so the text is distributed
    *approximately*: every turn receives the share of words proportional to
    its position on the diarization timeline.  This assumes speech is spread
    roughly evenly over the recording; overlapping turns will repeat the
    words they share.  NOTE(review): for accurate attribution, pass
    timestamped transcription chunks instead of a flat string.

    Args:
        transcript: Full transcription as a single string.
        diarization: Object exposing ``itertracks(yield_label=True)`` that
            yields ``(segment, track, speaker)`` tuples, where ``segment``
            has numeric ``start`` and ``end`` attributes.

    Returns:
        A string with one ``Speaker <label>:`` heading per change of
        speaker, each turn's text on its own line, and a blank line between
        speakers.  If there are no turns, no words, or the timeline has no
        positive extent, ``transcript`` is returned unchanged so that no
        text is lost.
    """
    turns = [
        (segment.start, segment.end, speaker)
        for segment, _, speaker in diarization.itertracks(yield_label=True)
    ]
    if not turns:
        return transcript

    words = transcript.split()
    timeline_end = max(end for _, end, _ in turns)
    if not words or timeline_end <= 0:
        return transcript

    # Working in whole words (rather than characters) keeps turn boundaries
    # from cutting a word in half.
    words_per_unit = len(words) / timeline_end
    word_count = len(words)

    formatted_transcript = []
    current_speaker = None
    for start, end, speaker in turns:
        # round() on both edges means back-to-back turns share a boundary,
        # so no word is dropped or duplicated between them.
        first = min(max(round(start * words_per_unit), 0), word_count)
        stop = min(max(round(end * words_per_unit), first), word_count)
        segment_text = " ".join(words[first:stop])
        if not segment_text:
            # Very short turns can map to zero words; skip them so the
            # output never contains a speaker heading with nothing under it.
            continue
        if speaker != current_speaker:
            if current_speaker is not None:
                formatted_transcript.append("\n")  # Blank line between speakers
            formatted_transcript.append(f"Speaker {speaker}:\n")
            current_speaker = speaker
        formatted_transcript.append(f"{segment_text}\n")
    return "".join(formatted_transcript)
 def transcribe_audio(audio_file):
     try:
             print("Applying speaker diarization...")
             diarization = pipeline(audio_file)
             print("Speaker diarization complete.")
+        else:
+            diarization = None
         chunk_length = 30 * sr
         overlap = 5 * sr
         full_transcription = " ".join(transcriptions)
         print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
+        if diarization:
+            print("Applying formatting with speaker diarization...")
+            formatted_transcription = format_transcript_with_speakers(full_transcription, diarization)
+        else:
+            print("Applying formatting without speaker diarization...")
+            formatted_transcription = format_transcript_with_breaks(full_transcription)
         return formatted_transcription
     except Exception as e:
         raise
 def format_transcript_with_breaks(transcript):
     sentences = re.split('(?<=[.!?]) +', transcript)
     paragraphs = []
     current_paragraph = []
         # Clean up the temporary file
         os.unlink(temp_audio_path)
+        # Apply spelling correction
         transcript = correct_spelling(transcript)
         return transcript
     except Exception as e:
         print(error_message)
         return error_message
+def video_transcription():
     put_markdown("# Video Transcription")
     video_url = input(label="Video URL")
     if video_url:
         transcript = transcribe_video(video_url)
         if transcript:
             put_text(transcript)
+            put_file('transcript.txt', transcript.encode('utf-8'), 'Download Transcript')
         else:
             put_text("Failed to transcribe video.")
 if __name__ == '__main__':
     config(title="Video Transcription", description="Transcribe audio from a video URL using Whisper and PyAnnote")
+    start_server(video_transcription, port=7860, debug=True)