Spaces:

MicroHealth
/

AV-to-transcripts

Paused

App Files Community

bluenevus committed on Apr 23

Commit

8af57a0

verified ·

1 Parent(s): e0a7ef4

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -40

app.py CHANGED Viewed

@@ -18,14 +18,11 @@ import dash_bootstrap_components as dbc
 from dash.exceptions import PreventUpdate
 import base64
 import threading
 # Initialize the speaker diarization pipeline
-try:
-    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
-    print("Speaker diarization pipeline initialized successfully")
-except Exception as e:
-    print(f"Error initializing speaker diarization pipeline: {str(e)}")
-    pipeline = None
 # Check if CUDA is available and set the device
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -40,7 +37,15 @@ spell = SpellChecker()
 def download_audio_from_url(url):
     try:
-        if "share" in url:
             print("Processing shareable link...")
             response = requests.get(url)
             soup = BeautifulSoup(response.content, 'html.parser')
@@ -50,12 +55,13 @@ def download_audio_from_url(url):
                 print(f"Extracted video URL: {video_url}")
             else:
                 raise ValueError("Direct video URL not found in the shareable link.")
         else:
-            video_url = url
-        print(f"Downloading video from URL: {video_url}")
-        response = requests.get(video_url)
-        audio_bytes = response.content
         print(f"Successfully downloaded {len(audio_bytes)} bytes of data")
         return audio_bytes
     except Exception as e:
@@ -91,12 +97,9 @@ def transcribe_audio(audio_file):
         print(f"Audio duration: {len(audio_input) / sr:.2f} seconds")
         # Apply speaker diarization
-        if pipeline:
-            print("Applying speaker diarization...")
-            diarization = pipeline(audio_file)
-            print("Speaker diarization complete.")
-        else:
-            diarization = None
         chunk_length = 30 * sr
         overlap = 5 * sr
@@ -114,34 +117,14 @@ def transcribe_audio(audio_file):
         full_transcription = " ".join(transcriptions)
         print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
-        if diarization:
-            print("Applying formatting with speaker diarization...")
-            formatted_transcription = format_transcript_with_speakers(full_transcription, diarization)
-        else:
-            print("Applying formatting without speaker diarization...")
-            formatted_transcription = format_transcript_with_breaks(full_transcription)
         return formatted_transcription
     except Exception as e:
         print(f"Error in transcribe_audio: {str(e)}")
         raise
-def format_transcript_with_breaks(transcript):
-    sentences = re.split('(?<=[.!?]) +', transcript)
-    paragraphs = []
-    current_paragraph = []
-    for sentence in sentences:
-        current_paragraph.append(sentence)
-        if len(current_paragraph) >= 3:  # Adjust this number to control paragraph size
-            paragraphs.append(' '.join(current_paragraph))
-            current_paragraph = []
-    if current_paragraph:
-        paragraphs.append(' '.join(current_paragraph))
-    return '\n\n'.join(paragraphs)
 def transcribe_video(url):
     try:
         print(f"Attempting to download audio from URL: {url}")
@@ -219,7 +202,7 @@ def update_transcription(n_clicks, url):
         return dbc.Card([
             dbc.CardBody([
                 html.H5("Transcription Result"),
-                html.Pre(transcript),
                 dbc.Button("Download Transcript", id="btn-download", color="secondary", className="mt-3")
             ])
         ]), download_data

 from dash.exceptions import PreventUpdate
 import base64
 import threading
+from pytube import YouTube
 # Initialize the speaker diarization pipeline
+pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token="YOUR_HF_AUTH_TOKEN")
+print("Speaker diarization pipeline initialized successfully")
 # Check if CUDA is available and set the device
 device = "cuda" if torch.cuda.is_available() else "cpu"
 def download_audio_from_url(url):
     try:
+        if "youtube.com" in url or "youtu.be" in url:
+            print("Processing YouTube URL...")
+            yt = YouTube(url)
+            audio_stream = yt.streams.filter(only_audio=True).first()
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
+                audio_stream.download(output_path=temp_file.name)
+                audio_bytes = open(temp_file.name, "rb").read()
+            os.unlink(temp_file.name)
+        elif "share" in url:
             print("Processing shareable link...")
             response = requests.get(url)
             soup = BeautifulSoup(response.content, 'html.parser')
                 print(f"Extracted video URL: {video_url}")
             else:
                 raise ValueError("Direct video URL not found in the shareable link.")
+            response = requests.get(video_url)
+            audio_bytes = response.content
         else:
+            print(f"Downloading video from URL: {url}")
+            response = requests.get(url)
+            audio_bytes = response.content
         print(f"Successfully downloaded {len(audio_bytes)} bytes of data")
         return audio_bytes
     except Exception as e:
         print(f"Audio duration: {len(audio_input) / sr:.2f} seconds")
         # Apply speaker diarization
+        print("Applying speaker diarization...")
+        diarization = pipeline(audio_file)
+        print("Speaker diarization complete.")
         chunk_length = 30 * sr
         overlap = 5 * sr
         full_transcription = " ".join(transcriptions)
         print(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")
+        print("Applying formatting with speaker diarization...")
+        formatted_transcription = format_transcript_with_speakers(full_transcription, diarization)
         return formatted_transcription
     except Exception as e:
         print(f"Error in transcribe_audio: {str(e)}")
         raise
 def transcribe_video(url):
     try:
         print(f"Attempting to download audio from URL: {url}")
         return dbc.Card([
             dbc.CardBody([
                 html.H5("Transcription Result"),
+                html.Pre(transcript, style={"white-space": "pre-wrap", "word-wrap": "break-word"}),
                 dbc.Button("Download Transcript", id="btn-download", color="secondary", className="mt-3")
             ])
         ]), download_data