bluenevus committed on
Commit 752e0a6 · verified
1 Parent(s): 0cfb05e

Update app.py

Files changed (1)
  1. app.py +34 -44
app.py CHANGED
@@ -10,6 +10,8 @@ import os
 from bs4 import BeautifulSoup
 import re
 import numpy as np
+from moviepy.editor import VideoFileClip
+import soundfile as sf
 
 # Load the transcription model
 transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
@@ -38,50 +40,38 @@ def download_audio_from_url(url):
         print(f"Error in download_audio_from_url: {str(e)}")
         raise
 
-def transcribe_audio(audio_bytes):
-    audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
-    audio.export("temp_audio.wav", format="wav")
-    waveform, sample_rate = torchaudio.load("temp_audio.wav")
-    os.remove("temp_audio.wav")
-
-    # Convert torch.Tensor to numpy.ndarray
-    waveform_np = waveform.numpy().squeeze()
-
-    # Transcribe the audio
-    result = transcription_pipeline(waveform_np, chunk_length_s=30)
-    transcript = result['text']
-
-    # Split transcript into paragraphs based on silence
-    chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
-    paragraphs = []
-    current_paragraph = ""
-
-    for chunk in chunks:
-        chunk.export("temp_chunk.wav", format="wav")
-        waveform_chunk, sample_rate_chunk = torchaudio.load("temp_chunk.wav")
-        os.remove("temp_chunk.wav")
-
-        # Convert torch.Tensor to numpy.ndarray
-        waveform_chunk_np = waveform_chunk.numpy().squeeze()
-
-        chunk_result = transcription_pipeline(waveform_chunk_np, chunk_length_s=30)
-        chunk_transcript = chunk_result['text']
-
-        if chunk_transcript:
-            if current_paragraph:
-                current_paragraph += " " + chunk_transcript
-            else:
-                current_paragraph = chunk_transcript
-        else:
-            if current_paragraph:
-                paragraphs.append(current_paragraph)
-                current_paragraph = ""
-
-    if current_paragraph:
-        paragraphs.append(current_paragraph)
-
-    formatted_transcript = "\n\n".join(paragraphs)
-    return formatted_transcript
+def transcribe_audio(video_bytes):
+    try:
+        # Save the video bytes to a temporary file
+        with open("temp_video.mp4", "wb") as f:
+            f.write(video_bytes)
+
+        # Extract audio from video
+        video = VideoFileClip("temp_video.mp4")
+        audio = video.audio
+
+        # Export audio as mono WAV
+        audio.write_audiofile("temp_audio.wav", fps=16000, nbytes=2, codec='pcm_s16le')
+
+        # Load the audio file
+        audio_data, sample_rate = sf.read("temp_audio.wav")
+
+        # Ensure audio is mono
+        if len(audio_data.shape) > 1:
+            audio_data = audio_data.mean(axis=1)
+
+        # Transcribe the audio
+        result = transcription_pipeline(audio_data, sampling_rate=sample_rate)
+        transcript = result['text']
+
+        # Clean up temporary files
+        os.remove("temp_video.mp4")
+        os.remove("temp_audio.wav")
+
+        return transcript
+    except Exception as e:
+        print(f"Error in transcribe_audio: {str(e)}")
+        raise
 
 def transcribe_video(url):
     try:
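
Note on the new pipeline call (an editorial sketch, not part of the commit): the rewritten transcribe_audio hands the raw NumPy array to the pipeline with a sampling_rate keyword. The transformers ASR pipeline also accepts a dict input of the form {"raw": array, "sampling_rate": rate}, which sidesteps keyword-handling differences across transformers versions; and since the commit exports the WAV at fps=16000, the rate already matches the 16 kHz audio facebook/wav2vec2-base-960h was trained on. A minimal harness under those assumptions, with "sample.wav" as a placeholder path:

# Hypothetical harness, not part of this commit: replays the new
# transcribe_audio() call path on a local WAV file so the pipeline
# input shape can be checked in isolation. "sample.wav" is a placeholder.
import soundfile as sf
from transformers import pipeline

transcription_pipeline = pipeline(
    "automatic-speech-recognition", model="facebook/wav2vec2-base-960h"
)

audio_data, sample_rate = sf.read("sample.wav")

# Fold stereo down to mono, mirroring the shape check in app.py.
if audio_data.ndim > 1:
    audio_data = audio_data.mean(axis=1)

# Dict input is the documented pipeline form for a raw array with an
# explicit rate; with 16 kHz audio it should behave like the committed call.
result = transcription_pipeline({"raw": audio_data, "sampling_rate": sample_rate})
print(result["text"])

Exporting at fps=16000 in the commit avoids any resampling step, since wav2vec2-base-960h expects 16 kHz input.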