camparchimedes committed on
Commit
30cf24d
·
verified ·
1 Parent(s): de293de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -46,6 +46,7 @@ def convert_to_wav(audio_file):
46
  audio = AudioSegment.from_file(audio_file, format="m4a")
47
  wav_file = "temp.wav"
48
  audio.export(wav_file, format="wav")
 
49
  return wav_file
50
 
51
  #:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
@@ -56,11 +57,10 @@ kwargs = {
56
  "language": "no",
57
  }
58
 
59
- # funct.@ASR,
60
  def transcribe_audio(audio_file):
61
- if audio_file.endswith(".m4a"):
62
  audio_file = convert_to_wav(audio_file)
63
-
64
  start_time = time.time()
65
 
66
  outputs = asr(audio_file, forced_decoder_ids=None, task="transcribe", batch_size=16, return_timestamps=False, **kwargs) # chunk_length_s=30,
@@ -71,10 +71,17 @@ def transcribe_audio(audio_file):
71
  word_count = len(text.split())
72
 
73
  result = f"Transcription: {text.strip()}\n\nTime taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
 
74
 
75
  return text.strip(), result
76
  #:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
77
 
 
 
 
 
 
 
78
  # Clean and preprocess text
79
  def clean_text(text):
80
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
 
46
  audio = AudioSegment.from_file(audio_file, format="m4a")
47
  wav_file = "temp.wav"
48
  audio.export(wav_file, format="wav")
49
+
50
  return wav_file
51
 
52
  #:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 
57
  "language": "no",
58
  }
59
 
 
60
  def transcribe_audio(audio_file):
61
+ if audio_file.endswith(".m4a"):
62
  audio_file = convert_to_wav(audio_file)
63
+
64
  start_time = time.time()
65
 
66
  outputs = asr(audio_file, forced_decoder_ids=None, task="transcribe", batch_size=16, return_timestamps=False, **kwargs) # chunk_length_s=30,
 
71
  word_count = len(text.split())
72
 
73
  result = f"Transcription: {text.strip()}\n\nTime taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
74
+
75
 
76
  return text.strip(), result
77
  #:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
78
 
79
+
80
+
81
+
82
+
83
+
84
+
85
  # Clean and preprocess text
86
  def clean_text(text):
87
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)