Spaces:

alexandrainst
/

roest-demo

Sleeping

App Files Community

saattrupdan committed on Sep 19, 2024

Commit

d087544

1 Parent(s): 8cd000d

chore: Add logging

Browse files

Files changed (1) hide show

app.py +19 -0

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """Røst ASR demo."""
 import os
 import warnings
@@ -11,6 +12,13 @@ from punctfix import PunctFixer
 from transformers import pipeline
 from dotenv import load_dotenv
 load_dotenv()
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -33,6 +41,7 @@ send the audio to the model for transcription. You can also upload an audio file
 pressing the {icon} button.
 """
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 transcriber = pipeline(
     task="automatic-speech-recognition",
@@ -40,8 +49,12 @@ transcriber = pipeline(
     device=device,
     token=os.getenv("HUGGINGFACE_HUB_TOKEN", True),
 )
 transcription_fixer = PunctFixer(language="da", device=device)
 def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
     """Transcribe the audio.
@@ -56,12 +69,18 @@ def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
     if audio.ndim > 1:
         audio = np.mean(audio, axis=1)
     audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")
     transcription = transcriber(inputs=audio)
     if not isinstance(transcription, dict):
         return ""
     cleaned_transcription = transcription_fixer.punctuate(
         text=transcription["text"]
     )
     return cleaned_transcription
 demo = gr.Interface(

 """Røst ASR demo."""
+import logging
 import os
 import warnings
 from transformers import pipeline
 from dotenv import load_dotenv
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s ⋅ %(name)s ⋅ %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger("roest-asr-demo")
 load_dotenv()
 warnings.filterwarnings("ignore", category=FutureWarning)
 pressing the {icon} button.
 """
+logger.info("Loading the ASR model...")
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 transcriber = pipeline(
     task="automatic-speech-recognition",
     device=device,
     token=os.getenv("HUGGINGFACE_HUB_TOKEN", True),
 )
+logger.info("Loading the punctuation fixer model...")
 transcription_fixer = PunctFixer(language="da", device=device)
+logger.info("Models loaded, ready to transcribe audio.")
 def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
     """Transcribe the audio.
     if audio.ndim > 1:
         audio = np.mean(audio, axis=1)
     audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")
+    logger.info(f"Transcribing audio clip of {len(audio) / 16_000:.2f} seconds...")
     transcription = transcriber(inputs=audio)
     if not isinstance(transcription, dict):
         return ""
+    logger.info(f"Raw transcription is {transcription['text']!r}. Cleaning it up...")
     cleaned_transcription = transcription_fixer.punctuate(
         text=transcription["text"]
     )
+    logger.info(f"Final transcription: {cleaned_transcription!r}")
     return cleaned_transcription
 demo = gr.Interface(