qfuxa committed
Commit df32d26 · 1 Parent(s): f94a527

solve #100

README.md CHANGED
@@ -72,6 +72,12 @@ pip install tokenize_uk # If you work with Ukrainian text
 
 # If you want to use diarization
 pip install diart
+
+# Optional backends. Default is faster-whisper
+pip install whisperlivekit[whisper] # Original Whisper backend
+pip install whisperlivekit[whisper-timestamped] # Whisper with improved timestamps
+pip install whisperlivekit[mlx-whisper] # Optimized for Apple Silicon
+pip install whisperlivekit[openai] # OpenAI API backend
 ```
 
 ### Get access to 🎹 pyannote models
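Not part of the commit, but a quick way to sanity-check an environment after installing one of these extras is to test which backend packages are importable. The module names below are the usual import names of those distributions (faster_whisper for the default backend, whisper, whisper_timestamped, mlx_whisper, openai):

```python
# Sketch: report which optional backend packages are importable
# in the current environment.
import importlib.util

for module in ("faster_whisper", "whisper", "whisper_timestamped", "mlx_whisper", "openai"):
    status = "installed" if importlib.util.find_spec(module) else "missing"
    print(f"{module:20s} {status}")
```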
setup.py CHANGED
@@ -1,8 +1,7 @@
 from setuptools import setup, find_packages
-
 setup(
     name="whisperlivekit",
-    version="0.1.2",
+    version="0.1.3",
     description="Real-time, Fully Local Whisper's Speech-to-Text and Speaker Diarization",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
@@ -22,6 +21,10 @@ setup(
         "diarization": ["diart"],
         "vac": ["torch"],
         "sentence": ["mosestokenizer", "wtpsplit"],
+        "whisper": ["whisper"],
+        "whisper-timestamped": ["whisper-timestamped"],
+        "mlx-whisper": ["mlx-whisper"],
+        "openai": ["openai"],
     },
     package_data={
         'whisperlivekit': ['web/*.html'],
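Each new extras_require key becomes installable as pip install whisperlivekit[&lt;key&gt;], which is exactly what the README hunk above documents. A small sketch, not part of the commit, for confirming which extras an installed distribution advertises:

```python
# Sketch: list the extras exposed by the installed whisperlivekit distribution.
# With this commit installed, the output should include the new keys:
# whisper, whisper-timestamped, mlx-whisper, openai.
from importlib.metadata import metadata

meta = metadata("whisperlivekit")
print(meta.get_all("Provides-Extra"))
```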
whisperlivekit/core.py CHANGED
@@ -1,7 +1,7 @@
 try:
     from whisperlivekit.whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
-except:
-    from whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
+except ImportError:
+    from .whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
 from argparse import Namespace, ArgumentParser
 
 def parse_args():
whisperlivekit/whisper_streaming_custom/backends.py CHANGED
@@ -3,7 +3,10 @@ import logging
 import io
 import soundfile as sf
 import math
-import torch
+try:
+    import torch
+except ImportError:
+    torch = None
 from typing import List
 import numpy as np
 from whisperlivekit.timed_objects import ASRToken
@@ -102,7 +105,7 @@ class FasterWhisperASR(ASRBase):
             model_size_or_path = modelsize
         else:
             raise ValueError("Either modelsize or model_dir must be set")
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = "cuda" if torch and torch.cuda.is_available() else "cpu"
         compute_type = "float16" if device == "cuda" else "float32"
 
         model = WhisperModel(
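The guarded import above keeps the faster-whisper path usable when torch is absent: CUDA is only selected if torch both imports and reports an available GPU. A standalone sketch of the same pattern (the helper name is illustrative, not from the codebase):

```python
# Optional-dependency pattern from the hunk above: tolerate a missing torch
# and fall back to CPU with float32 compute.
try:
    import torch
except ImportError:
    torch = None  # torch is optional; absence simply means no CUDA detection

def pick_device_and_compute_type():
    """Illustrative helper: choose device/compute_type the way backends.py now does."""
    device = "cuda" if torch and torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "float32"
    return device, compute_type

print(pick_device_and_compute_type())
```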