qfuxa committed
Commit df32d26 · 1 Parent(s): f94a527

solve #100

README.md CHANGED
@@ -72,6 +72,12 @@ pip install tokenize_uk # If you work with Ukrainian text
 
 # If you want to use diarization
 pip install diart
+
+# Optional backends. Default is faster-whisper
+pip install whisperlivekit[whisper] # Original Whisper backend
+pip install whisperlivekit[whisper-timestamped] # Whisper with improved timestamps
+pip install whisperlivekit[mlx-whisper] # Optimized for Apple Silicon
+pip install whisperlivekit[openai] # OpenAI API backend
 ```
 
 ### Get access to 🎹 pyannote models
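Not part of the commit, but a quick way to sanity-check an environment after installing one of these extras is to test which backend packages are importable. The module names below are the usual import names of those distributions (faster_whisper for the default backend, whisper, whisper_timestamped, mlx_whisper, openai):

```python
# Sketch: report which optional backend packages are importable
# in the current environment.
import importlib.util

for module in ("faster_whisper", "whisper", "whisper_timestamped", "mlx_whisper", "openai"):
    status = "installed" if importlib.util.find_spec(module) else "missing"
    print(f"{module:20s} {status}")
```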
setup.py CHANGED
@@ -1,8 +1,7 @@
 from setuptools import setup, find_packages
-
 setup(
     name="whisperlivekit",
-    version="0.1.2",
+    version="0.1.3",
     description="Real-time, Fully Local Whisper's Speech-to-Text and Speaker Diarization",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
@@ -22,6 +21,10 @@ setup(
         "diarization": ["diart"],
         "vac": ["torch"],
         "sentence": ["mosestokenizer", "wtpsplit"],
+        "whisper": ["whisper"],
+        "whisper-timestamped": ["whisper-timestamped"],
+        "mlx-whisper": ["mlx-whisper"],
+        "openai": ["openai"],
     },
     package_data={
         'whisperlivekit': ['web/*.html'],
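Each new extras_require key becomes installable as pip install whisperlivekit[&lt;key&gt;], which is exactly what the README hunk above documents. A small sketch, not part of the commit, for confirming which extras an installed distribution advertises:

```python
# Sketch: list the extras exposed by the installed whisperlivekit distribution.
# With this commit installed, the output should include the new keys:
# whisper, whisper-timestamped, mlx-whisper, openai.
from importlib.metadata import metadata

meta = metadata("whisperlivekit")
print(meta.get_all("Provides-Extra"))
```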
whisperlivekit/core.py CHANGED
@@ -1,7 +1,7 @@
 try:
     from whisperlivekit.whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
-except:
-    from whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
+except ImportError:
+    from .whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
 from argparse import Namespace, ArgumentParser
 
 def parse_args():
whisperlivekit/whisper_streaming_custom/backends.py CHANGED
@@ -3,7 +3,10 @@ import logging
 import io
 import soundfile as sf
 import math
-import torch
+try:
+    import torch
+except ImportError:
+    torch = None
 from typing import List
 import numpy as np
 from whisperlivekit.timed_objects import ASRToken
@@ -102,7 +105,7 @@ class FasterWhisperASR(ASRBase):
             model_size_or_path = modelsize
         else:
             raise ValueError("Either modelsize or model_dir must be set")
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = "cuda" if torch and torch.cuda.is_available() else "cpu"
         compute_type = "float16" if device == "cuda" else "float32"
 
         model = WhisperModel(
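The guarded import above keeps the faster-whisper path usable when torch is absent: CUDA is only selected if torch both imports and reports an available GPU. A standalone sketch of the same pattern (the helper name is illustrative, not from the codebase):

```python
# Optional-dependency pattern from the hunk above: tolerate a missing torch
# and fall back to CPU with float32 compute.
try:
    import torch
except ImportError:
    torch = None  # torch is optional; absence simply means no CUDA detection

def pick_device_and_compute_type():
    """Illustrative helper: choose device/compute_type the way backends.py now does."""
    device = "cuda" if torch and torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "float32"
    return device, compute_type

print(pick_device_and_compute_type())
```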