husseinelsaadi commited on
Commit
1a5a90b
·
1 Parent(s): 51adbe8

updated faster whisper model

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/ai-interviewer-demo.iml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/.venv" />
6
+ </content>
7
+ <orderEntry type="inheritedJdk" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ <component name="PyDocumentationSettings">
11
+ <option name="format" value="PLAIN" />
12
+ <option name="myDocStringFormat" value="Plain" />
13
+ </component>
14
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
5
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
6
+ <option name="ignoredPackages">
7
+ <value>
8
+ <list size="11">
9
+ <item index="0" class="java.lang.String" itemvalue="scipy" />
10
+ <item index="1" class="java.lang.String" itemvalue="scikit-learn" />
11
+ <item index="2" class="java.lang.String" itemvalue="opencv-python" />
12
+ <item index="3" class="java.lang.String" itemvalue="pip" />
13
+ <item index="4" class="java.lang.String" itemvalue="numpy" />
14
+ <item index="5" class="java.lang.String" itemvalue="datasets" />
15
+ <item index="6" class="java.lang.String" itemvalue="evaluate" />
16
+ <item index="7" class="java.lang.String" itemvalue="fuzzywuzzy" />
17
+ <item index="8" class="java.lang.String" itemvalue="ragas" />
18
+ <item index="9" class="java.lang.String" itemvalue="TTS" />
19
+ <item index="10" class="java.lang.String" itemvalue="textract" />
20
+ </list>
21
+ </value>
22
+ </option>
23
+ </inspection_tool>
24
+ </profile>
25
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/ai-interviewer-demo.iml" filepath="$PROJECT_DIR$/.idea/ai-interviewer-demo.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
app.py CHANGED
@@ -1697,7 +1697,7 @@ import os
1697
  import json
1698
  from transformers import BarkModel, AutoProcessor
1699
  import torch, gc
1700
- import whisper
1701
  import asyncio
1702
  import threading
1703
  from concurrent.futures import ThreadPoolExecutor
@@ -1709,7 +1709,7 @@ gc.collect()
1709
  # Global variables for lazy loading
1710
  model_bark = None
1711
  processor_bark = None
1712
- whisper_model = None
1713
  bark_voice_preset = "v2/en_speaker_6"
1714
 
1715
  # Thread pool for async operations
@@ -1726,7 +1726,7 @@ else:
1726
 
1727
  def load_models_lazy():
1728
  """Load models only when needed"""
1729
- global model_bark, processor_bark, whisper_model
1730
 
1731
  device = "cuda" if torch.cuda.is_available() else "cpu"
1732
  print(f"🔍 Using device: {device}")
@@ -1740,11 +1740,13 @@ def load_models_lazy():
1740
  print("🔍 Loading Bark processor...")
1741
  processor_bark = AutoProcessor.from_pretrained("suno/bark")
1742
  print("✅ Bark processor loaded")
1743
-
1744
- if whisper_model is None:
1745
- print("🔍 Loading Whisper model...")
1746
- whisper_model = whisper.load_model("base", device=device)
1747
- print(f"✅ Whisper model loaded on {device}")
 
 
1748
 
1749
  def bark_tts_async(text):
1750
  """Fully correct async TTS generation with Bark"""
@@ -1773,18 +1775,17 @@ def bark_tts_async(text):
1773
 
1774
 
1775
  def whisper_stt(audio_path):
1776
- """Lazy loading whisper STT"""
1777
- if not audio_path or not os.path.exists(audio_path):
1778
  return ""
1779
-
1780
- load_models_lazy() # Load only when needed
1781
-
1782
- # Check what device Whisper is actually using
1783
- device = "cuda" if torch.cuda.is_available() else "cpu"
1784
- print(f"🔍 Whisper transcribing on {device}")
1785
-
1786
- result = whisper_model.transcribe(audio_path)
1787
- return result["text"]
1788
 
1789
  seniority_mapping = {
1790
  "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
 
1697
  import json
1698
  from transformers import BarkModel, AutoProcessor
1699
  import torch, gc
1700
+ from faster_whisper import WhisperModel
1701
  import asyncio
1702
  import threading
1703
  from concurrent.futures import ThreadPoolExecutor
 
1709
  # Global variables for lazy loading
1710
  model_bark = None
1711
  processor_bark = None
1712
+ faster_whisper_model = None
1713
  bark_voice_preset = "v2/en_speaker_6"
1714
 
1715
  # Thread pool for async operations
 
1726
 
1727
  def load_models_lazy():
1728
  """Load models only when needed"""
1729
+ global model_bark, processor_bark, faster_whisper_model
1730
 
1731
  device = "cuda" if torch.cuda.is_available() else "cpu"
1732
  print(f"🔍 Using device: {device}")
 
1740
  print("πŸ” Loading Bark processor...")
1741
  processor_bark = AutoProcessor.from_pretrained("suno/bark")
1742
  print("βœ… Bark processor loaded")
1743
+
1744
+ if faster_whisper_model is None:
1745
+ print("🔍 Loading Faster-Whisper model...")
1746
+ compute_type = "float16" if device == "cuda" else "int8"
1747
+ faster_whisper_model = WhisperModel("base", device=device, compute_type=compute_type)
1748
+ print(f"✅ Faster-Whisper model loaded on {device}")
1749
+
1750
 
1751
  def bark_tts_async(text):
1752
  """Fully correct async TTS generation with Bark"""
 
1775
 
1776
 
1777
  def whisper_stt(audio_path):
1778
+ """STT using Faster-Whisper"""
1779
+ if not audio_path or not os.path.exists(audio_path):
1780
  return ""
1781
+
1782
+ load_models_lazy()
1783
+ print("🔍 Transcribing with Faster-Whisper")
1784
+
1785
+ segments, _ = faster_whisper_model.transcribe(audio_path)
1786
+ transcript = " ".join(segment.text for segment in segments)
1787
+ return transcript.strip()
1788
+
 
1789
 
1790
  seniority_mapping = {
1791
  "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
requirements.txt CHANGED
@@ -37,4 +37,5 @@ pip==23.3.1
37
  accelerate==0.29.3
38
  huggingface_hub==0.20.3
39
  textract==1.6.3
40
- bitsandbytes
 
 
37
  accelerate==0.29.3
38
  huggingface_hub==0.20.3
39
  textract==1.6.3
40
+ bitsandbytes
41
+ faster-whisper==0.10.0