husseinelsaadi committed on
Commit 35f6a81 · verified · 1 Parent(s): 6d2cf08

Update app.py to use GPU for faster processing

Files changed (1):
  1. app.py  +34 −14
app.py CHANGED

@@ -106,6 +106,7 @@ class ChatGroq:
         except requests.exceptions.RequestException as e:
             logging.error(f"ChatGroq API error: {e}")
             return "Interviewer: Due to a system issue, let's move on to another question."
+
 groq_llm = ChatGroq(
     temperature=0.7,
     model_name="llama-3.3-70b-versatile",
@@ -122,26 +123,34 @@ if HF_TOKEN:
 else:
     raise EnvironmentError("Missing HF_TOKEN environment variable.")
 
-
+#Load mistral Model
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+print(torch.cuda.is_available())
+
-#MODEL_PATH = "mistralai/Mistral-7B-Instruct-v0.3"
-MODEL_PATH = "tiiuae/falcon-rw-1b"
+MODEL_PATH = "mistralai/Mistral-7B-Instruct-v0.3"
+#MODEL_PATH = "tiiuae/falcon-rw-1b"
 
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
+
+mistral_tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH,use_auth_token=True)
 
-model = AutoModelForCausalLM.from_pretrained(
+judge_llm = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH,
-    torch_dtype=torch.bfloat16 if torch.backends.mps.is_available() else torch.float32,
+    quantization_config=bnb_config,torch_dtype=torch.float16,
     device_map="auto",
-    offload_folder="offload"
+    use_auth_token=True
 )
 
-falcon_pipeline = pipeline(
+judge_pipeline = pipeline(
     "text-generation",
-    model=model,
-    tokenizer=tokenizer,
+    model=judge_llm,
+    tokenizer=mistral_tokenizer,
     max_new_tokens=128,
     temperature=0.3,
     top_p=0.9,
@@ -149,9 +158,6 @@ falcon_pipeline = pipeline(
     repetition_penalty=1.1,
 )
 
-# ✅ Test it
-# result = falcon_pipeline("Explain LLMs:")
-# print(result[0]["generated_text"])
 
 # embedding model
 from sentence_transformers import SentenceTransformer
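Review note: this hunk constructs BitsAndBytesConfig, but the import line shown still only pulls in AutoTokenizer, AutoModelForCausalLM, and pipeline, so unless app.py imports it elsewhere the load raises NameError. Below is a minimal corrected sketch, not the committed code: it assumes a CUDA GPU (bitsandbytes 4-bit quantization requires one), uses `token` in place of the deprecated `use_auth_token`, and adds `do_sample=True`, without which the pipeline ignores `temperature` and `top_p`.

# Sketch only: 4-bit NF4 load of the judge model, with the missing import.
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,  # the import this hunk is missing
    pipeline,
)

MODEL_PATH = "mistralai/Mistral-7B-Instruct-v0.3"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4 bits
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
    bnb_4bit_use_double_quant=True,        # quantize the quantization constants too
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
)

mistral_tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, token=True)
judge_llm = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto",
    token=True,
)
judge_pipeline = pipeline(
    "text-generation",
    model=judge_llm,
    tokenizer=mistral_tokenizer,
    max_new_tokens=128,
    do_sample=True,  # required for temperature/top_p to take effect
    temperature=0.3,
    top_p=0.9,
    repetition_penalty=1.1,
)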
@@ -1455,6 +1461,10 @@ import whisper
 from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
 import librosa
 
+import torch
+print(torch.cuda.is_available())  # ✅ Tells you if GPU is available
+
+
 # Bark TTS
 print("🔍 Loading Bark model...")
 model_bark = BarkModel.from_pretrained("suno/bark")
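Review note: this hunk only reports whether CUDA is available; the Bark model itself is never moved in the lines shown, so TTS would still run on CPU unless it is transferred elsewhere in the file. If GPU inference is the goal, a sketch of the explicit transfer, assuming the module keeps the model_bark name from the context lines:

# Sketch only: place Bark on the GPU when one is present, else stay on CPU.
import torch
from transformers import BarkModel

bark_device = "cuda" if torch.cuda.is_available() else "cpu"
model_bark = BarkModel.from_pretrained("suno/bark").to(bark_device)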
@@ -1481,7 +1491,7 @@ def bark_tts(text):
 
 # Whisper STT
 print("🔍 Loading Whisper model...")
-whisper_model = whisper.load_model("base")
+whisper_model = whisper.load_model("base", device="cuda")
 print("✅ Whisper model loaded")
 def whisper_stt(audio_path):
     if not audio_path or not os.path.exists(audio_path): return ""
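Review note: hard-coding device="cuda" trades the old graceful CPU behavior for a crash on any machine without a GPU. A defensive sketch that keeps the speedup but preserves the fallback:

# Sketch only: use the GPU when available, fall back to CPU otherwise.
import torch
import whisper

whisper_device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_model = whisper.load_model("base", device=whisper_device)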
@@ -1543,6 +1553,7 @@ local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2" # Loc
 print("🔍 Loading Wav2Vec processor and model...")
 wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
 wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
+wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
 print("✅ Wav2Vec model loaded")
 wav2vec_model.eval()
 voice_label_map = {
@@ -1555,14 +1566,23 @@ voice_label_map = {
 def analyze_audio_emotion(audio_path):
     print(f"🔍 Analyzing audio emotion for: {audio_path}")
     if not audio_path or not os.path.exists(audio_path): return "neutral"
+
     speech, sr = librosa.load(audio_path, sr=16000)
     inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
+
+    # 🔥 Move model and inputs to GPU
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    wav2vec_model.to(device)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
     with torch.no_grad():
         logits = wav2vec_model(**inputs).logits
+
     probs = torch.nn.functional.softmax(logits, dim=-1)
     predicted_id = torch.argmax(probs, dim=-1).item()
     return voice_label_map.get(predicted_id, "neutral")
 
+
 # --- Effective confidence calculation
 def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
     emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
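Review note: the previous hunk already places wav2vec_model on the device at load time, so the per-call wav2vec_model.to(device) here is a no-op after the first invocation; only the inputs actually need to move on each call, and the softmax is unnecessary because argmax over raw logits picks the same index. A tidier sketch, assuming the module-level names from the hunks above:

# Sketch only: resolve the device once at module scope, move only inputs per call.
# wav2vec_model, wav2vec_processor, and voice_label_map come from the hunks above.
import os
import torch
import librosa

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
wav2vec_model = wav2vec_model.to(DEVICE)

def analyze_audio_emotion(audio_path):
    if not audio_path or not os.path.exists(audio_path):
        return "neutral"
    speech, sr = librosa.load(audio_path, sr=16000)
    inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}  # inputs move per call
    with torch.no_grad():
        logits = wav2vec_model(**inputs).logits
    predicted_id = torch.argmax(logits, dim=-1).item()  # argmax unaffected by softmax
    return voice_label_map.get(predicted_id, "neutral")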
 