Update app.py to use GPU for faster processing
app.py CHANGED
@@ -106,6 +106,7 @@ class ChatGroq:
         except requests.exceptions.RequestException as e:
             logging.error(f"ChatGroq API error: {e}")
             return "Interviewer: Due to a system issue, let's move on to another question."
+
 groq_llm = ChatGroq(
     temperature=0.7,
     model_name="llama-3.3-70b-versatile",
@@ -122,26 +123,34 @@ if HF_TOKEN:
 else:
     raise EnvironmentError("Missing HF_TOKEN environment variable.")
 
-
+#Load mistral Model
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+print(torch.cuda.is_available())
+
+MODEL_PATH = "mistralai/Mistral-7B-Instruct-v0.3"
+#MODEL_PATH = "tiiuae/falcon-rw-1b"
 
-
-
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
 
-
+mistral_tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH,use_auth_token=True)
 
-
+judge_llm = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH,
-    torch_dtype=torch.
+    quantization_config=bnb_config,torch_dtype=torch.float16,
     device_map="auto",
-
+    use_auth_token=True
 )
 
-falcon_pipeline = pipeline(
+judge_pipeline = pipeline(
     "text-generation",
-    model=
-    tokenizer=
+    model=judge_llm,
+    tokenizer=mistral_tokenizer,
     max_new_tokens=128,
     temperature=0.3,
     top_p=0.9,
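For reference, a minimal self-contained sketch of the 4-bit loading pattern this hunk relies on. It assumes bitsandbytes and accelerate are installed, that the environment can access the gated Mistral repository, and that BitsAndBytesConfig is imported from transformers (it is not part of the import line shown in this hunk); variable names are illustrative, not necessarily the ones in app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

MODEL_PATH = "mistralai/Mistral-7B-Instruct-v0.3"

# 4-bit NF4 quantization config; bitsandbytes 4-bit loading typically requires a CUDA GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto",  # places layers on the available GPU(s)
)

judge_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
)

Recent transformers releases also prefer token= over the older use_auth_token= argument used in the hunk above.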
@@ -149,9 +158,6 @@ falcon_pipeline = pipeline(
     repetition_penalty=1.1,
 )
 
-# Test it
-# result = falcon_pipeline("Explain LLMs:")
-# print(result[0]["generated_text"])
 
 # embedding model
 from sentence_transformers import SentenceTransformer
@@ -1455,6 +1461,10 @@ import whisper
 from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
 import librosa
 
+import torch
+print(torch.cuda.is_available())  # Tells you if GPU is available
+
+
 # Bark TTS
 print("Loading Bark model...")
 model_bark = BarkModel.from_pretrained("suno/bark")
@@ -1481,7 +1491,7 @@ def bark_tts(text):
 
 # Whisper STT
 print("Loading Whisper model...")
-whisper_model = whisper.load_model("base")
+whisper_model = whisper.load_model("base", device="cuda")
 print("Whisper model loaded")
 def whisper_stt(audio_path):
     if not audio_path or not os.path.exists(audio_path): return ""
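The new load call pins Whisper to CUDA, which raises an error on CPU-only machines. If the app should still start without a GPU, a device-aware variant avoids that hard failure; a minimal sketch, assuming the openai-whisper package:

import torch
import whisper

# Fall back to CPU when no CUDA device is present.
device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_model = whisper.load_model("base", device=device)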
@@ -1543,6 +1553,7 @@ local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2" # Loc
 print("Loading Wav2Vec processor and model...")
 wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
 wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
+wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
 print("Wav2Vec model loaded")
 wav2vec_model.eval()
 voice_label_map = {
@@ -1555,14 +1566,23 @@ voice_label_map = {
 def analyze_audio_emotion(audio_path):
     print(f"Analyzing audio emotion for: {audio_path}")
     if not audio_path or not os.path.exists(audio_path): return "neutral"
+
     speech, sr = librosa.load(audio_path, sr=16000)
     inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
+
+    # Move model and inputs to GPU
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    wav2vec_model.to(device)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
     with torch.no_grad():
         logits = wav2vec_model(**inputs).logits
+
     probs = torch.nn.functional.softmax(logits, dim=-1)
     predicted_id = torch.argmax(probs, dim=-1).item()
     return voice_label_map.get(predicted_id, "neutral")
 
+
 # --- Effective confidence calculation
 def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
     emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
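The change above moves wav2vec_model to the GPU twice: once right after loading (new line 1556) and again on every call to analyze_audio_emotion. A sketch of resolving the device once at load time and reusing it per call, assuming the names defined in the hunks above (prints and extra checks omitted):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
wav2vec_model = wav2vec_model.to(device)
wav2vec_model.eval()

def analyze_audio_emotion(audio_path):
    speech, sr = librosa.load(audio_path, sr=16000)
    inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
    # Move the input tensors to the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        logits = wav2vec_model(**inputs).logits
    probs = torch.nn.functional.softmax(logits, dim=-1)
    predicted_id = torch.argmax(probs, dim=-1).item()
    return voice_label_map.get(predicted_id, "neutral")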