mulasagg committed on
Commit
df039e9
·
1 Parent(s): 9740f3b

add sank and emo

Browse files
app.py CHANGED
@@ -18,6 +18,7 @@ from vps.vps_api import main as analyze_vps_main
18
  from ves.ves import calc_voice_engagement_score
19
  from transcribe import transcribe_audio
20
  from filler_count.filler_score import analyze_fillers
 
21
 
22
  app = FastAPI()
23
 
@@ -358,6 +359,9 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
358
  ves_result = calc_voice_engagement_score(temp_filepath)
359
  filler_count = analyze_fillers(temp_filepath) # Assuming this function returns a dict with filler count
360
  transcript = transcribe_audio(temp_filepath, language, "base") #fix this
 
 
 
361
 
362
  # Combine results into a single response
363
  combined_result = {
@@ -369,7 +373,9 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
369
  "vps": vps_result,
370
  "ves": ves_result,
371
  "filler_words": filler_count,
372
- "transcript": transcript
 
 
373
  }
374
 
375
  return JSONResponse(content=combined_result)
 
18
  from ves.ves import calc_voice_engagement_score
19
  from transcribe import transcribe_audio
20
  from filler_count.filler_score import analyze_fillers
21
+ from emotion.emo_predict import predict_emotion
22
 
23
  app = FastAPI()
24
 
 
359
  ves_result = calc_voice_engagement_score(temp_filepath)
360
  filler_count = analyze_fillers(temp_filepath) # Assuming this function returns a dict with filler count
361
  transcript = transcribe_audio(temp_filepath, language, "base") #fix this
362
+ emotion = predict_emotion(temp_filepath)
363
+ avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7
364
+
365
 
366
  # Combine results into a single response
367
  combined_result = {
 
373
  "vps": vps_result,
374
  "ves": ves_result,
375
  "filler_words": filler_count,
376
+ "transcript": transcript,
377
+ "emotion": emotion ,
378
+ "sank_score": avg_score
379
  }
380
 
381
  return JSONResponse(content=combined_result)
emotion/__init__.py ADDED
File without changes
emotion/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (133 Bytes). View file
 
emotion/__pycache__/emo_predict.cpython-312.pyc ADDED
Binary file (1.63 kB). View file
 
emotion/emo_predict.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
2
+ import librosa
3
+ import torch
4
+
5
# Hugging Face checkpoint id shared by the feature extractor and the classifier.
_EMOTION_CHECKPOINT = "r-f/wav2vec-english-speech-emotion-recognition"

# Both objects are built once, when this module is imported, and are then
# reused by every call to predict_emotion().
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(_EMOTION_CHECKPOINT)
model = Wav2Vec2ForSequenceClassification.from_pretrained(_EMOTION_CHECKPOINT)

# The model is only used for prediction, so put it in evaluation mode.
model.eval()
9
+
10
def predict_emotion(audio_path):
    """Return the emotion label predicted for the audio file at ``audio_path``.

    The file is loaded with librosa at a 16 kHz sampling rate, converted to
    model inputs by the module-level ``feature_extractor`` and classified by
    the module-level ``model``. The result is the entry of
    ``model.config.id2label`` for the class with the highest probability.
    """
    # librosa returns the samples together with the sampling rate it used.
    waveform, sample_rate = librosa.load(audio_path, sr=16000)

    # Turn the raw samples into PyTorch tensors ("pt") for the model.
    model_inputs = feature_extractor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True,
    )

    # Prediction only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**model_inputs).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        best_class = torch.argmax(probabilities, dim=-1).item()

    return model.config.id2label[best_class]
25
+
26
+ # # Example usage
27
+ # emotion = predict_emotion(r"D:\Intern\shankh\audio_samples\anga.wav")
28
+ # print(f"Predicted Emotion: {emotion}")
requirements.txt CHANGED
@@ -13,7 +13,8 @@ scipy
13
  openai-whisper==20240930
14
  spacy==3.8.5
15
  en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
16
-
 
17
 
18
 
19
  numpy
 
13
  openai-whisper==20240930
14
  spacy==3.8.5
15
  en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
16
+ transformers
17
+ torch
18
 
19
 
20
  numpy