add sank and emo
Browse files
- app.py +7 -1
- emotion/__init__.py +0 -0
- emotion/__pycache__/__init__.cpython-312.pyc +0 -0
- emotion/__pycache__/emo_predict.cpython-312.pyc +0 -0
- emotion/emo_predict.py +28 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -18,6 +18,7 @@ from vps.vps_api import main as analyze_vps_main
 from ves.ves import calc_voice_engagement_score
 from transcribe import transcribe_audio
 from filler_count.filler_score import analyze_fillers
+from emotion.emo_predict import predict_emotion
 
 app = FastAPI()
 
@@ -358,6 +359,9 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
     ves_result = calc_voice_engagement_score(temp_filepath)
     filler_count = analyze_fillers(temp_filepath)  # Assuming this function returns a dict with filler count
     transcript = transcribe_audio(temp_filepath, language, "base")  # fix this
+    emotion = predict_emotion(temp_filepath)
+    avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7
+
 
     # Combine results into a single response
     combined_result = {
@@ -369,7 +373,9 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
         "vps": vps_result,
         "ves": ves_result,
         "filler_words": filler_count,
-        "transcript": transcript
+        "transcript": transcript,
+        "emotion": emotion,
+        "sank_score": avg_score
     }
 
     return JSONResponse(content=combined_result)
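For reference, here is a minimal sketch of the averaging that produces sank_score, pulling the seven sub-scores out of the analyzer result dicts using the key names that appear in the commit; the helper name and the dict-of-results argument are illustrative and not part of app.py.

# Hedged sketch: 'results' maps analyzer name -> its result dict, using the key
# names exactly as they appear in the diff (including the 'Voice Clarity Sore' key).
# The helper itself is illustrative, not part of the commit.
SCORE_KEYS = {
    "fluency": "fluency_score",
    "tone": "speech_dynamism_score",
    "vcs": "Voice Clarity Sore",
    "vers": "VERS Score",
    "voice_confidence": "voice_confidence_score",
    "vps": "VPS",
    "ves": "ves",
}

def average_score(results):
    # Simple arithmetic mean over the seven sub-scores, matching the avg_score line above.
    values = [results[name][key] for name, key in SCORE_KEYS.items()]
    return sum(values) / len(values)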
emotion/__init__.py
ADDED
File without changes

emotion/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (133 Bytes)

emotion/__pycache__/emo_predict.cpython-312.pyc
ADDED
Binary file (1.63 kB)
emotion/emo_predict.py
ADDED
@@ -0,0 +1,28 @@
+from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
+import librosa
+import torch
+
+# Load the feature extractor and model
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("r-f/wav2vec-english-speech-emotion-recognition")
+model = Wav2Vec2ForSequenceClassification.from_pretrained("r-f/wav2vec-english-speech-emotion-recognition")
+model.eval()
+
+def predict_emotion(audio_path):
+    # Load audio (mono, 16kHz)
+    audio, rate = librosa.load(audio_path, sr=16000)
+
+    # Extract features
+    inputs = feature_extractor(audio, sampling_rate=rate, return_tensors="pt", padding=True)
+
+    # Predict emotion
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    pred_id = torch.argmax(probs, dim=-1).item()
+    emotion = model.config.id2label[pred_id]
+
+    return emotion
+
+# # Example usage
+# emotion = predict_emotion(r"D:\Intern\shankh\audio_samples\anga.wav")
+# print(f"Predicted Emotion: {emotion}")
requirements.txt
CHANGED
@@ -13,7 +13,8 @@ scipy
 openai-whisper==20240930
 spacy==3.8.5
 en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
-
+transformers
+torch
 
 
 numpy