add sank and emo
Browse files
- app.py +7 -1
- emotion/__init__.py +0 -0
- emotion/__pycache__/__init__.cpython-312.pyc +0 -0
- emotion/__pycache__/emo_predict.cpython-312.pyc +0 -0
- emotion/emo_predict.py +28 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -18,6 +18,7 @@ from vps.vps_api import main as analyze_vps_main
 from ves.ves import calc_voice_engagement_score
 from transcribe import transcribe_audio
 from filler_count.filler_score import analyze_fillers
+from emotion.emo_predict import predict_emotion
 
 app = FastAPI()
 
@@ -358,6 +359,9 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
     ves_result = calc_voice_engagement_score(temp_filepath)
     filler_count = analyze_fillers(temp_filepath)  # Assuming this function returns a dict with filler count
     transcript = transcribe_audio(temp_filepath, language, "base")  # fix this
+    emotion = predict_emotion(temp_filepath)
+    avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7
+
 
     # Combine results into a single response
     combined_result = {
@@ -369,7 +373,9 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
         "vps": vps_result,
         "ves": ves_result,
         "filler_words": filler_count,
-        "transcript": transcript
+        "transcript": transcript,
+        "emotion": emotion,
+        "sank_score": avg_score
     }
 
     return JSONResponse(content=combined_result)
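For reference, here is a minimal sketch of the averaging that produces sank_score, pulling the seven sub-scores out of the analyzer result dicts using the key names that appear in the commit; the helper name and the dict-of-results argument are illustrative and not part of app.py.

# Hedged sketch: 'results' maps analyzer name -> its result dict, using the key
# names exactly as they appear in the diff (including the 'Voice Clarity Sore' key).
# The helper itself is illustrative, not part of the commit.
SCORE_KEYS = {
    "fluency": "fluency_score",
    "tone": "speech_dynamism_score",
    "vcs": "Voice Clarity Sore",
    "vers": "VERS Score",
    "voice_confidence": "voice_confidence_score",
    "vps": "VPS",
    "ves": "ves",
}

def average_score(results):
    # Simple arithmetic mean over the seven sub-scores, matching the avg_score line above.
    values = [results[name][key] for name, key in SCORE_KEYS.items()]
    return sum(values) / len(values)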
emotion/__init__.py
ADDED
File without changes

emotion/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (133 Bytes)

emotion/__pycache__/emo_predict.cpython-312.pyc
ADDED
Binary file (1.63 kB)
emotion/emo_predict.py
ADDED
@@ -0,0 +1,28 @@
+from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
+import librosa
+import torch
+
+# Load the feature extractor and model
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("r-f/wav2vec-english-speech-emotion-recognition")
+model = Wav2Vec2ForSequenceClassification.from_pretrained("r-f/wav2vec-english-speech-emotion-recognition")
+model.eval()
+
+def predict_emotion(audio_path):
+    # Load audio (mono, 16kHz)
+    audio, rate = librosa.load(audio_path, sr=16000)
+
+    # Extract features
+    inputs = feature_extractor(audio, sampling_rate=rate, return_tensors="pt", padding=True)
+
+    # Predict emotion
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    pred_id = torch.argmax(probs, dim=-1).item()
+    emotion = model.config.id2label[pred_id]
+
+    return emotion
+
+# # Example usage
+# emotion = predict_emotion(r"D:\Intern\shankh\audio_samples\anga.wav")
+# print(f"Predicted Emotion: {emotion}")
requirements.txt
CHANGED
@@ -13,7 +13,8 @@ scipy
 openai-whisper==20240930
 spacy==3.8.5
 en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
-
+transformers
+torch
 
 
 numpy