Spaces:
Sleeping
Sleeping
Commit
·
0073001
1
Parent(s):
c9de652
- Dockerfile +3 -0
- app.py +17 -31
- requirements.txt +1 -1
Dockerfile
CHANGED
@@ -8,6 +8,9 @@ WORKDIR /app
|
|
8 |
COPY requirements.txt .
|
9 |
RUN pip install --no-cache-dir -r requirements.txt
|
10 |
|
|
|
|
|
|
|
11 |
# Copy the application code.
|
12 |
COPY . .
|
13 |
|
|
|
8 |
COPY requirements.txt .
|
9 |
RUN pip install --no-cache-dir -r requirements.txt
|
10 |
|
11 |
+
# Install additional system dependencies for soundfile
|
12 |
+
# Skip recommended packages and remove the apt package lists in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1 && rm -rf /var/lib/apt/lists/*
|
13 |
+
|
14 |
# Copy the application code.
|
15 |
COPY . .
|
16 |
|
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
|
2 |
from fastapi.responses import JSONResponse
|
3 |
from pydantic import BaseModel
|
4 |
-
import
|
5 |
import numpy as np
|
6 |
import tempfile
|
7 |
import os
|
@@ -12,42 +12,28 @@ warnings.filterwarnings("ignore")
|
|
12 |
app = FastAPI()
|
13 |
|
14 |
def extract_audio_features(audio_file_path):
|
15 |
-
# Load the audio file using
|
16 |
-
waveform, sample_rate =
|
17 |
-
|
18 |
-
# Ensure waveform is mono by averaging channels if necessary
|
19 |
-
if waveform.shape[0] > 1:
|
20 |
-
waveform = waveform.mean(dim=0, keepdim=True)
|
21 |
-
|
22 |
-
waveform = waveform.squeeze() # Remove channel dimension if it's 1
|
23 |
-
|
24 |
-
# Extract pitch (fundamental frequency)
|
25 |
-
pitch_frequencies, voiced_flags, _ = torchaudio.functional.detect_pitch_frequency(
|
26 |
-
waveform, sample_rate, frame_time=0.01, win_length=1024
|
27 |
-
)
|
28 |
-
f0 = pitch_frequencies[voiced_flags > 0]
|
29 |
|
30 |
-
#
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
-
speech_rate =
|
|
|
40 |
|
41 |
-
return f0
|
42 |
|
43 |
def analyze_voice_stress(audio_file_path):
|
44 |
f0, energy, speech_rate, mfccs, waveform, sample_rate = extract_audio_features(audio_file_path)
|
45 |
-
|
46 |
-
|
47 |
-
mean_f0 = np.mean(f0)
|
48 |
-
std_f0 = np.std(f0)
|
49 |
-
mean_energy = np.mean(energy)
|
50 |
-
std_energy = np.std(energy)
|
51 |
gender = 'male' if mean_f0 < 165 else 'female'
|
52 |
norm_mean_f0 = 110 if gender == 'male' else 220
|
53 |
norm_std_f0 = 20
|
|
|
1 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
|
2 |
from fastapi.responses import JSONResponse
|
3 |
from pydantic import BaseModel
|
4 |
+
import soundfile as sf
|
5 |
import numpy as np
|
6 |
import tempfile
|
7 |
import os
|
|
|
12 |
app = FastAPI()
|
13 |
|
14 |
def extract_audio_features(audio_file_path):
    """Read an audio file and compute the coarse features used downstream.

    Args:
        audio_file_path: Path passed straight to ``sf.read``.

    Returns:
        A tuple ``(f0, energy, speech_rate, mfccs, waveform, sample_rate)``:

        * ``f0`` - heuristic stand-in for pitch: mean absolute difference
          between consecutive samples, scaled by ``sample_rate / (2 * pi)``.
          This is not a true fundamental-frequency estimate.
        * ``energy`` - mean of the squared samples.
        * ``speech_rate`` - fixed placeholder value ``4.0``.
        * ``mfccs`` - mean magnitude of the first 13 FFT bins of the whole
          signal (a single value, not real MFCCs).
        * ``waveform`` - the samples, averaged down to one dimension when
          the file has more than one.
        * ``sample_rate`` - the rate returned by ``sf.read``.
    """
    samples, sample_rate = sf.read(audio_file_path)

    # Collapse multi-dimensional input by averaging along axis 1
    # (presumably the channel axis - confirm against soundfile's layout).
    waveform = samples.mean(axis=1) if samples.ndim > 1 else samples

    energy = np.mean(waveform ** 2)

    # Simplified MFCC-like feature: only the first 13 FFT bins are kept.
    leading_bins = np.fft.fft(waveform)[:13]
    mfccs = np.mean(np.abs(leading_bins), axis=0)

    # Real speech-rate and pitch extraction would need proper DSP; these
    # are placeholders.
    speech_rate = 4.0
    sample_deltas = np.abs(np.diff(waveform))
    f0 = np.mean(sample_deltas) * sample_rate / (2 * np.pi)

    return f0, energy, speech_rate, mfccs, waveform, sample_rate
|
32 |
|
33 |
def analyze_voice_stress(audio_file_path):
|
34 |
f0, energy, speech_rate, mfccs, waveform, sample_rate = extract_audio_features(audio_file_path)
|
35 |
+
mean_f0 = f0
|
36 |
+
mean_energy = energy
|
|
|
|
|
|
|
|
|
37 |
gender = 'male' if mean_f0 < 165 else 'female'
|
38 |
norm_mean_f0 = 110 if gender == 'male' else 220
|
39 |
norm_std_f0 = 20
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
fastapi
|
2 |
uvicorn
|
3 |
-
|
4 |
numpy
|
5 |
pydantic
|
6 |
python-multipart
|
|
|
1 |
fastapi
|
2 |
uvicorn
|
3 |
+
soundfile
|
4 |
numpy
|
5 |
pydantic
|
6 |
python-multipart
|