Spaces:

vitorcalvi
/

aig2

Sleeping

App Files Community

vitorcalvi committed on Nov 13, 2024

Commit

0073001

1 Parent(s): c9de652

1

Browse files

Files changed (3) hide show

Dockerfile +3 -0
app.py +17 -31
requirements.txt +1 -1

Dockerfile CHANGED Viewed

@@ -8,6 +8,9 @@ WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy the application code.
 COPY . .

 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# Install additional system dependencies for soundfile
+RUN apt-get update && apt-get install -y libsndfile1
 # Copy the application code.
 COPY . .

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from fastapi import FastAPI, File, UploadFile, HTTPException, Form
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-import torchaudio
 import numpy as np
 import tempfile
 import os
@@ -12,42 +12,28 @@ warnings.filterwarnings("ignore")
 app = FastAPI()
 def extract_audio_features(audio_file_path):
-    # Load the audio file using torchaudio
-    waveform, sample_rate = torchaudio.load(audio_file_path)
-    # Ensure waveform is mono by averaging channels if necessary
-    if waveform.shape[0] > 1:
-        waveform = waveform.mean(dim=0, keepdim=True)
-    waveform = waveform.squeeze()  # Remove channel dimension if it's 1
-    # Extract pitch (fundamental frequency)
-    pitch_frequencies, voiced_flags, _ = torchaudio.functional.detect_pitch_frequency(
-        waveform, sample_rate, frame_time=0.01, win_length=1024
-    )
-    f0 = pitch_frequencies[voiced_flags > 0]
-    # Extract energy
-    energy = waveform.pow(2).numpy()
-    # Extract MFCCs
-    mfcc_transform = torchaudio.transforms.MFCC(sample_rate=sample_rate, n_mfcc=13)
-    mfccs = mfcc_transform(waveform.unsqueeze(0)).squeeze(0).numpy()
-    # Estimate speech rate (simplified)
-    tempo = torchaudio.functional.estimate_tempo(waveform, sample_rate)
-    speech_rate = tempo / 60 if tempo is not None else 0
-    return f0.numpy(), energy, speech_rate, mfccs, waveform.numpy(), sample_rate
 def analyze_voice_stress(audio_file_path):
     f0, energy, speech_rate, mfccs, waveform, sample_rate = extract_audio_features(audio_file_path)
-    if len(f0) == 0:
-        raise ValueError("Could not extract fundamental frequency from the audio.")
-    mean_f0 = np.mean(f0)
-    std_f0 = np.std(f0)
-    mean_energy = np.mean(energy)
-    std_energy = np.std(energy)
     gender = 'male' if mean_f0 < 165 else 'female'
     norm_mean_f0 = 110 if gender == 'male' else 220
     norm_std_f0 = 20

 from fastapi import FastAPI, File, UploadFile, HTTPException, Form
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
+import soundfile as sf
 import numpy as np
 import tempfile
 import os
 app = FastAPI()
 def extract_audio_features(audio_file_path):
+    # Load the audio file using soundfile
+    waveform, sample_rate = sf.read(audio_file_path)
+    # Ensure waveform is a 1D array (mono audio)
+    if waveform.ndim > 1:
+        waveform = waveform.mean(axis=1)
+    # Calculate basic features (pitch estimation requires a more complex algorithm, but we'll simplify)
+    energy = np.mean(waveform ** 2)
+    mfccs = np.mean(np.abs(np.fft.fft(waveform)[:13]), axis=0)  # Simplified MFCC-like features
+    # Placeholder for speech rate and fundamental frequency
+    # Speech rate and pitch extraction would require more complex DSP techniques or external libraries.
+    speech_rate = 4.0  # Arbitrary placeholder value for speech rate
+    f0 = np.mean(np.abs(np.diff(waveform))) * sample_rate / (2 * np.pi)  # Rough pitch estimate
+    return f0, energy, speech_rate, mfccs, waveform, sample_rate
 def analyze_voice_stress(audio_file_path):
     f0, energy, speech_rate, mfccs, waveform, sample_rate = extract_audio_features(audio_file_path)
+    mean_f0 = f0
+    mean_energy = energy
     gender = 'male' if mean_f0 < 165 else 'female'
     norm_mean_f0 = 110 if gender == 'male' else 220
     norm_std_f0 = 20

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 fastapi
 uvicorn
-torchaudio
 numpy
 pydantic
 python-multipart

 fastapi
 uvicorn
+soundfile
 numpy
 pydantic
 python-multipart