Spaces:

batra43pvd
/

bird-classifier

Sleeping

batra43pvd committed 6 days ago

Commit

e46d13d

verified ·

1 Parent(s): c6a1de6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -116,22 +116,29 @@ def process_audio_file(file_path):
         # 1. Dùng pydub để mở file audio (hỗ trợ nhiều định dạng)
         audio = AudioSegment.from_file(file_path)
-        # 2. Đảm bảo audio là mono (1 kênh) và có sample rate đúng
         audio = audio.set_channels(1)
         audio = audio.set_frame_rate(SAMPLE_RATE)
-        # 3. Chuyển đổi audio của pydub thành mảng NumPy cho librosa
         # Chuẩn hóa về khoảng [-1, 1]
         samples = np.array(audio.get_array_of_samples()).astype(np.float32)
         y = samples / (2**(audio.sample_width * 8 - 1))
-        # 4. Chuẩn hóa độ dài audio về MAX_SAMPLES
         if len(y) > MAX_SAMPLES:
             y = y[:MAX_SAMPLES]
         else:
             y = np.pad(y, (0, MAX_SAMPLES - len(y)), mode='constant')
-        # 5. Trích xuất đồng thời các bộ đặc trưng (code này không đổi)
         traditional_features = _extract_traditional_features(y, SAMPLE_RATE)
         wav2vec_features = _extract_wav2vec_features(y, SAMPLE_RATE)
         spectrogram = _create_spectrogram_image(y, SAMPLE_RATE)

         # 1. Dùng pydub để mở file audio (hỗ trợ nhiều định dạng)
         audio = AudioSegment.from_file(file_path)
+        # 2. **BƯỚC MỚI: CHUẨN HÓA ÂM LƯỢNG**
+        # Chuẩn hóa âm lượng về một mức tiêu chuẩn (-20 dBFS).
+        # Điều này giúp giảm sự khác biệt về âm lượng giữa các bản ghi.
+        target_dbfs = -20.0
+        change_in_dbfs = target_dbfs - audio.dBFS
+        audio = audio.apply_gain(change_in_dbfs)
+        # 3. Đảm bảo audio là mono (1 kênh) và có sample rate đúng
         audio = audio.set_channels(1)
         audio = audio.set_frame_rate(SAMPLE_RATE)
+        # 4. Chuyển đổi audio của pydub thành mảng NumPy cho librosa
         # Chuẩn hóa về khoảng [-1, 1]
         samples = np.array(audio.get_array_of_samples()).astype(np.float32)
         y = samples / (2**(audio.sample_width * 8 - 1))
+        # 5. Chuẩn hóa độ dài audio về MAX_SAMPLES
         if len(y) > MAX_SAMPLES:
             y = y[:MAX_SAMPLES]
         else:
             y = np.pad(y, (0, MAX_SAMPLES - len(y)), mode='constant')
+        # 6. Trích xuất đồng thời các bộ đặc trưng (code này không đổi)
         traditional_features = _extract_traditional_features(y, SAMPLE_RATE)
         wav2vec_features = _extract_wav2vec_features(y, SAMPLE_RATE)
         spectrogram = _create_spectrogram_image(y, SAMPLE_RATE)