pierre-loic committed
Commit b24406e · 1 Parent(s): 42b7ac6

update content with the text model from the Pierre-Loic repository https://huggingface.co/spaces/pierre-loic/submission

Files changed (2)
  1. model_audio.pkl +3 -0
  2. tasks/audio.py +26 -14
model_audio.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ffe51d1bdc42eab349629b4e178180c9f345509b333eaa05d3e3ff29d71efeb3
+ size 6345
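Note: model_audio.pkl is tracked with Git LFS, so only the pointer (oid and size) appears in the diff. A minimal sketch of how such a pickled classifier could be produced, assuming scikit-learn's DecisionTreeClassifier and the same spectral-contrast features used at inference time in tasks/audio.py; the training script itself is not part of this commit, and train_dataset is a placeholder for a training split with "audio" and "label" columns:

import joblib
import librosa
import numpy as np
from sklearn.tree import DecisionTreeClassifier

def extract_features(example, sampling_rate):
    # Mean spectral contrast per band, mirroring the inference-time features
    audio_array = example["audio"]["array"]
    contrast = librosa.feature.spectral_contrast(y=audio_array)
    return np.mean(contrast, axis=1)

# train_dataset is assumed: any split providing "audio" and "label" columns
X = np.vstack([extract_features(example, 12000) for example in train_dataset])
y = train_dataset["label"]

clf = DecisionTreeClassifier().fit(X, y)
joblib.dump(clf, "model_audio.pkl")  # small artifact, consistent with the 6345-byte pointer above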
tasks/audio.py CHANGED
@@ -1,3 +1,7 @@
+ import librosa
+ import joblib
+ import numpy as np
+
  from fastapi import APIRouter
  from datetime import datetime
  from datasets import load_dataset
@@ -6,14 +10,14 @@ import random
  import os

  from .utils.evaluation import AudioEvaluationRequest
- from .utils.emissions import get_tracker, clean_emissions_data, get_space_info
+ from .utils.emissions import tracker, clean_emissions_data, get_space_info

  from dotenv import load_dotenv
  load_dotenv()

  router = APIRouter()

- DESCRIPTION = "Random Baseline"
+ DESCRIPTION = "Decision tree"
  ROUTE = "/audio"


@@ -24,9 +28,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
      """
      Evaluate audio classification for rainforest sound detection.

-     Current Model: Random Baseline
-     - Makes random predictions from the label space (0-1)
-     - Used as a baseline for comparison
+     Current Model: Basic decision tree
      """
      # Get space info
      username, space_url = get_space_info()
@@ -45,19 +47,28 @@ async def evaluate_audio(request: AudioEvaluationRequest):
      test_dataset = train_test["test"]

      # Start tracking emissions
-     tracker = get_tracker()
      tracker.start()
      tracker.start_task("inference")

      #--------------------------------------------------------------------------------------------
-     # YOUR MODEL INFERENCE CODE HERE
-     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
+     # MY MODEL
      #--------------------------------------------------------------------------------------------
-
-     # Make random predictions (placeholder for actual model inference)
-     true_labels = test_dataset["label"]
-     predictions = [random.randint(0, 1) for _ in range(len(true_labels))]
-
+     def extract_features(example, sampling_rate):
+         audio_array = example['audio']['array']
+         # mfcc = librosa.feature.mfcc(y=audio_array, sr=sampling_rate, n_mfcc=5)
+         mfcc = librosa.feature.spectral_contrast(y=audio_array)
+         return np.mean(mfcc, axis=1)
+
+     def predict_new_audio(model, dataset, sampling_rate):
+         features_list = [extract_features(example, sampling_rate) for example in dataset]
+         features_array = np.vstack(features_list)
+         predictions = model.predict(features_array)
+         return predictions
+
+     model_filename = "model_audio.pkl"
+     clf = joblib.load(model_filename)
+
+     predictions = predict_new_audio(clf, test_dataset, 12000)
      #--------------------------------------------------------------------------------------------
      # YOUR MODEL INFERENCE STOPS HERE
      #--------------------------------------------------------------------------------------------
@@ -66,6 +77,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
      emissions_data = tracker.stop_task()

      # Calculate accuracy
+     true_labels = test_dataset["label"]
      accuracy = accuracy_score(true_labels, predictions)

      # Prepare results dictionary
@@ -86,4 +98,4 @@ async def evaluate_audio(request: AudioEvaluationRequest):
          }
      }

-     return results
+     return results
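Note: within the tracked inference block, the endpoint loads the pickled decision tree with joblib and predicts one label per clip from the mean spectral contrast of its waveform. A minimal sketch of exercising the same helpers outside the endpoint, assuming extract_features and predict_new_audio are copied or imported from tasks/audio.py and test_dataset is any split with "audio" and "label" columns:

import joblib
from sklearn.metrics import accuracy_score

clf = joblib.load("model_audio.pkl")  # decision tree shipped in this commit
predictions = predict_new_audio(clf, test_dataset, 12000)  # sampling rate matches the endpoint call
accuracy = accuracy_score(test_dataset["label"], predictions)
print(f"accuracy: {accuracy:.3f}")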