satvs committed
Commit c5a2073 · Parent: 11d5013

[ref] remove audio and text endpoints

Files changed (2):
  1. tasks/audio.py +0 -88
  2. tasks/text.py +0 -92
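
For reference, a minimal sketch of how a client might have called the removed routes before this commit. The base URL and dataset id are placeholders, and only the three request fields the handlers actually read (dataset_name, test_size, test_seed) are assumed to exist on AudioEvaluationRequest / TextEvaluationRequest:

import requests

# Hypothetical request body; the keys mirror what the removed handlers read.
payload = {
    "dataset_name": "your-org/your-eval-dataset",  # placeholder HF dataset id
    "test_size": 0.2,   # fraction held out by train_test_split
    "test_seed": 42,    # fixed seed so every submission scores the same split
}

# Base URL is a placeholder for wherever the Space was served; "/audio" and
# "/text" were the two routes removed below.
resp = requests.post("http://localhost:8000/text", json=payload)
print(resp.json()["accuracy"], resp.json()["energy_consumed_wh"])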
tasks/audio.py DELETED
@@ -1,88 +0,0 @@
-from fastapi import APIRouter
-from datetime import datetime
-from datasets import load_dataset
-from sklearn.metrics import accuracy_score
-import random
-import os
-
-from .utils.evaluation import AudioEvaluationRequest
-from .utils.emissions import tracker, clean_emissions_data, get_space_info
-
-from dotenv import load_dotenv
-load_dotenv()
-
-router = APIRouter()
-
-DESCRIPTION = "Random Baseline"
-ROUTE = "/audio"
-
-
-
-@router.post(ROUTE, tags=["Audio Task"],
-             description=DESCRIPTION)
-async def evaluate_audio(request: AudioEvaluationRequest):
-    """
-    Evaluate audio classification for rainforest sound detection.
-
-    Current Model: Random Baseline
-    - Makes random predictions from the label space (0-1)
-    - Used as a baseline for comparison
-    """
-    # Get space info
-    username, space_url = get_space_info()
-
-    # Define the label mapping
-    LABEL_MAPPING = {
-        "chainsaw": 0,
-        "environment": 1
-    }
-    # Load and prepare the dataset.
-    # The dataset is gated, so the HF_TOKEN environment variable is used to authenticate.
-    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
-
-    # Split dataset
-    train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
-    test_dataset = train_test["test"]
-
-    # Start tracking emissions
-    tracker.start()
-    tracker.start_task("inference")
-
-    #--------------------------------------------------------------------------------------------
-    # YOUR MODEL INFERENCE CODE HERE
-    # Replace the random baseline below with your model inference; only this block is tracked for energy consumption and emissions.
-    #--------------------------------------------------------------------------------------------
-
-    # Make random predictions (placeholder for actual model inference)
-    true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 1) for _ in range(len(true_labels))]
-
-    #--------------------------------------------------------------------------------------------
-    # YOUR MODEL INFERENCE STOPS HERE
-    #--------------------------------------------------------------------------------------------
-
-    # Stop tracking emissions
-    emissions_data = tracker.stop_task()
-
-    # Calculate accuracy
-    accuracy = accuracy_score(true_labels, predictions)
-
-    # Prepare results dictionary
-    results = {
-        "username": username,
-        "space_url": space_url,
-        "submission_timestamp": datetime.now().isoformat(),
-        "model_description": DESCRIPTION,
-        "accuracy": float(accuracy),
-        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
-        "emissions_gco2eq": emissions_data.emissions * 1000,
-        "emissions_data": clean_emissions_data(emissions_data),
-        "api_route": ROUTE,
-        "dataset_config": {
-            "dataset_name": request.dataset_name,
-            "test_size": request.test_size,
-            "test_seed": request.test_seed
-        }
-    }
-
-    return results
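
If the /audio route is ever restored, the block between the YOUR MODEL INFERENCE markers is the only part meant to change. A minimal sketch of a non-random replacement, assuming a transformers checkpoint (the name below is a placeholder) whose output labels match LABEL_MAPPING, and a test_dataset whose audio column follows the datasets Audio feature:

from transformers import pipeline

# Placeholder checkpoint; any audio classifier emitting the labels
# "chainsaw" / "environment" slots in the same way.
classifier = pipeline("audio-classification", model="your-username/chainsaw-detector")

true_labels = test_dataset["label"]
# The datasets Audio feature yields {"array", "sampling_rate", ...} dicts,
# which the audio-classification pipeline accepts directly.
outputs = classifier(test_dataset["audio"])
# Each result is a score-sorted list of {"label", "score"} dicts; keep the
# top label and map it back to the integer label space.
predictions = [LABEL_MAPPING[out[0]["label"]] for out in outputs]

Because the snippet sits between tracker.start_task("inference") and tracker.stop_task(), the model's energy use would be attributed to the reported emissions, which is the point of the marker comments.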
tasks/text.py DELETED
@@ -1,92 +0,0 @@
-from fastapi import APIRouter
-from datetime import datetime
-from datasets import load_dataset
-from sklearn.metrics import accuracy_score
-import random
-
-from .utils.evaluation import TextEvaluationRequest
-from .utils.emissions import tracker, clean_emissions_data, get_space_info
-
-router = APIRouter()
-
-DESCRIPTION = "Random Baseline"
-ROUTE = "/text"
-
-@router.post(ROUTE, tags=["Text Task"],
-             description=DESCRIPTION)
-async def evaluate_text(request: TextEvaluationRequest):
-    """
-    Evaluate text classification for climate disinformation detection.
-
-    Current Model: Random Baseline
-    - Makes random predictions from the label space (0-7)
-    - Used as a baseline for comparison
-    """
-    # Get space info
-    username, space_url = get_space_info()
-
-    # Define the label mapping
-    LABEL_MAPPING = {
-        "0_not_relevant": 0,
-        "1_not_happening": 1,
-        "2_not_human": 2,
-        "3_not_bad": 3,
-        "4_solutions_harmful_unnecessary": 4,
-        "5_science_unreliable": 5,
-        "6_proponents_biased": 6,
-        "7_fossil_fuels_needed": 7
-    }
-
-    # Load and prepare the dataset
-    dataset = load_dataset(request.dataset_name)
-
-    # Convert string labels to integers
-    dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
-
-    # Split dataset
-    train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
-    test_dataset = train_test["test"]
-
-    # Start tracking emissions
-    tracker.start()
-    tracker.start_task("inference")
-
-    #--------------------------------------------------------------------------------------------
-    # YOUR MODEL INFERENCE CODE HERE
-    # Replace the random baseline below with your model inference; only this block is tracked for energy consumption and emissions.
-    #--------------------------------------------------------------------------------------------
-
-    # Make random predictions (placeholder for actual model inference)
-    true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
-
-    #--------------------------------------------------------------------------------------------
-    # YOUR MODEL INFERENCE STOPS HERE
-    #--------------------------------------------------------------------------------------------
-
-
-    # Stop tracking emissions
-    emissions_data = tracker.stop_task()
-
-    # Calculate accuracy
-    accuracy = accuracy_score(true_labels, predictions)
-
-    # Prepare results dictionary
-    results = {
-        "username": username,
-        "space_url": space_url,
-        "submission_timestamp": datetime.now().isoformat(),
-        "model_description": DESCRIPTION,
-        "accuracy": float(accuracy),
-        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
-        "emissions_gco2eq": emissions_data.emissions * 1000,
-        "emissions_data": clean_emissions_data(emissions_data),
-        "api_route": ROUTE,
-        "dataset_config": {
-            "dataset_name": request.dataset_name,
-            "test_size": request.test_size,
-            "test_seed": request.test_seed
-        }
-    }
-
-    return results
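
The text route follows the same pattern. A minimal sketch of a non-random replacement, assuming a placeholder checkpoint fine-tuned to emit the LABEL_MAPPING keys as its label names, and assuming the claim text lives in a column named "quote" (the deleted handler never names the text column, so that is a guess to adjust per dataset):

from transformers import pipeline

# Placeholder checkpoint; assumed to output label names identical to the
# LABEL_MAPPING keys ("0_not_relevant" ... "7_fossil_fuels_needed").
classifier = pipeline("text-classification", model="your-username/climate-claims-classifier")

true_labels = test_dataset["label"]
# "quote" is an assumed column name for the claim text.
outputs = classifier(test_dataset["quote"], truncation=True)
# One {"label", "score"} dict per input; map each label name back to 0-7.
predictions = [LABEL_MAPPING[out["label"]] for out in outputs]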