laureBe committed on
Commit
15c4bfa
·
verified ·
1 Parent(s): b66d092

Upload 2 files

Browse files
Files changed (2) hide show
  1. README.md +6 -6
  2. app.py +105 -26
README.md CHANGED
@@ -8,11 +8,11 @@ pinned: false
8
  ---
9
 
10
 
11
- # Random Baseline Model for Climate Disinformation Classification
12
 
13
  ## Model Description
14
 
15
- This is a random baseline model for the Frugal AI Challenge 2024, specifically for the text classification task of identifying climate disinformation. The model serves as a performance floor, randomly assigning labels to text inputs without any learning.
16
 
17
  ### Intended Use
18
 
@@ -40,7 +40,7 @@ The model uses the QuotaClimat/frugalaichallenge-text-train dataset:
40
  ## Performance
41
 
42
  ### Metrics
43
- - **Accuracy**: ~12.5% (random chance with 8 classes)
44
  - **Environmental Impact**:
45
  - Emissions tracked in gCO2eq
46
  - Energy consumption tracked in Wh
@@ -57,10 +57,10 @@ Environmental impact is tracked using CodeCarbon, measuring:
57
  This tracking helps establish a baseline for the environmental impact of model deployment and inference.
58
 
59
  ## Limitations
60
- - Makes completely random predictions
61
  - No learning or pattern recognition
62
- - No consideration of input text
63
- - Serves only as a baseline reference
64
  - Not suitable for any real-world applications
65
 
66
  ## Ethical Considerations
 
8
  ---
9
 
10
 
11
+ # Logistic Regression Model for Climate Disinformation Classification
12
 
13
  ## Model Description
14
 
15
+ This is a logistic regression baseline model for the Frugal AI Challenge 2024, specifically for the text classification task of identifying climate disinformation. The model serves as a simple baseline for comparison.
16
 
17
  ### Intended Use
18
 
 
40
  ## Performance
41
 
42
  ### Metrics
43
+ - **Accuracy**: ~63.5%
44
  - **Environmental Impact**:
45
  - Emissions tracked in gCO2eq
46
  - Energy consumption tracked in Wh
 
57
  This tracking helps establish a baseline for the environmental impact of model deployment and inference.
58
 
59
  ## Limitations
60
+ - Makes predictions with a simple linear model (logistic regression)
61
  - No learning or pattern recognition
62
+ - Input text is represented only as TF-IDF vectors
63
+ - Serves only as a logistic regression (LR) baseline reference
64
  - Not suitable for any real-world applications
65
 
66
  ## Ethical Considerations
app.py CHANGED
@@ -1,27 +1,106 @@
1
- from fastapi import FastAPI
2
- from dotenv import load_dotenv
3
- from tasks import text, image, audio
4
-
5
- # Load environment variables
6
- load_dotenv()
7
-
8
- app = FastAPI(
9
- title="Frugal AI Challenge API",
10
- description="API for the Frugal AI Challenge evaluation endpoints"
11
- )
12
-
13
- # Include all routers
14
- app.include_router(text.router)
15
- app.include_router(image.router)
16
- app.include_router(audio.router)
17
-
18
- @app.get("/")
19
- async def root():
20
- return {
21
- "message": "Welcome to the Frugal AI Challenge API",
22
- "endpoints": {
23
- "text": "/text - Text classification task",
24
- "image": "/image - Image classification task (coming soon)",
25
- "audio": "/audio - Audio classification task (coming soon)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
- }
 
 
 
1
+ from fastapi import APIRouter
2
+ from datetime import datetime
3
+ from datasets import load_dataset
4
+ from sklearn.metrics import accuracy_score
5
+ from sklearn.linear_model import LogisticRegression
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.model_selection import train_test_split
8
+
9
+ from .utils.evaluation import TextEvaluationRequest
10
+ from .utils.emissions import tracker, clean_emissions_data, get_space_info
11
+
12
+ router = APIRouter()
13
+
14
+ DESCRIPTION = "Logistic Regression"
15
+ ROUTE = "/text"
16
+
@router.post(ROUTE, tags=["Text Task"], description=DESCRIPTION)
async def evaluate_text(request: TextEvaluationRequest):
    """
    Evaluate text classification for climate disinformation detection.

    Current Model: Logistic regression on TF-IDF features
    - Used as a baseline for comparison

    Args:
        request: Evaluation settings. ``dataset_name``, ``test_size`` and
            ``test_seed`` are read from it.

    Returns:
        A dict with the submission metadata, the accuracy on the held-out
        split, the energy/emissions figures reported by the tracker, and
        the dataset configuration that was used.
    """
    # Get space info
    username, space_url = get_space_info()

    # Define the label mapping
    LABEL_MAPPING = {
        "0_not_relevant": 0,
        "1_not_happening": 1,
        "2_not_human": 2,
        "3_not_bad": 3,
        "4_solutions_harmful_unnecessary": 4,
        "5_science_unreliable": 5,
        "6_proponents_biased": 6,
        "7_fossil_fuels_needed": 7,
    }

    # Load and prepare the dataset
    dataset = load_dataset(request.dataset_name)

    # Convert string labels to integers
    dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})

    # Split dataset; both halves are needed: "train" to fit the model,
    # "test" to score it.
    train_test = dataset["train"].train_test_split(
        test_size=request.test_size, seed=request.test_seed
    )
    train_dataset = train_test["train"]
    test_dataset = train_test["test"]
    train_labels = train_dataset["label"]

    # Vectorize the quotes. The vocabulary is learned on the training split
    # only and then applied unchanged to the test split.
    tfidf_vect = TfidfVectorizer(stop_words="english")
    tfidf_train = tfidf_vect.fit_transform(train_dataset["quote"])
    tfidf_test = tfidf_vect.transform(test_dataset["quote"])

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")

    # ------------------------------------------------------------------
    # Tracked section: model fitting and prediction.
    # NOTE: the classifier is fitted inside the tracked task, so the
    # reported energy/emissions include training as well as prediction.
    # ------------------------------------------------------------------
    true_labels = test_dataset["label"]

    LR = LogisticRegression(
        class_weight="balanced",
        max_iter=20,
        random_state=1234,
        solver="liblinear",
    )
    # The sparse TF-IDF matrices are passed directly; converting them to
    # pandas DataFrames is unnecessary for scikit-learn estimators.
    LR.fit(tfidf_train, train_labels)
    predictions = LR.predict(tfidf_test)
    # ------------------------------------------------------------------
    # End of tracked section
    # ------------------------------------------------------------------

    # Stop tracking emissions
    emissions_data = tracker.stop_task()

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predictions)

    # Prepare results dictionary
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "accuracy": float(accuracy),
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed,
        },
    }

    return results