bxleigh commited on
Commit
ceaf4ad
·
1 Parent(s): f06b7dc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +240 -0
app.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import HfApi, hf_hub_download
3
+ from huggingface_hub.repocard import metadata_load
4
+ import requests
5
+ import re
6
+ import pandas as pd
7
+ from huggingface_hub import ModelCard
8
+ import os
9
+
10
def pass_emoji(passed):
    """Map a pass/fail flag to a check-mark or cross emoji for display."""
    # Only an exact boolean True earns the check mark — this mirrors the
    # strict `is True` comparison of the original implementation.
    return "✅" if passed is True else "❌"
16
+
17
# Shared Hub API client used by every model lookup below.
api = HfApi()
# Dataset repo listing usernames of students who submitted the Unit 7 demo.
USERNAMES_DATASET_ID = "huggingface-course/audio-course-u7-hands-on"
# Token used to download the usernames dataset; None when the env var is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
20
+
21
+ def get_user_models(hf_username, task):
22
+ """
23
+ List the user's models for a given task
24
+ :param hf_username: User HF username
25
+ """
26
+ models = api.list_models(author=hf_username, filter=[task])
27
+ user_model_ids = [x.modelId for x in models]
28
+
29
+ match task:
30
+ case "audio-classification":
31
+ dataset = 'marsyas/gtzan'
32
+ case "automatic-speech-recognition":
33
+ dataset = 'PolyAI/minds14'
34
+ case "text-to-speech":
35
+ dataset = ""
36
+ case _:
37
+ print("Unsupported task")
38
+
39
+ dataset_specific_models = []
40
+
41
+ if dataset == "":
42
+ return user_model_ids
43
+ else:
44
+ for model in user_model_ids:
45
+ meta = get_metadata(model)
46
+ if meta is None:
47
+ continue
48
+ try:
49
+ if meta["datasets"] == [dataset]:
50
+ dataset_specific_models.append(model)
51
+ except:
52
+ continue
53
+ return dataset_specific_models
54
+
55
def calculate_best_result(user_models, task):
    """
    Find the best metric value, and the model that achieved it, among a
    user's models for one course unit.

    :param user_models: model ids of a user
    :param task: "audio-classification" (higher accuracy is better) or
                 "automatic-speech-recognition" (lower WER is better)
    :return: (best_result, best_model_name); the sentinel start value and ""
             are returned when no model has a parseable metric
    :raises ValueError: for tasks without a comparable metric
    """
    best_model = ""

    if task == "audio-classification":
        # Accuracy: larger is better — start from an impossibly low value.
        best_result = -100
        larger_is_better = True
    elif task == "automatic-speech-recognition":
        # WER: smaller is better — start from an impossibly high value.
        best_result = 100
        larger_is_better = False
    else:
        # BUG FIX: other tasks previously fell through with `best_result`
        # and `larger_is_better` unbound, crashing with NameError in the loop.
        raise ValueError(f"Unsupported task: {task}")

    for model in user_models:
        meta = get_metadata(model)
        if meta is None:
            continue
        metric = parse_metrics(model, task)
        if metric is None:  # fixed `== None` comparison
            continue

        improved = metric > best_result if larger_is_better else metric < best_result
        if improved:
            best_result = metric
            best_model = meta['model-index'][0]["name"]

    return best_result, best_model
89
+
90
+
91
def get_metadata(model_id):
    """
    Fetch a model's card metadata (contains evaluation data).

    :param model_id: Hub model id
    :return: parsed metadata mapping, or None when the repo has no README.md
    """
    try:
        # A repo without a README.md surfaces as an HTTP 404 from the Hub.
        return metadata_load(hf_hub_download(model_id, filename="README.md"))
    except requests.exceptions.HTTPError:
        return None
102
+
103
+
104
def extract_metric(model_card_content, task):
    """
    Extract the evaluation metric value from a model card's text.

    :param model_card_content: model card content (README body text)
    :param task: task whose metric should be parsed
    :return: metric as a float, or None when the task has no known metric
             pattern or no metric line is found in the card
    """
    # Metric line patterns per supported task.
    patterns = {
        "audio-classification": r"(?:Accuracy|eval_accuracy): (\d+\.\d+)",
        "automatic-speech-recognition": r"Wer: (\d+\.\d+)",
    }

    pattern = patterns.get(task)
    if pattern is None:
        # BUG FIX: `pattern` used to be unbound for any other task
        # (e.g. "text-to-speech"), raising NameError at re.search.
        return None

    match = re.search(pattern, model_card_content)
    if match:
        return float(match.group(1))
    return None
123
+
124
+
125
def parse_metrics(model, task):
    """
    Load a model's card from the Hub and pull the task metric out of it.

    :param model: model id
    :param task: task whose metric should be parsed
    :return: metric value as a float, or None when not found
    """
    card_text = ModelCard.load(model).content
    return extract_metric(card_text, task)
132
+
133
+
134
+ def certification(hf_username):
135
+ results_certification = [
136
+ {
137
+ "unit": "Unit 4: Audio Classification",
138
+ "task": "audio-classification",
139
+ "baseline_metric": 0.87,
140
+ "best_result": 0,
141
+ "best_model_id": "",
142
+ "passed_": False
143
+ },
144
+ {
145
+ "unit": "Unit 5: Automatic Speech Recognition",
146
+ "task": "automatic-speech-recognition",
147
+ "baseline_metric": 0.37,
148
+ "best_result": 0,
149
+ "best_model_id": "",
150
+ "passed_": False
151
+ },
152
+ {
153
+ "unit": "Unit 6: Text-to-Speech",
154
+ "task": "text-to-speech",
155
+ "baseline_metric": 0,
156
+ "best_result": 0,
157
+ "best_model_id": "",
158
+ "passed_": False
159
+ },
160
+ {
161
+ "unit": "Unit 7: Audio applications",
162
+ "task": "demo",
163
+ "baseline_metric": 0,
164
+ "best_result": 0,
165
+ "best_model_id": "",
166
+ "passed_": False
167
+ },
168
+ ]
169
+
170
+ for unit in results_certification:
171
+ unit["passed"] = pass_emoji(unit["passed_"])
172
+
173
+ match unit["task"]:
174
+ case "audio-classification":
175
+ try:
176
+ user_ac_models = get_user_models(hf_username, task = "audio-classification")
177
+ best_result, best_model_id = calculate_best_result(user_ac_models, task = "audio-classification")
178
+ unit["best_result"] = best_result
179
+ unit["best_model_id"] = best_model_id
180
+ if unit["best_result"] >= unit["baseline_metric"]:
181
+ unit["passed_"] = True
182
+ unit["passed"] = pass_emoji(unit["passed_"])
183
+ except: print("Either no relevant models found, or no metrics in the model card for audio classificaiton")
184
+ case "automatic-speech-recognition":
185
+ try:
186
+ user_asr_models = get_user_models(hf_username, task = "automatic-speech-recognition")
187
+ best_result, best_model_id = calculate_best_result(user_asr_models, task = "automatic-speech-recognition")
188
+ unit["best_result"] = best_result
189
+ unit["best_model_id"] = best_model_id
190
+ if unit["best_result"] <= unit["baseline_metric"]:
191
+ unit["passed_"] = True
192
+ unit["passed"] = pass_emoji(unit["passed_"])
193
+ except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
194
+ case "text-to-speech":
195
+ try:
196
+ user_tts_models = get_user_models(hf_username, task = "text-to-speech")
197
+ if user_tts_models:
198
+ unit["best_result"] = 0
199
+ unit["best_model_id"] = user_tts_models[0]
200
+ unit["passed_"] = True
201
+ unit["passed"] = pass_emoji(unit["passed_"])
202
+ except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
203
+ case "demo":
204
+ u7_usernames = hf_hub_download(USERNAMES_DATASET_ID, repo_type = "dataset", filename="usernames.csv", token=HF_TOKEN)
205
+ u7_users = pd.read_csv(u7_usernames)
206
+ if hf_username in u7_users['username'].tolist():
207
+ unit["best_result"] = 0
208
+ unit["best_model_id"] = "Demo check passed, no model id"
209
+ unit["passed_"] = True
210
+ unit["passed"] = pass_emoji(unit["passed_"])
211
+ case _:
212
+ print("Unknown task")
213
+
214
+ print(results_certification)
215
+
216
+ df = pd.DataFrame(results_certification)
217
+ df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
218
+ return df
219
+
220
# Gradio front-end: a username box, a button, and the progress table.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🏆 Check your progress in the Audio Course 🏆
    - To get a certificate of completion, you must **pass 3 out of 4 assignments**.
    - To get an honors certificate, you must **pass 4 out of 4 assignments**.
    For the assignments where you have to train a model, your model's metric should be equal to or better than the baseline metric.
    For the Unit 7 assignment, first, check your demo with the [Unit 7 assessment space](https://huggingface.co/spaces/huggingface-course/audio-course-u7-assessment)
    Make sure that you have uploaded your model(s) to Hub, and that your Unit 7 demo is public.
    To check your progress, type your Hugging Face Username here (in my case MariaK)
    """)

    hf_username = gr.Textbox(placeholder="MariaK", label="Your Hugging Face Username")
    check_progress_button = gr.Button(value="Check my progress")
    # BUG FIX: the original handler called the non-existent `output.set_text`
    # and `hf_username.text` attributes, so the button did nothing useful.
    # Wire the click through Gradio's event system instead: the textbox value
    # is passed to `certification`, whose DataFrame result is rendered below.
    output = gr.Dataframe(interactive=False)
    check_progress_button.click(fn=certification, inputs=hf_username, outputs=output)

demo.launch()