Spaces:
Running
Running
adding overall tab
Browse files
app.py
CHANGED
|
@@ -3,6 +3,11 @@ import pandas as pd
|
|
| 3 |
from huggingface_hub import list_models
|
| 4 |
import plotly.express as px
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def get_plots(task):
|
| 7 |
#TO DO : hover text with energy efficiency number, parameters
|
| 8 |
task_df= pd.read_csv('data/energy/'+task)
|
|
@@ -21,25 +26,65 @@ def get_plots(task):
|
|
| 21 |
)
|
| 22 |
return fig
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def make_link(mname):
    """Return a Markdown link to a model's Hugging Face Hub page.

    Args:
        mname: Hub model identifier such as ``"org/model"``. It is converted
            with ``str()`` before use.

    Returns:
        ``"[label](https://huggingface.co/<mname>)"`` where ``label`` is the
        segment after the first ``/``. An identifier without any ``/`` is
        used whole as the label instead of raising ``IndexError``.
    """
    name = str(mname)
    parts = name.split('/')
    # Identifiers without a namespace have no second segment to show.
    label = parts[1] if len(parts) > 1 else parts[0]
    return "[" + label + "](https://huggingface.co/" + name + ")"
|
| 27 |
|
| 28 |
-
def get_model_names(
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
energy_df= pd.read_csv('data/energy/'+task_data)
|
| 32 |
task_df= task_df.rename(columns={"Link": "model"})
|
| 33 |
all_df = pd.merge(task_df, energy_df, on='model')
|
| 34 |
all_df=all_df.drop_duplicates(subset=['model'])
|
| 35 |
-
all_df['
|
| 36 |
-
all_df['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
all_df['Total GPU Energy (Wh)'] = all_df['total_gpu_energy']*1000
|
| 38 |
all_df['Total GPU Energy (Wh)'] = all_df['Total GPU Energy (Wh)'].round(2)
|
| 39 |
all_df['Rating'] = pd.cut(all_df['Total GPU Energy (Wh)'], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])
|
| 40 |
-
model_names
|
|
|
|
| 41 |
return model_names
|
| 42 |
|
|
|
|
| 43 |
def format_params(num):
|
| 44 |
if num > 1000000000:
|
| 45 |
if not num % 1000000000:
|
|
@@ -129,10 +174,18 @@ with demo:
|
|
| 129 |
plot = gr.Plot(get_plots('question_answering.csv'))
|
| 130 |
with gr.Column():
|
| 131 |
table = gr.Dataframe(get_model_names('question_answering.csv'), datatype="markdown")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
with gr.Accordion("Methodology", open = False):
|
| 133 |
gr.Markdown(
|
| 134 |
"""For each of the ten tasks above, we created a custom dataset with 1,000 entries (see all of the datasets on our [org Hub page](https://huggingface.co/EnergyStarAI)).
|
| 135 |
-
We then tested each of the models from the leaderboard on the appropriate task, measuring the energy consumed using [Code Carbon](https://mlco2.github.io/codecarbon/), an open-source Python package for tracking the environmental impacts of code.
|
| 136 |
We developed and used a [Docker container](https://github.com/huggingface/EnergyStarAI/) to maximize the reproducibility of results, and to enable members of the community to benchmark internal models.
|
| 137 |
Reach out to us if you want to collaborate!
|
| 138 |
""")
|
|
|
|
| 3 |
from huggingface_hub import list_models
|
| 4 |
import plotly.express as px
|
| 5 |
|
| 6 |
+
|
| 7 |
+
# CSV file names, one per benchmarked task. Each name is appended to the
# 'data/energy/' and 'data/params/' prefixes when the data is loaded.
tasks = [
    'asr.csv',
    'object_detection.csv',
    'text_classification.csv',
    'image_captioning.csv',
    'question_answering.csv',
    'text_generation.csv',
    'image_classification.csv',
    'sentence_similarity.csv',
    'image_generation.csv',
    'summarization.csv',
]
|
| 10 |
+
|
| 11 |
def get_plots(task):
|
| 12 |
#TO DO : hover text with energy efficiency number, parameters
|
| 13 |
task_df= pd.read_csv('data/energy/'+task)
|
|
|
|
| 26 |
)
|
| 27 |
return fig
|
| 28 |
|
| 29 |
+
def get_all_plots():
    """Build a single scatter plot of GPU energy use across every task.

    For each file name in the module-level ``tasks`` list, the energy CSV
    (``data/energy/<task>``) is merged with the parameter CSV
    (``data/params/<task>``) on the ``model`` column; the parameter file's
    ``Link`` column is renamed to ``model`` first.

    Returns:
        The plotly figure produced by ``px.scatter``, with one point per
        merged row, coloured by its ``energy_star`` bucket.
    """
    merged_frames = []
    for task in tasks:
        task_df = pd.read_csv('data/energy/' + task)
        params_df = pd.read_csv('data/params/' + task)
        params_df = params_df.rename(columns={"Link": "model"})
        merged_frames.append(pd.merge(task_df, params_df, on='model'))

    # Combine all tasks in one step. The previous code rebuilt the frame from
    # the current task only (so just the last task was plotted) and relied on
    # DataFrame.append, which no longer exists in pandas 2.x.
    all_df = pd.concat(merged_frames, ignore_index=True)

    # NOTE(review): the *1000 presumably converts kWh to Wh -- confirm the
    # unit of 'total_gpu_energy' in the CSV files.
    all_df['Total GPU Energy (Wh)'] = all_df['total_gpu_energy'] * 1000
    all_df = all_df.sort_values(by=['Total GPU Energy (Wh)'])
    all_df['parameters'] = all_df['parameters'].apply(format_params)
    # Three equal-width bins over the energy range; the lowest-energy bin
    # gets the three-star label.
    all_df['energy_star'] = pd.cut(
        all_df['Total GPU Energy (Wh)'], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"]
    )

    fig = px.scatter(
        all_df,
        x="model",
        y='Total GPU Energy (Wh)',
        custom_data=['parameters'],
        height=500,
        width=800,
        color='energy_star',
        color_discrete_map={"⭐": 'red', "⭐⭐": "yellow", "⭐⭐⭐": "green"},
    )
    fig.update_traces(
        hovertemplate="<br>".join([
            "Total Energy: %{y}",
            "Parameters: %{customdata[0]}"])
    )
    return fig
|
| 48 |
+
|
| 49 |
def make_link(mname):
    """Return a Markdown link to a model's Hugging Face Hub page.

    Args:
        mname: Hub model identifier such as ``"org/model"``. It is converted
            with ``str()`` before use.

    Returns:
        ``"[label](https://huggingface.co/<mname>)"`` where ``label`` is the
        segment after the first ``/``. An identifier without any ``/`` is
        used whole as the label instead of raising ``IndexError``.
    """
    name = str(mname)
    parts = name.split('/')
    # Identifiers without a namespace have no second segment to show.
    label = parts[1] if len(parts) > 1 else parts[0]
    return "[" + label + "](https://huggingface.co/" + name + ")"
|
| 52 |
|
| 53 |
+
def get_model_names(task):
    """Build the leaderboard table for a single task.

    Args:
        task: CSV file name (e.g. ``'question_answering.csv'``) that is
            appended to both ``'data/params/'`` and ``'data/energy/'``.

    Returns:
        A DataFrame with the columns ``Model`` (Markdown link), ``Rating``,
        ``Total GPU Energy (Wh)`` and ``Parameters``, sorted by ascending
        energy.
    """
    task_df = pd.read_csv('data/params/' + task)
    energy_df = pd.read_csv('data/energy/' + task)
    # The parameter file names its model column "Link"; align it for the merge.
    task_df = task_df.rename(columns={"Link": "model"})
    all_df = pd.merge(task_df, energy_df, on='model')
    all_df = all_df.drop_duplicates(subset=['model'])

    all_df['Parameters'] = all_df['parameters'].apply(format_params)
    all_df['Model'] = all_df['model'].apply(make_link)
    # NOTE(review): the *1000 presumably converts kWh to Wh -- confirm the
    # unit of 'total_gpu_energy' in the CSV files.
    all_df['Total GPU Energy (Wh)'] = all_df['total_gpu_energy'] * 1000
    all_df['Total GPU Energy (Wh)'] = all_df['Total GPU Energy (Wh)'].round(2)
    # Three equal-width bins over the energy range; the lowest-energy bin
    # gets the three-star label.
    all_df['Rating'] = pd.cut(all_df['Total GPU Energy (Wh)'], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])

    # Select the display columns first, then sort: sorting `model_names`
    # before it was assigned raised UnboundLocalError.
    model_names = all_df[['Model', 'Rating', 'Total GPU Energy (Wh)', 'Parameters']]
    model_names = model_names.sort_values('Total GPU Energy (Wh)')
    return model_names
|
| 67 |
+
|
| 68 |
+
def get_all_model_names():
    """Build the leaderboard table covering every task.

    For each file name in the module-level ``tasks`` list, the parameter CSV
    (``data/params/<task>``) is merged with the energy CSV
    (``data/energy/<task>``) on the ``model`` column, and the per-task
    results are combined into one table with one row per model.

    Returns:
        A DataFrame with the columns ``Model`` (Markdown link), ``Rating``,
        ``Total GPU Energy (Wh)`` and ``Parameters``, sorted by ascending
        energy.
    """
    # TODO: add link to results in model card of each model
    merged_frames = []
    for task in tasks:
        task_df = pd.read_csv('data/params/' + task)
        energy_df = pd.read_csv('data/energy/' + task)
        # The parameter file names its model column "Link"; align it for the merge.
        task_df = task_df.rename(columns={"Link": "model"})
        merged_frames.append(pd.merge(task_df, energy_df, on='model'))

    # Combine all tasks in one step. The previous code rebuilt the frame from
    # the current task only (so just the last task was listed) and relied on
    # DataFrame.append, which no longer exists in pandas 2.x.
    all_df = pd.concat(merged_frames, ignore_index=True)
    # Keep only the first row for a model that shows up under several tasks.
    all_df = all_df.drop_duplicates(subset=['model'])

    all_df['Parameters'] = all_df['parameters'].apply(format_params)
    all_df['Model'] = all_df['model'].apply(make_link)
    # NOTE(review): the *1000 presumably converts kWh to Wh -- confirm the
    # unit of 'total_gpu_energy' in the CSV files.
    all_df['Total GPU Energy (Wh)'] = all_df['total_gpu_energy'] * 1000
    all_df['Total GPU Energy (Wh)'] = all_df['Total GPU Energy (Wh)'].round(2)
    # Three equal-width bins over the energy range; the lowest-energy bin
    # gets the three-star label.
    all_df['Rating'] = pd.cut(all_df['Total GPU Energy (Wh)'], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])

    # Select the display columns first, then sort: sorting `model_names`
    # before it was assigned raised UnboundLocalError.
    model_names = all_df[['Model', 'Rating', 'Total GPU Energy (Wh)', 'Parameters']]
    model_names = model_names.sort_values('Total GPU Energy (Wh)')
    return model_names
|
| 86 |
|
| 87 |
+
|
| 88 |
def format_params(num):
|
| 89 |
if num > 1000000000:
|
| 90 |
if not num % 1000000000:
|
|
|
|
| 174 |
plot = gr.Plot(get_plots('question_answering.csv'))
|
| 175 |
with gr.Column():
|
| 176 |
table = gr.Dataframe(get_model_names('question_answering.csv'), datatype="markdown")
|
| 177 |
+
|
| 178 |
+
with gr.TabItem("Overall"):
|
| 179 |
+
with gr.Row():
|
| 180 |
+
with gr.Column():
|
| 181 |
+
plot = gr.Plot(get_all_plots)
|
| 182 |
+
with gr.Column():
|
| 183 |
+
table = gr.Dataframe(get_all_model_names)
|
| 184 |
+
|
| 185 |
with gr.Accordion("Methodology", open = False):
|
| 186 |
gr.Markdown(
|
| 187 |
"""For each of the ten tasks above, we created a custom dataset with 1,000 entries (see all of the datasets on our [org Hub page](https://huggingface.co/EnergyStarAI)).
|
| 188 |
+
We then tested each of the models from the leaderboard on the appropriate task on Nvidia A100 GPUs, measuring the energy consumed using [Code Carbon](https://mlco2.github.io/codecarbon/), an open-source Python package for tracking the environmental impacts of code.
|
| 189 |
We developed and used a [Docker container](https://github.com/huggingface/EnergyStarAI/) to maximize the reproducibility of results, and to enable members of the community to benchmark internal models.
|
| 190 |
Reach out to us if you want to collaborate!
|
| 191 |
""")
|