Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Pratik Bhavsar
committed on
Commit
·
8208a40
1
Parent(s):
e390790
added buttons
Browse files- tabs/leaderboard_v2.py +201 -4
tabs/leaderboard_v2.py
CHANGED
@@ -1008,11 +1008,186 @@ def create_leaderboard_v2_tab():
|
|
1008 |
|
1009 |
gr.HTML(custom_css)
|
1010 |
|
|
|
1011 |
gr.HTML("""
|
1012 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1013 |
<h1 style="font-size: 3rem; margin-bottom: 12px; color: var(--text-primary);
|
1014 |
text-shadow: 0 0 20px rgba(227, 84, 84, 0.3); font-family: 'Geist', sans-serif; font-weight: 800;">
|
1015 |
-
π Galileo Agent Leaderboard
|
1016 |
</h1>
|
1017 |
<p style="color: var(--text-secondary); font-size: 1.2rem; margin-top: 0; font-family: 'Geist', sans-serif;">
|
1018 |
Comprehensive performance metrics for LLM agents across business domains
|
@@ -1020,6 +1195,28 @@ def create_leaderboard_v2_tab():
|
|
1020 |
</div>
|
1021 |
""")
|
1022 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1023 |
# Metrics overview cards with insights
|
1024 |
gr.HTML("""
|
1025 |
<div style="margin-bottom: 40px;">
|
@@ -1789,7 +1986,7 @@ def create_leaderboard_v2_tab():
|
|
1789 |
<div style="space-y: 12px;">
|
1790 |
<div style="margin-bottom: 12px;">
|
1791 |
<div style="font-weight: 600; color: var(--text-primary); margin-bottom: 4px; font-family: 'Geist Mono', monospace; font-size: 0.9rem;">
|
1792 |
-
π
|
1793 |
</div>
|
1794 |
<div style="color: var(--text-secondary); font-size: 0.9rem; line-height: 1.4; margin-bottom: 6px;">
|
1795 |
Measures how well the agent accomplishes user goals and completes tasks successfully.
|
@@ -1803,7 +2000,7 @@ def create_leaderboard_v2_tab():
|
|
1803 |
|
1804 |
<div style="border-top: 1px solid var(--border-subtle); padding-top: 12px;">
|
1805 |
<div style="font-weight: 600; color: var(--text-primary); margin-bottom: 4px; font-family: 'Geist Mono', monospace; font-size: 0.9rem;">
|
1806 |
-
🛠️
|
1807 |
</div>
|
1808 |
<div style="color: var(--text-secondary); font-size: 0.9rem; line-height: 1.4; margin-bottom: 6px;">
|
1809 |
Evaluates the accuracy of selecting the right tools and using them with correct parameters.
|
|
|
1008 |
|
1009 |
gr.HTML(custom_css)
|
1010 |
|
1011 |
+
# Header button above title
|
1012 |
gr.HTML("""
|
1013 |
+
<style>
|
1014 |
+
/* Enhanced button styling with better gradio compatibility */
|
1015 |
+
.custom-button-container {
|
1016 |
+
text-align: center;
|
1017 |
+
padding: 20px 0 10px 0;
|
1018 |
+
margin-bottom: 10px;
|
1019 |
+
}
|
1020 |
+
|
1021 |
+
.header-action-button {
|
1022 |
+
display: inline-block !important;
|
1023 |
+
padding: 14px 28px !important;
|
1024 |
+
background: linear-gradient(135deg, #E35454 0%, #C84545 100%) !important;
|
1025 |
+
color: #FFFFFF !important;
|
1026 |
+
text-decoration: none !important;
|
1027 |
+
border-radius: 16px !important;
|
1028 |
+
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
1029 |
+
font-weight: 700 !important;
|
1030 |
+
font-size: 1.1rem !important;
|
1031 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
|
1032 |
+
border: none !important;
|
1033 |
+
cursor: pointer !important;
|
1034 |
+
box-shadow: 0 8px 24px rgba(227, 84, 84, 0.4), 0 4px 12px rgba(0, 0, 0, 0.3) !important;
|
1035 |
+
position: relative !important;
|
1036 |
+
overflow: hidden !important;
|
1037 |
+
text-shadow: 0 1px 2px rgba(0, 0, 0, 0.3) !important;
|
1038 |
+
}
|
1039 |
+
|
1040 |
+
.header-action-button::before {
|
1041 |
+
content: '';
|
1042 |
+
position: absolute;
|
1043 |
+
top: 0;
|
1044 |
+
left: -100%;
|
1045 |
+
width: 100%;
|
1046 |
+
height: 100%;
|
1047 |
+
background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
|
1048 |
+
transition: left 0.6s;
|
1049 |
+
}
|
1050 |
+
|
1051 |
+
.header-action-button:hover::before {
|
1052 |
+
left: 100%;
|
1053 |
+
}
|
1054 |
+
|
1055 |
+
.header-action-button:hover {
|
1056 |
+
transform: translateY(-3px) !important;
|
1057 |
+
box-shadow: 0 12px 32px rgba(227, 84, 84, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
|
1058 |
+
background: linear-gradient(135deg, #F46464 0%, #D84F4F 100%) !important;
|
1059 |
+
color: #FFFFFF !important;
|
1060 |
+
text-decoration: none !important;
|
1061 |
+
}
|
1062 |
+
|
1063 |
+
.header-action-button:active {
|
1064 |
+
transform: translateY(-1px) !important;
|
1065 |
+
}
|
1066 |
+
|
1067 |
+
.action-button-icon {
|
1068 |
+
font-size: 1.2rem !important;
|
1069 |
+
margin-right: 8px !important;
|
1070 |
+
filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
|
1071 |
+
}
|
1072 |
+
|
1073 |
+
/* Navigation buttons styling */
|
1074 |
+
.nav-buttons-container {
|
1075 |
+
display: flex;
|
1076 |
+
justify-content: center;
|
1077 |
+
align-items: center;
|
1078 |
+
gap: 16px;
|
1079 |
+
flex-wrap: wrap;
|
1080 |
+
margin: 24px 0;
|
1081 |
+
padding: 0 20px;
|
1082 |
+
}
|
1083 |
+
|
1084 |
+
.nav-link-button {
|
1085 |
+
display: inline-flex !important;
|
1086 |
+
align-items: center !important;
|
1087 |
+
gap: 8px !important;
|
1088 |
+
padding: 12px 20px !important;
|
1089 |
+
background: rgba(1, 9, 26, 0.8) !important;
|
1090 |
+
color: #F5F6F7 !important;
|
1091 |
+
text-decoration: none !important;
|
1092 |
+
border-radius: 12px !important;
|
1093 |
+
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
1094 |
+
font-weight: 600 !important;
|
1095 |
+
font-size: 0.95rem !important;
|
1096 |
+
transition: all 0.3s ease !important;
|
1097 |
+
border: 2px solid rgba(245, 246, 247, 0.15) !important;
|
1098 |
+
backdrop-filter: blur(10px) !important;
|
1099 |
+
-webkit-backdrop-filter: blur(10px) !important;
|
1100 |
+
position: relative !important;
|
1101 |
+
overflow: hidden !important;
|
1102 |
+
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
|
1103 |
+
}
|
1104 |
+
|
1105 |
+
.nav-link-button::before {
|
1106 |
+
content: '';
|
1107 |
+
position: absolute;
|
1108 |
+
top: 0;
|
1109 |
+
left: 0;
|
1110 |
+
right: 0;
|
1111 |
+
bottom: 0;
|
1112 |
+
background: linear-gradient(135deg, rgba(227, 84, 84, 0.1) 0%, rgba(16, 152, 247, 0.1) 100%);
|
1113 |
+
opacity: 0;
|
1114 |
+
transition: opacity 0.3s ease;
|
1115 |
+
}
|
1116 |
+
|
1117 |
+
.nav-link-button:hover::before {
|
1118 |
+
opacity: 1;
|
1119 |
+
}
|
1120 |
+
|
1121 |
+
.nav-link-button:hover {
|
1122 |
+
transform: translateY(-3px) scale(1.02) !important;
|
1123 |
+
border-color: #E35454 !important;
|
1124 |
+
box-shadow: 0 8px 24px rgba(227, 84, 84, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
|
1125 |
+
text-decoration: none !important;
|
1126 |
+
color: #FFFFFF !important;
|
1127 |
+
}
|
1128 |
+
|
1129 |
+
.nav-link-button.primary-nav {
|
1130 |
+
background: linear-gradient(135deg, #1098F7 0%, #0A6BC4 100%) !important;
|
1131 |
+
border-color: #1098F7 !important;
|
1132 |
+
color: #FFFFFF !important;
|
1133 |
+
font-weight: 700 !important;
|
1134 |
+
}
|
1135 |
+
|
1136 |
+
.nav-link-button.primary-nav:hover {
|
1137 |
+
background: linear-gradient(135deg, #2AA8FF 0%, #0550A0 100%) !important;
|
1138 |
+
border-color: #2AA8FF !important;
|
1139 |
+
box-shadow: 0 8px 24px rgba(16, 152, 247, 0.4), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
|
1140 |
+
color: #FFFFFF !important;
|
1141 |
+
}
|
1142 |
+
|
1143 |
+
.nav-button-icon {
|
1144 |
+
font-size: 1.1rem !important;
|
1145 |
+
filter: drop-shadow(0 0 6px currentColor);
|
1146 |
+
}
|
1147 |
+
|
1148 |
+
/* Responsive design */
|
1149 |
+
@media (max-width: 768px) {
|
1150 |
+
.nav-buttons-container {
|
1151 |
+
gap: 12px;
|
1152 |
+
padding: 0 10px;
|
1153 |
+
}
|
1154 |
+
|
1155 |
+
.nav-link-button {
|
1156 |
+
font-size: 0.85rem !important;
|
1157 |
+
padding: 10px 16px !important;
|
1158 |
+
}
|
1159 |
+
|
1160 |
+
.header-action-button {
|
1161 |
+
font-size: 1rem !important;
|
1162 |
+
padding: 12px 24px !important;
|
1163 |
+
}
|
1164 |
+
}
|
1165 |
+
|
1166 |
+
@media (max-width: 480px) {
|
1167 |
+
.nav-buttons-container {
|
1168 |
+
flex-direction: column;
|
1169 |
+
gap: 8px;
|
1170 |
+
}
|
1171 |
+
|
1172 |
+
.nav-link-button {
|
1173 |
+
width: 200px;
|
1174 |
+
justify-content: center;
|
1175 |
+
}
|
1176 |
+
}
|
1177 |
+
</style>
|
1178 |
+
|
1179 |
+
<div class="custom-button-container">
|
1180 |
+
<a href="https://app.galileo.ai/sign-up" target="_blank" class="header-action-button">
|
1181 |
+
<span class="action-button-icon">π</span>Evaluate your GenAI for free
|
1182 |
+
</a>
|
1183 |
+
</div>
|
1184 |
+
""")
|
1185 |
+
|
1186 |
+
gr.HTML("""
|
1187 |
+
<div style="text-align: center; padding: 20px 0;">
|
1188 |
<h1 style="font-size: 3rem; margin-bottom: 12px; color: var(--text-primary);
|
1189 |
text-shadow: 0 0 20px rgba(227, 84, 84, 0.3); font-family: 'Geist', sans-serif; font-weight: 800;">
|
1190 |
+
π Galileo Agent Leaderboard v2
|
1191 |
</h1>
|
1192 |
<p style="color: var(--text-secondary); font-size: 1.2rem; margin-top: 0; font-family: 'Geist', sans-serif;">
|
1193 |
Comprehensive performance metrics for LLM agents across business domains
|
|
|
1195 |
</div>
|
1196 |
""")
|
1197 |
|
1198 |
+
# Links section below title
|
1199 |
+
gr.HTML("""
|
1200 |
+
<div class="nav-buttons-container">
|
1201 |
+
<a href="http://galileo.ai/blog/agent-leaderboard-v2" target="_blank" class="nav-link-button">
|
1202 |
+
<span class="nav-button-icon">π</span>
|
1203 |
+
Blog
|
1204 |
+
</a>
|
1205 |
+
<a href="https://github.com/rungalileo/agent-leaderboard" target="_blank" class="nav-link-button">
|
1206 |
+
<span class="nav-button-icon">π</span>
|
1207 |
+
GitHub
|
1208 |
+
</a>
|
1209 |
+
<a href="https://huggingface.co/datasets/galileo-ai/agent-leaderboard-v2" target="_blank" class="nav-link-button">
|
1210 |
+
<span class="nav-button-icon">🤗</span>
|
1211 |
+
Dataset
|
1212 |
+
</a>
|
1213 |
+
<a href="https://huggingface.co/spaces/galileo-ai/agent-leaderboard/discussions/new" target="_blank" class="nav-link-button">
|
1214 |
+
<span class="nav-button-icon">➕</span>
|
1215 |
+
Add Model
|
1216 |
+
</a>
|
1217 |
+
</div>
|
1218 |
+
""")
|
1219 |
+
|
1220 |
# Metrics overview cards with insights
|
1221 |
gr.HTML("""
|
1222 |
<div style="margin-bottom: 40px;">
|
|
|
1986 |
<div style="space-y: 12px;">
|
1987 |
<div style="margin-bottom: 12px;">
|
1988 |
<div style="font-weight: 600; color: var(--text-primary); margin-bottom: 4px; font-family: 'Geist Mono', monospace; font-size: 0.9rem;">
|
1989 |
+
π Action Completion
|
1990 |
</div>
|
1991 |
<div style="color: var(--text-secondary); font-size: 0.9rem; line-height: 1.4; margin-bottom: 6px;">
|
1992 |
Measures how well the agent accomplishes user goals and completes tasks successfully.
|
|
|
2000 |
|
2001 |
<div style="border-top: 1px solid var(--border-subtle); padding-top: 12px;">
|
2002 |
<div style="font-weight: 600; color: var(--text-primary); margin-bottom: 4px; font-family: 'Geist Mono', monospace; font-size: 0.9rem;">
|
2003 |
+
🛠️ Tool Selection Quality
|
2004 |
</div>
|
2005 |
<div style="color: var(--text-secondary); font-size: 0.9rem; line-height: 1.4; margin-bottom: 6px;">
|
2006 |
Evaluates the accuracy of selecting the right tools and using them with correct parameters.
|