tcid / app.py
ror's picture
ror HF Staff
Fix model page for unfound data
f46b0a9
raw
history blame
10.6 kB
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import gradio as gr
from data import CIResults
from utils import logger
from summary_page import create_summary_page
from model_page import plot_model_stats
# Use a black background for figures, axes and saved images.
for _facecolor_key in ("figure.facecolor", "axes.facecolor", "savefig.facecolor"):
    matplotlib.rcParams[_facecolor_key] = "#000000"

# Interactive mode is switched off; per the original author's note this keeps
# figures from accumulating (and matplotlib from emitting memory warnings).
plt.ioff()

# Shared CI results object used by every callback in this module: it is
# created and loaded once at import time, then its auto-reload scheduler
# is started.
Ci_results = CIResults()
Ci_results.load_data()
Ci_results.schedule_data_reload()
# Failure lookup used to pick the styling of a model's sidebar button
def model_has_failures(model_name):
    """Return True if ``model_name`` has a failure count above zero.

    The name is lower-cased and looked up in the index of ``Ci_results.df``.
    The four failure-count columns (AMD/NVIDIA x multi/single) are read with
    a default of 0 when a column is absent. Returns False when the DataFrame
    is ``None`` or empty, or when the model is not in the index.
    """
    results_df = Ci_results.df
    if results_df is None or results_df.empty:
        return False

    # The DataFrame index is matched against the lower-cased model name.
    index_key = model_name.lower()
    if index_key not in results_df.index:
        return False

    model_row = results_df.loc[index_key]
    failure_counts = [
        model_row.get(column, 0)
        for column in (
            'failed_multi_no_amd',
            'failed_single_no_amd',
            'failed_multi_no_nvidia',
            'failed_single_no_nvidia',
        )
    ]
    return any([count > 0 for count in failure_counts])
# Sidebar description, including the time of the last data refresh
def get_description_text():
    """Build the sidebar description as ``<br>``-joined Markdown.

    The result is four bold headline lines, an empty line, and an italic
    status line. The status line shows ``Ci_results.last_update_time`` when
    it is truthy and a "loading..." placeholder otherwise.
    """
    headlines = (
        "Transformer CI Dashboard",
        "-",
        "AMD runs on MI325",
        "NVIDIA runs on A10",
    )
    lines = [f"**{headline}**" for headline in headlines]
    lines.append("")

    last_update = Ci_results.last_update_time
    if last_update:
        status = f"*Result overview by model and hardware (last updated: {last_update})*"
    else:
        status = "*Result overview by model and hardware (loading...)*"
    lines.append(status)

    return "<br>".join(lines)
# Load CSS from external file
def load_css():
    """Return the dashboard stylesheet as a string.

    Reads ``styles.css`` relative to the current working directory. The file
    is decoded explicitly as UTF-8 so the result does not depend on the
    platform's default encoding.

    Returns:
        The file contents, or a minimal dark fallback rule when the file
        does not exist (a warning is logged in that case).
    """
    try:
        with open("styles.css", "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        logger.warning("styles.css not found, using minimal default styles")
        return "body { background: #000; color: #fff; }"
# Helper: first usable multi/single job links stored in one DataFrame column
def _first_job_links(df, column):
    """Return ``(multi_link, single_link)`` taken from ``column`` of ``df``.

    Only cells that are dicts are considered; their ``'multi'`` and
    ``'single'`` entries are used. The first truthy value found for each
    kind wins, and scanning stops once both are known. Either element is
    falsy (normally ``None``) when no link was found, including when the
    column does not exist.
    """
    multi_link = None
    single_link = None
    if column not in df.columns:
        return multi_link, single_link
    for raw in df[column]:
        # Cells without link data (NaN, None, ...) are not dicts: skip them.
        if not isinstance(raw, dict):
            continue
        multi_link = multi_link or raw.get('multi')
        single_link = single_link or raw.get('single')
        if multi_link and single_link:
            break
    return multi_link, single_link


# Helper: Markdown section listing one vendor's job links
def _format_vendor_links(vendor, multi_link, single_link):
    """Return the Markdown section for ``vendor``, or ``""`` if it has no links."""
    if not (multi_link or single_link):
        return ""
    section = [f"**{vendor}:**\n"]
    if multi_link:
        section.append(f"• [Multi GPU]({multi_link})\n")
    if single_link:
        section.append(f"• [Single GPU]({single_link})\n")
    return "".join(section)


# Function to get CI job links
def get_ci_links():
    """Return the Markdown shown in the sidebar's "CI Jobs" area.

    Links are read from the ``job_link_amd`` and ``job_link_nvidia`` columns
    of ``Ci_results.df``. Any row may provide them (per the original
    author's note they should be the same for all models in a run).

    Returns:
        A "Loading..." placeholder while the DataFrame is ``None`` or empty,
        the formatted link list otherwise ("No links available" when nothing
        was found), or an "Error loading links" message if anything raises.
    """
    # Deliberately broad: this runs as a UI callback, so any failure is
    # logged and turned into a message instead of breaking the page.
    try:
        df = Ci_results.df
        if df is None or df.empty:
            return "🔗 **CI Jobs:** *Loading...*"

        amd_section = _format_vendor_links("AMD", *_first_job_links(df, 'job_link_amd'))
        nvidia_section = _format_vendor_links("NVIDIA", *_first_job_links(df, 'job_link_nvidia'))

        parts = ["🔗 **CI Jobs:**\n\n"]
        if amd_section:
            # A blank line separates the AMD section from what follows.
            parts.append(amd_section + "\n")
        if nvidia_section:
            parts.append(nvidia_section)
        if not (amd_section or nvidia_section):
            parts.append("*No links available*")
        return "".join(parts)
    except Exception as e:
        logger.error(f"getting CI links: {e}")
        return "🔗 **CI Jobs:** *Error loading links*"


# Summary button click handler
def show_summary_and_update_links():
    """Return the refreshed summary figure, description text and CI links.

    The three values are returned in the order of the outputs wired to the
    summary button: summary plot, sidebar description, CI links Markdown.
    """
    return create_summary_page(Ci_results.df, Ci_results.available_models), get_description_text(), get_ci_links()


# Create the Gradio interface with sidebar and dark theme
with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
    with gr.Row():
        # Sidebar for model selection
        with gr.Column(scale=1, elem_classes=["sidebar"]):
            gr.Markdown("# 🤖 TCID", elem_classes=["sidebar-title"])

            # Description with integrated last update time
            description_text = get_description_text()
            description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])

            # Summary button at the top
            summary_button = gr.Button(
                "summary\n📊",
                variant="primary",
                size="lg",
                elem_classes=["summary-button"],
            )

            # Lower-cased model names, one button each; a fixed fallback list
            # is used when no models are available.
            if Ci_results.available_models:
                model_choices = [model.lower() for model in Ci_results.available_models]
            else:
                model_choices = ["auto", "bert", "clip", "llama"]

            # Model selection header. The count comes from model_choices so
            # it always matches the buttons below and cannot fail when
            # available_models is None.
            gr.Markdown(f"**Select model ({len(model_choices)}):**", elem_classes=["model-header"])

            # Scrollable container for model buttons
            with gr.Column(scale=1, elem_classes=["model-container"]):
                # Create individual buttons for each model
                model_buttons = []
                for model_name in model_choices:
                    # Models with failures get an extra CSS class
                    has_failures = model_has_failures(model_name)
                    button_classes = ["model-button"]
                    if has_failures:
                        button_classes.append("model-button-failed")
                    btn = gr.Button(
                        model_name,
                        variant="secondary",
                        size="sm",
                        elem_classes=button_classes,
                    )
                    model_buttons.append(btn)

            # CI job links at bottom of sidebar
            ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])

        # Main content area
        with gr.Column(scale=4, elem_classes=["main-content"]):
            # Summary display (default view)
            summary_display = gr.Plot(
                value=create_summary_page(Ci_results.df, Ci_results.available_models),
                label="",
                format="png",
                elem_classes=["plot-container"],
                visible=True,
            )

            # Detailed view components (hidden by default)
            with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
                # Create the plot output
                plot_output = gr.Plot(
                    label="",
                    format="png",
                    elem_classes=["plot-container"],
                )

                # Two separate failed-tests displays side by side
                with gr.Row():
                    with gr.Column(scale=1):
                        amd_failed_tests_output = gr.Textbox(
                            value="",
                            lines=8,
                            max_lines=8,
                            interactive=False,
                            container=False,
                            elem_classes=["failed-tests"],
                        )
                    with gr.Column(scale=1):
                        nvidia_failed_tests_output = gr.Textbox(
                            value="",
                            lines=8,
                            max_lines=8,
                            interactive=False,
                            container=False,
                            elem_classes=["failed-tests"],
                        )

    # Set up click handlers for model buttons
    for btn, model_name in zip(model_buttons, model_choices):
        btn.click(
            # The default argument binds this iteration's model name;
            # a plain closure would see only the last one.
            fn=lambda selected_model=model_name: plot_model_stats(Ci_results.df, selected_model),
            outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output],
        ).then(
            # Hide the summary plot and reveal the detail view.
            fn=lambda: [gr.update(visible=False), gr.update(visible=True)],
            outputs=[summary_display, detail_view],
        )

    # Summary button: refresh the summary data, then switch back to it
    summary_button.click(
        fn=show_summary_and_update_links,
        outputs=[summary_display, description_display, ci_links_display],
    ).then(
        fn=lambda: [gr.update(visible=True), gr.update(visible=False)],
        outputs=[summary_display, detail_view],
    )

    # Auto-update CI links when the interface loads
    demo.load(
        fn=get_ci_links,
        outputs=[ci_links_display],
    )

# Gradio entrypoint
if __name__ == "__main__":
    demo.launch()