ror (HF Staff) committed
Commit 64fcb68 · 1 Parent(s): 2018d03

Re-structuration

Files changed (6):
  1. CLAUDE.md +91 -0
  2. app.py +25 -208
  3. data.py +75 -36
  4. sample_data.csv +22 -0
  5. summary_page.py +138 -0
  6. utils.py +51 -0
CLAUDE.md ADDED
@@ -0,0 +1,91 @@
+ # CLAUDE.md
+
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+ ## Project Overview
+
+ This is **TCID** (Transformer CI Dashboard) - a Gradio-based web dashboard that displays test results for Transformer models across AMD and NVIDIA hardware. The application fetches CI test data from HuggingFace datasets and presents it through interactive visualizations and detailed failure reports.
+
+ ## Architecture
+
+ ### Core Components
+
+ - **`app.py`** - Main Gradio application with UI components, plotting functions, and data visualization logic
+ - **`data.py`** - Data fetching module that retrieves test results from HuggingFace datasets for AMD and NVIDIA CI runs
+ - **`styles.css`** - Complete dark theme styling for the Gradio interface
+ - **`requirements.txt`** - Python dependencies (matplotlib only)
+
+ ### Data Flow
+
+ 1. **Data Loading**: `get_data()` in `data.py` fetches the latest CI results (sketched below) from:
+    - AMD: `hf://datasets/optimum-amd/transformers_daily_ci`
+    - NVIDIA: `hf://datasets/hf-internal-testing/transformers_daily_ci`
+
+ 2. **Data Processing**: Results are joined and filtered to show only the important models defined in the `IMPORTANT_MODELS` list
+
+ 3. **Visualization**: Two main views:
+    - **Summary Page**: Horizontal bar charts showing test results for all models
+    - **Detail View**: Pie charts for individual models with failure details
+
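A condensed, hedged sketch of the loading step (the real implementation lives in `data.py` further down this page; note that "latest" is chosen by a reverse-lexicographic sort of the glob results, which relies on the date-stamped directory names):

```python
from huggingface_hub import HfFileSystem
import pandas as pd

fs = HfFileSystem()

# Newest AMD results file: glob the dataset, sort newest-first, take the first hit.
pattern = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
latest_amd = sorted(fs.glob(pattern), reverse=True)[0]
df_amd = pd.read_json(f"hf://{latest_amd}", orient="index")

# The NVIDIA frame is fetched the same way from hf-internal-testing/transformers_daily_ci,
# then the two are outer-joined and filtered down to IMPORTANT_MODELS:
# joined = df_amd.join(df_nvidia, lsuffix="_amd", rsuffix="_nvidia", how="outer")
```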
+ ### UI Architecture
+
+ - **Sidebar**: Model selection, refresh controls, CI job links
+ - **Main Content**: Dynamic display switching between summary and detail views
+ - **Auto-refresh**: Data reloads every 15 minutes via background threading
+
+ ## Running the Application
+
+ ### Development Commands
+
+ ```bash
+ # Install dependencies
+ pip install -r requirements.txt
+
+ # Run the application
+ python app.py
+ ```
+
+ ### HuggingFace Spaces Deployment
+
+ This application is configured for HuggingFace Spaces deployment:
+ - **Framework**: Gradio 5.38.0
+ - **App file**: `app.py`
+ - **Configuration**: See `README.md` header for Spaces metadata
+
+ ## Key Data Structures
+
+ ### Model Results DataFrame
+ The joined DataFrame contains these columns (read defensively, as sketched below):
+ - `success_amd` / `success_nvidia` - Number of passing tests
+ - `failed_multi_no_amd` / `failed_multi_no_nvidia` - Multi-GPU failure counts
+ - `failed_single_no_amd` / `failed_single_no_nvidia` - Single-GPU failure counts
+ - `failures_amd` / `failures_nvidia` - Detailed failure information objects
+ - `job_link_amd` / `job_link_nvidia` - CI job URLs
+
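Counts can be missing (NaN) when a model ran on only one vendor's CI, so consumers read them defensively. A minimal sketch of the pattern used in `app.py` (`safe_count` is a hypothetical helper name, not in the codebase):

```python
import pandas as pd

def safe_count(row: pd.Series, col: str) -> int:
    # NaN-safe integer read: a missing column and a NaN value both collapse to 0
    val = row.get(col, 0)
    return int(val) if pd.notna(val) else 0

# Total AMD failures for one row of the joined DataFrame:
# failed_amd = safe_count(row, "failed_multi_no_amd") + safe_count(row, "failed_single_no_amd")
```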
+ ### Important Models List
+ Predefined list in `data.py` focusing on significant models:
+ - Classic models: bert, gpt2, t5, vit, clip, whisper
+ - Modern models: llama, gemma3, qwen2, mistral3
+ - Multimodal: qwen2_5_vl, llava, smolvlm, internvl
+
+ ## Styling and Theming
+
+ The application uses a comprehensive dark theme with:
+ - Fixed sidebar layout (300px width)
+ - Black background throughout (`#000000`)
+ - Custom scrollbars with dark styling
+ - Monospace fonts for technical aesthetics
+ - Gradient buttons and hover effects
+
+ ## Error Handling
+
+ - **Data Loading Failures**: Falls back to predefined model list for testing
+ - **Missing Model Data**: Shows "No data available" message in visualizations
+ - **Empty Results**: Gracefully handles cases with no test results
+
+ ## Performance Considerations
+
+ - **Memory Management**: Matplotlib configured to prevent memory warnings
+ - **Interactive Mode**: Disabled to prevent figure accumulation
+ - **Auto-reload**: Background threading with daemon timers
+ - **Data Caching**: Global variables store loaded data between UI updates
app.py CHANGED
@@ -1,76 +1,30 @@
  import matplotlib.pyplot as plt
  import matplotlib
- import numpy as np
  import pandas as pd
  import gradio as gr
  import threading
- import time
- from datetime import datetime
- from data import get_data
+
+ from data import CIResults
+ from utils import logger, generate_underlined_line
+ from summary_page import create_summary_page

  # Configure matplotlib to prevent memory warnings and set dark background
- matplotlib.rcParams['figure.max_open_warning'] = 0
  matplotlib.rcParams['figure.facecolor'] = '#000000'
  matplotlib.rcParams['axes.facecolor'] = '#000000'
  matplotlib.rcParams['savefig.facecolor'] = '#000000'
  plt.ioff()  # Turn off interactive mode to prevent figure accumulation

- # Global variables for data
- df = pd.DataFrame()
- available_models = []
- last_update_time = None
-
- def load_data():
-     """Load data from the data source."""
-     global df, available_models, last_update_time
-     try:
-         print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Loading data...")
-         new_df = get_data()
-         new_models = new_df.index.tolist()
-
-         # Update global variables
-         df = new_df
-         available_models = new_models
-         last_update_time = datetime.now()
-
-         print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Data loaded successfully: {len(available_models)} models")
-         print(f"Models: {available_models[:5]}{'...' if len(available_models) > 5 else ''}")
-
-         return True
-     except Exception as e:
-         print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Error loading data: {e}")
-         return False
-
- def schedule_data_reload():
-     """Schedule the next data reload."""
-     def reload_data():
-         load_data()
-         # Schedule the next reload in 15 minutes (900 seconds)
-         timer = threading.Timer(900.0, reload_data)
-         timer.daemon = True  # Dies when main thread dies
-         timer.start()
-         print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Next data reload scheduled in 15 minutes")
-
-     # Start the first reload timer
-     timer = threading.Timer(900.0, reload_data)
-     timer.daemon = True
-     timer.start()
-     print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Data auto-reload scheduled every 15 minutes")

  # Load data once at startup
- if not load_data():
-     print("WARNING: Failed to load data! Adding fallback models.")
-     available_models = ["auto", "bert", "clip", "llama", "t5"]  # Fallback models for testing
+ Ci_results = CIResults()
+ Ci_results.load_data()

  # Start the auto-reload scheduler
- schedule_data_reload()
+ Ci_results.schedule_data_reload()

- def generate_underlined_line(text: str) -> str:
-     return text + "\n" + "─" * len(text) + "\n"

  def plot_model_stats(model_name: str) -> tuple[plt.Figure, str, str]:
      """Draws a pie chart of model's passed, failed, skipped, and error stats."""
-     if df.empty or model_name not in df.index:
+     if Ci_results.df.empty or model_name not in Ci_results.df.index:
          # Handle case where model data is not available
          fig, ax = plt.subplots(figsize=(10, 8), facecolor='#000000')
          ax.set_facecolor('#000000')
@@ -83,7 +37,7 @@ def plot_model_stats(model_name: str) -> tuple[plt.Figure, str, str]:
          ax.axis('off')
          return fig, "No data available", "No data available"

-     row = df.loc[model_name]
+     row = Ci_results.df.loc[model_name]

      # Handle missing values and get counts directly from dataframe
      success_amd = int(row.get('success_amd', 0)) if pd.notna(row.get('success_amd', 0)) else 0
@@ -276,139 +230,7 @@ def extract_failure_info(failures_obj, device: str, multi_count: int, single_count
          return f"Failures detected on {device} (Multi: {multi_count}, Single: {single_count})\nDetails unavailable: {str(e)}"
      return f"Error processing failure info for {device}: {str(e)}"

- def create_summary_page() -> plt.Figure:
-     """Create a summary page with model names and both AMD/NVIDIA test stats bars."""
-     if df.empty:
-         fig, ax = plt.subplots(figsize=(16, 8), facecolor='#000000')
-         ax.set_facecolor('#000000')
-         ax.text(0.5, 0.5, 'No data available',
-                 horizontalalignment='center', verticalalignment='center',
-                 transform=ax.transAxes, fontsize=20, color='#888888',
-                 fontfamily='monospace', weight='normal')
-         ax.axis('off')
-         return fig
-
-     fig, ax = plt.subplots(figsize=(16, len(available_models) * 2.5 + 2), facecolor='#000000')
-     ax.set_facecolor('#000000')
-
-     colors = {
-         'passed': '#4CAF50',
-         'failed': '#E53E3E',
-         'skipped': '#FFD54F',
-         'error': '#8B0000'
-     }
-
-     visible_model_count = 0
-     max_y = 0
-
-     for i, model_name in enumerate(available_models):
-         if model_name not in df.index:
-             continue
-
-         row = df.loc[model_name]
-
-         # Get values directly from dataframe
-         success_amd = int(row.get('success_amd', 0)) if pd.notna(row.get('success_amd', 0)) else 0
-         success_nvidia = int(row.get('success_nvidia', 0)) if pd.notna(row.get('success_nvidia', 0)) else 0
-         failed_multi_amd = int(row.get('failed_multi_no_amd', 0)) if pd.notna(row.get('failed_multi_no_amd', 0)) else 0
-         failed_multi_nvidia = int(row.get('failed_multi_no_nvidia', 0)) if pd.notna(row.get('failed_multi_no_nvidia', 0)) else 0
-         failed_single_amd = int(row.get('failed_single_no_amd', 0)) if pd.notna(row.get('failed_single_no_amd', 0)) else 0
-         failed_single_nvidia = int(row.get('failed_single_no_nvidia', 0)) if pd.notna(row.get('failed_single_no_nvidia', 0)) else 0
-
-         # Calculate stats
-         amd_stats = {
-             'passed': success_amd,
-             'failed': failed_multi_amd + failed_single_amd,
-             'skipped': 0,
-             'error': 0
-         }
-
-         nvidia_stats = {
-             'passed': success_nvidia,
-             'failed': failed_multi_nvidia + failed_single_nvidia,
-             'skipped': 0,
-             'error': 0
-         }
-
-         amd_total = sum(amd_stats.values())
-         nvidia_total = sum(nvidia_stats.values())
-
-         if amd_total == 0 and nvidia_total == 0:
-             continue
-
-         # Position for this model - use visible model count for spacing
-         y_base = (2.2 + visible_model_count) * 1.8
-         y_model_name = y_base  # Model name above AMD bar
-         y_amd_bar = y_base + 0.45  # AMD bar
-         y_nvidia_bar = y_base + 0.97  # NVIDIA bar
-         max_y = max(max_y, y_nvidia_bar + 0.5)
-
-         # Model name centered above the AMD bar
-         left_0 = 8
-         bar_length = 92
-         ax.text(bar_length / 2 + left_0, y_model_name, f"{model_name.lower()}",
-                 ha='center', va='center', color='#FFFFFF',
-                 fontsize=20, fontfamily='monospace', fontweight='bold')
-
-         # AMD label and bar on the same level
-         if amd_total > 0:
-             ax.text(left_0 - 2, y_amd_bar, "amd",
-                     ha='right', va='center', color='#CCCCCC',
-                     fontsize=18, fontfamily='monospace', fontweight='normal')
-
-             # AMD bar starts after labels
-             left = left_0
-             for category in ['passed', 'failed', 'skipped', 'error']:
-                 if amd_stats[category] > 0:
-                     width = amd_stats[category] / amd_total * bar_length
-                     ax.barh(y_amd_bar, width, left=left, height=0.405,
-                             color=colors[category], alpha=0.9)
-                     if width > 4:
-                         ax.text(left + width/2, y_amd_bar, str(amd_stats[category]),
-                                 ha='center', va='center', color='black',
-                                 fontweight='bold', fontsize=12, fontfamily='monospace')
-                     left += width
-
-         # NVIDIA label and bar on the same level
-         if nvidia_total > 0:
-             ax.text(left_0 - 2, y_nvidia_bar, "nvidia",
-                     ha='right', va='center', color='#CCCCCC',
-                     fontsize=18, fontfamily='monospace', fontweight='normal')
-
-             # NVIDIA bar starts after labels
-             left = left_0
-             for category in ['passed', 'failed', 'skipped', 'error']:
-                 if nvidia_stats[category] > 0:
-                     width = nvidia_stats[category] / nvidia_total * bar_length
-                     ax.barh(y_nvidia_bar, width, left=left, height=0.405,
-                             color=colors[category], alpha=0.9)
-                     if width > 4:
-                         ax.text(left + width/2, y_nvidia_bar, str(nvidia_stats[category]),
-                                 ha='center', va='center', color='black',
-                                 fontweight='bold', fontsize=12, fontfamily='monospace')
-                     left += width
-
-         # Increment counter for next visible model
-         visible_model_count += 1
-
-     # Style the axes to be completely invisible and span full width
-     ax.set_xlim(0, 100)
-     ax.set_ylim(-0.5, max_y)
-     ax.set_xlabel('')
-     ax.set_ylabel('')
-     ax.spines['bottom'].set_visible(False)
-     ax.spines['left'].set_visible(False)
-     ax.spines['top'].set_visible(False)
-     ax.spines['right'].set_visible(False)
-     ax.set_xticks([])
-     ax.set_yticks([])
-     ax.yaxis.set_inverted(True)
-
-     # Remove all margins to make bars span full width
-     plt.tight_layout()
-     plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.02)
-
-     return fig
+

  # Load CSS from external file
  def load_css():
@@ -416,7 +238,7 @@ def load_css():
          with open("styles.css", "r") as f:
              return f.read()
      except FileNotFoundError:
-         print("Warning: styles.css not found, using minimal default styles")
+         logger.warning("styles.css not found, using minimal default styles")
          return "body { background: #000; color: #fff; }"
@@ -429,8 +251,8 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
          gr.Markdown("**Transformer CI Dashboard**\n\n*Result overview by model and hardware*\n")

          # Data status indicator
-         if last_update_time:
-             status_text = f"📊 **Updated:** {last_update_time.strftime('%H:%M')}\n\n*Auto-refresh: 15min*"
+         if Ci_results.last_update_time:
+             status_text = f"📊 **Updated:** {Ci_results.last_update_time}\n\n*Auto-refresh: 15min*"
          else:
              status_text = f"📊 **Loading...**\n\n*Auto-refresh: 15min*"
          status_display = gr.Markdown(status_text)
@@ -456,9 +278,9 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:

          # Back to simple buttons that work
          # Model selector dropdown - much better for long lists
-         gr.Markdown(f"**Select Model ({len(available_models)}):**")
+         gr.Markdown(f"**Select Model ({len(Ci_results.available_models)}):**")

-         model_choices = [model.lower() for model in available_models] if available_models else ["auto", "bert", "clip", "llama"]
+         model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
          model_dropdown = gr.Dropdown(
              choices=model_choices,
              value=model_choices[0] if model_choices else "auto",
@@ -471,7 +293,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
      with gr.Column(scale=4, elem_classes=["main-content"]):
          # Summary display (default view)
          summary_display = gr.Plot(
-             value=create_summary_page(),
+             value=create_summary_page(Ci_results.df, Ci_results.available_models),
              label="",
              format="png",
              elem_classes=["plot-container"],
@@ -522,7 +344,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
      # Summary button click handler
      def show_summary_and_update_links():
          """Show summary page and update CI links."""
-         return create_summary_page(), get_ci_links()
+         return create_summary_page(Ci_results.df, Ci_results.available_models), get_ci_links()

      summary_button.click(
          fn=show_summary_and_update_links,
@@ -535,8 +357,8 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
      # Function to get current status text
      def get_status_text():
          """Get current status text with last update time."""
-         if last_update_time:
-             return f"📊 **Updated:** {last_update_time.strftime('%H:%M')}\n\n*Auto-refresh: 15min*"
+         if Ci_results.last_update_time:
+             return f"📊 **Updated:** {Ci_results.last_update_time}\n\n*Auto-refresh: 15min*"
          else:
              return f"📊 **Loading...**\n\n*Auto-refresh: 15min*"
@@ -545,7 +367,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
          """Get CI job links from the most recent data."""
          try:
              # Check if df exists and is not empty
-             if 'df' not in globals() or df is None or df.empty:
+             if Ci_results.df is None or Ci_results.df.empty:
                  return "🔗 **CI Jobs:** *Loading...*"

              # Get links from any available model (they should be the same for all models in a run)
@@ -554,8 +376,8 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
              nvidia_multi_link = None
              nvidia_single_link = None

-             for model_name in df.index:
-                 row = df.loc[model_name]
+             for model_name in Ci_results.df.index:
+                 row = Ci_results.df.loc[model_name]

                  # Extract AMD links
                  if pd.notna(row.get('job_link_amd')) and (not amd_multi_link or not amd_single_link):
@@ -603,19 +425,14 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:

              return links_md
          except Exception as e:
-             print(f"Error getting CI links: {e}")
+             logger.error(f"getting CI links: {e}")
              return "🔗 **CI Jobs:** *Error loading links*"

      # Refresh button click handler
      def refresh_data_and_status():
          """Manual data refresh triggered by user."""
-         success = load_data()
-         if success:
-             # Return updated summary page, status, and CI links
-             return create_summary_page(), get_status_text(), get_ci_links()
-         else:
-             # Return current summary page, status, and CI links if reload failed
-             return create_summary_page(), get_status_text(), get_ci_links()
+         Ci_results.load_data()
+         return create_summary_page(Ci_results.df, Ci_results.available_models), get_status_text(), get_ci_links()

      refresh_button.click(
          fn=refresh_data_and_status,
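`app.py` now imports `logger` and `generate_underlined_line` from `utils.py`, whose diff is not rendered on this page. `generate_underlined_line` is the helper deleted from `app.py` above; the logger setup below is an assumption (a plausible standard-library configuration), not code shown in this commit:

```python
# utils.py (sketch) - generate_underlined_line matches the function removed from app.py;
# the logging configuration is assumed, not taken from the diff.
import logging

logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s: %(message)s")
logger = logging.getLogger("tcid")

def generate_underlined_line(text: str) -> str:
    # Title followed by a box-drawing underline of the same length
    return text + "\n" + "─" * len(text) + "\n"
```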
data.py CHANGED
@@ -1,5 +1,9 @@
- from huggingface_hub import HfApi, HfFileSystem, login
+ from huggingface_hub import HfFileSystem
  import pandas as pd
+ from utils import logger
+ import os
+ from datetime import datetime
+ import threading

  fs = HfFileSystem()

@@ -29,39 +33,26 @@ IMPORTANT_MODELS = [
  ]


- def get_data():
-     files_amd = fs.glob(
-         "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
-     )
-     files_amd.sort(reverse=True)
-
-     df_amd = pd.read_json(f"hf://{files_amd[0]}", orient="index")
-     df_amd.index.name = "model_name"
-     df_amd["failed_multi_no_amd"] = df_amd["failures"].apply(
-         lambda x: len(x["multi"]) if "multi" in x else 0
-     )
-     df_amd["failed_single_no_amd"] = df_amd["failures"].apply(
-         lambda x: len(x["single"]) if "single" in x else 0
-     )
-
-     files_nvidia = fs.glob(
-         "hf://datasets/hf-internal-testing/transformers_daily_ci/**/ci_results_run_models_gpu/model_results.json"
-     )
-     files_nvidia.sort(reverse=True)
-
-     df_nvidia = pd.read_json(
-         f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/main/{files_nvidia[0].lstrip('datasets/hf-internal-testing/transformers_daily_ci/')}",
-         orient="index",
-     )
-     df_nvidia.index.name = "model_name"
-     df_nvidia["failed_multi_no_nvidia"] = df_nvidia["failures"].apply(
-         lambda x: len(x["multi"]) if "multi" in x else 0
-     )
-     df_nvidia["failed_single_no_nvidia"] = df_nvidia["failures"].apply(
-         lambda x: len(x["single"]) if "single" in x else 0
-     )
-     df_nvidia
+ def read_one_dataframe(json_path: str, device_label: str) -> pd.DataFrame:
+     df = pd.read_json(json_path, orient="index")
+     df.index.name = "model_name"
+     df[f"failed_multi_no_{device_label}"] = df["failures"].apply(lambda x: len(x["multi"]) if "multi" in x else 0)
+     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
+     return df
+
+
+ def get_distant_data() -> pd.DataFrame:
+     # Retrieve AMD dataframe
+     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
+     files_amd = sorted(fs.glob(amd_src), reverse=True)
+     df_amd = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
+     # Retrieve NVIDIA dataframe
+     nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/**/ci_results_run_models_gpu/model_results.json"
+     files_nvidia = sorted(fs.glob(nvidia_src), reverse=True)
+     # NOTE: should this be removeprefix instead of lstrip?
+     nvidia_path = files_nvidia[0].lstrip('datasets/hf-internal-testing/transformers_daily_ci/')
+     nvidia_path = "https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/main/" + nvidia_path
+     df_nvidia = read_one_dataframe(nvidia_path, "nvidia")
+     # Join both dataframes
      joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
      joined = joined[
          [
@@ -77,10 +68,58 @@ def get_data():
              "job_link_nvidia",
          ]
      ]
-
      joined.index = joined.index.str.replace("^models_", "", regex=True)
-
+     # Filter out all but important models
      important_models_lower = [model.lower() for model in IMPORTANT_MODELS]
      filtered_joined = joined[joined.index.str.lower().isin(important_models_lower)]
-
      return filtered_joined
+
+
+ def get_sample_data() -> pd.DataFrame:
+     path = os.path.join(os.path.dirname(__file__), "sample_data.csv")
+     df = pd.read_csv(path)
+     df = df.set_index("model_name")
+     return df
+
+
+ class CIResults:
+
+     def __init__(self):
+         self.df = pd.DataFrame()
+         self.available_models = []
+         self.last_update_time = ""
+
+     def load_data(self) -> None:
+         """Load data from the data source."""
+         # Try loading the distant data, and fall back on sample data for local tinkering
+         try:
+             logger.info("Loading distant data...")
+             new_df = get_distant_data()
+         except Exception as e:
+             logger.error(f"Loading data failed: {e}")
+             logger.warning("Falling back on sample data.")
+             new_df = get_sample_data()
+         # Update attributes
+         self.df = new_df
+         self.available_models = new_df.index.tolist()
+         self.last_update_time = datetime.now().strftime('%H:%M')
+         # Log and return distant load status
+         logger.info(f"Data loaded successfully: {len(self.available_models)} models")
+         logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
+
+     def schedule_data_reload(self):
+         """Schedule the next data reload."""
+         def reload_data():
+             self.load_data()
+             # Schedule the next reload in 15 minutes (900 seconds)
+             timer = threading.Timer(900.0, reload_data)
+             timer.daemon = True  # Dies when main thread dies
+             timer.start()
+             logger.info("Next data reload scheduled in 15 minutes")
+
+         # Start the first reload timer
+         timer = threading.Timer(900.0, reload_data)
+         timer.daemon = True
+         timer.start()
+         logger.info("Data auto-reload scheduled every 15 minutes")
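The `NOTE` in `get_distant_data` is worth heeding: `str.lstrip` removes any leading characters that belong to the given character *set*, not the prefix string, so it can also eat legitimate characters at the start of the remaining path, while `str.removeprefix` (Python 3.9+) removes the exact prefix once. The date-stamped paths used here start with a digit, which is outside the prefix's character set, so `lstrip` happens to work; a quick demonstration of where the two diverge:

```python
prefix = "datasets/hf-internal-testing/transformers_daily_ci/"
dated = prefix + "2025-07-23/model_results.json"
risky = prefix + "testing_runs/model_results.json"  # hypothetical layout, for illustration

print(dated.removeprefix(prefix))  # 2025-07-23/model_results.json
print(dated.lstrip(prefix))        # same result: '2' is not in the strip set, so stripping stops
print(risky.removeprefix(prefix))  # testing_runs/model_results.json
print(risky.lstrip(prefix))        # uns/model_results.json - 'testing_r' is eaten char by char
```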
sample_data.csv ADDED
@@ -0,0 +1,22 @@
+ model_name,success_amd,success_nvidia,failed_multi_no_amd,failed_multi_no_nvidia,failed_single_no_amd,failed_single_no_nvidia,failures_amd,failures_nvidia,job_link_amd,job_link_nvidia
+ sample_auto,80,226,0,0,0,0,{},{},"{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501262', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500785'}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561673', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561472'}"
+ sample_bert,239,527,2,2,2,2,"{'multi': [{'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4201) AssertionError: Tensor-likes are not equal!'}], 'single': [{'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4201) AssertionError: Tensor-likes are not equal!'}]}","{'single': [{'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4216) AssertionError: Tensor-likes are not equal!'}], 'multi': [{'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4216) AssertionError: Tensor-likes are not equal!'}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501282', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500788'}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561709', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561482'}"
+ clip,288,660,0,0,0,0,{},{},"{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500866', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501323'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561994', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562125'}"
+ detr,69,177,4,0,4,0,"{'multi': [{'line': 'tests/models/detr/test_modeling_detr.py::DetrModelIntegrationTestsTimmBackbone::test_inference_no_head', 'trace': '(line 595) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/detr/test_modeling_detr.py::DetrModelIntegrationTestsTimmBackbone::test_inference_object_detection_head', 'trace': '(line 619) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/detr/test_modeling_detr.py::DetrModelIntegrationTestsTimmBackbone::test_inference_panoptic_segmentation_head', 'trace': '(line 667) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/detr/test_modeling_detr.py::DetrModelIntegrationTests::test_inference_no_head', 'trace': '(line 741) AssertionError: Tensor-likes are not close!'}], 'single': [{'line': 'tests/models/detr/test_modeling_detr.py::DetrModelIntegrationTestsTimmBackbone::test_inference_no_head', 'trace': '(line 595) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/detr/test_modeling_detr.py::DetrModelIntegrationTestsTimmBackbone::test_inference_object_detection_head', 'trace': '(line 619) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/detr/test_modeling_detr.py::DetrModelIntegrationTestsTimmBackbone::test_inference_panoptic_segmentation_head', 'trace': '(line 667) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/detr/test_modeling_detr.py::DetrModelIntegrationTests::test_inference_no_head', 'trace': '(line 741) AssertionError: Tensor-likes are not close!'}]}",{},"{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501397', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500969'}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562517', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562397'}"
+ gemma3,349,499,8,8,7,7,"{'single': [{'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch', 'trace': ""(line 675) AssertionError: Lists differ: ['use[374 chars]t scenes:\\n\\n* **Image 1** shows a cow on a beach.\\n'] != ['use[374 chars]t scenes. \\n\\n* **Image 1** shows a cow standing on a beach']""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops', 'trace': ""(line 675) AssertionError: Lists differ: ['use[251 chars]. The sky is blue with some white clouds. It’s[405 chars]h a'] != ['use[251 chars]. There are clouds in the blue sky above.', 'u[398 chars]h a']""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16', 'trace': ""(line 675) AssertionError: Lists differ: ['use[154 chars]each next to a turquoise ocean. There are some[16 chars]lue'] != ['use[154 chars]each with turquoise water and a distant coastl[28 chars]oks']""}], 'multi': [{'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4204) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch', 'trace': ""(line 675) AssertionError: Lists differ: ['use[374 chars]t scenes:\\n\\n* **Image 1** shows a cow on a beach.\\n'] != ['use[374 chars]t scenes. 
\\n\\n* **Image 1** shows a cow standing on a beach']""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops', 'trace': ""(line 675) AssertionError: Lists differ: ['use[251 chars]. The sky is blue with some white clouds. It’s[405 chars]h a'] != ['use[251 chars]. There are clouds in the blue sky above.', 'u[398 chars]h a']""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16', 'trace': ""(line 675) AssertionError: Lists differ: ['use[154 chars]each next to a turquoise ocean. There are some[16 chars]lue'] != ['use[154 chars]each with turquoise water and a distant coastl[28 chars]oks']""}]}","{'single': [{'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4216) AssertionError: Tensor-likes are not equal!'}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache', 'trace': ""(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa', 'trace': '(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]'}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager', 'trace': '(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3'}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops', 'trace': '(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. 
Tensor sizes: [2, 1, 1617, 1617]'}], 'multi': [{'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4219) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism', 'trace': '(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!'}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache', 'trace': ""(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')""}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa', 'trace': '(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]'}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager', 'trace': '(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3'}, {'line': 'tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops', 'trace': '(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]'}]}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501046', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501545'}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563053', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562857'}"
+ gemma3n,0,286,0,2,0,1,{},"{'multi': [{'line': 'tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}], 'single': [{'line': 'tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}]}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501047', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501538'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562955', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563061'}"
+ got_ocr2,145,254,2,2,2,1,"{'multi': [{'line': 'tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}], 'single': [{'line': 'tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}]}","{'multi': [{'line': 'tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}], 'single': [{'line': 'tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501556', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501063'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562995', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563212'}"
+ gpt2,249,487,1,1,1,1,"{'single': [{'line': 'tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}], 'multi': [{'line': 'tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}]}","{'multi': [{'line': 'tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}], 'single': [{'line': 'tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}]}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501087', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501566'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563001', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563255'}"
+ internvl,249,356,4,3,4,2,"{'single': [{'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward', 'trace': '(line 687) AssertionError: False is not true : Actual logits: tensor([ -9.8828, -0.5005, 1.4697, -10.3438, -10.3438], dtype=torch.float16)'}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_interleaved_images_videos', 'trace': ""(line 675) AssertionError: 'user[118 chars]nse. Upon closer inspection, the differences b[31 chars]. **' != 'user[118 chars]nse. After re-examining the images, I can see [13 chars]e no'""}], 'multi': [{'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward', 'trace': '(line 687) AssertionError: False is not true : Actual logits: tensor([ -9.8828, -0.5005, 1.4697, -10.3438, -10.3438], dtype=torch.float16)'}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_interleaved_images_videos', 'trace': ""(line 675) AssertionError: 'user[118 chars]nse. Upon closer inspection, the differences b[31 chars]. **' != 'user[118 chars]nse. After re-examining the images, I can see [13 chars]e no'""}]}","{'multi': [{'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads', 'trace': '(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help.'}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}], 'single': [{'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads', 'trace': '(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. 
OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help.'}]}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501143', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501636'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563553', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563712'}"
+ llama,229,478,4,2,4,1,"{'multi': [{'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_torch_compile_for_training', 'trace': '(line 951) AssertionError: expected size 2==2, stride 20==64 at dim=0; expected size 2==2, stride 10==32 at dim=1; expected size 10==32, stride 1==1 at dim=2'}, {'line': 'tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16', 'trace': '(line 687) AssertionError: False is not true'}], 'single': [{'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_torch_compile_for_training', 'trace': '(line 951) AssertionError: expected size 2==2, stride 20==64 at dim=0; expected size 2==2, stride 10==32 at dim=1; expected size 10==32, stride 1==1 at dim=2'}, {'line': 'tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16', 'trace': '(line 687) AssertionError: False is not true'}]}","{'multi': [{'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}], 'single': [{'line': 'tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501675', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501165'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563871', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564103'}"
+ llava,201,346,5,4,4,3,"{'single': [{'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads', 'trace': '(line 687) AssertionError: False is not true'}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation', 'trace': '(line 548) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes'}], 'multi': [{'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads', 'trace': '(line 687) AssertionError: False is not true'}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4182) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1'}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation', 'trace': '(line 548) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes'}]}","{'multi': [{'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads', 'trace': '(line 687) AssertionError: False is not true'}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1'}], 'single': [{'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads', 'trace': '(line 687) AssertionError: False is not true'}, {'line': 'tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids', 'trace': '(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1'}]}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501186', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447501727'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564002', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564108'}"
+ mistral3,197,286,3,2,3,1,"{'multi': [{'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_batched_generate', 'trace': '(line 675) AssertionError: \'Calm waters reflect\\nWooden path to distant shore\\nSilence in the scene\' != ""Wooden path to calm,\\nReflections whisper secrets,\\nNature\'s peace unfolds.""'}], 'single': [{'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_batched_generate', 'trace': '(line 675) AssertionError: \'Calm waters reflect\\nWooden path to distant shore\\nSilence in the scene\' != ""Wooden path to calm,\\nReflections whisper secrets,\\nNature\'s peace unfolds.""'}]}","{'single': [{'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}], 'multi': [{'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500305', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447499780'}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561480', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561618'}"
+ modernbert,132,164,5,5,5,5,"{'single': [{'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export', 'trace': ""(line 675) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']""}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm', 'trace': '(line 401) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head', 'trace': '(line 423) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification', 'trace': '(line 469) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification', 'trace': '(line 446) AssertionError: Tensor-likes are not close!'}], 'multi': [{'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export', 'trace': ""(line 675) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']""}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm', 'trace': '(line 401) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head', 'trace': '(line 423) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification', 'trace': '(line 469) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification', 'trace': '(line 446) AssertionError: Tensor-likes are not close!'}]}","{'multi': [{'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export', 'trace': ""(line 675) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']""}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm', 'trace': '(line 401) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head', 'trace': '(line 423) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification', 'trace': '(line 469) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification', 'trace': '(line 446) AssertionError: Tensor-likes are not close!'}], 'single': [{'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export', 'trace': ""(line 675) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']""}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm', 'trace': '(line 401) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head', 'trace': '(line 423) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification', 'trace': '(line 469) AssertionError: Tensor-likes are not close!'}, {'line': 'tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification', 'trace': '(line 446) AssertionError: Tensor-likes are not close!'}]}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447499811', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500326'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561668', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561515'}"
+ qwen2,213,438,3,3,3,2,"{'multi': [{'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache', 'trace': ""(line 1638) torch._dynamo.exc.TorchRuntimeError: Failed running call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,""}], 'single': [{'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_generate_compilation_all_outputs', 'trace': ""(line 317) torch._dynamo.exc.Unsupported: isinstance(NestedUserFunctionVariable(), TorchInGraphFunctionVariable(<class 'torch.nn.parameter.Parameter'>)): can't determine type of NestedUserFunctionVariable()""}, {'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache', 'trace': ""(line 1638) torch._dynamo.exc.TorchRuntimeError: Failed running call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,""}]}","{'multi': [{'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}, {'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache', 'trace': ""(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,""}], 'single': [{'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache', 'trace': ""(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,""}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500458', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447499989'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562376', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562270'}"
+ qwen2_5_omni,168,277,2,5,1,1,"{'single': [{'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch', 'trace': '(line 675) AssertionError: Lists differ: [""sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.""] != [""sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.""]'}], 'multi': [{'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism', 'trace': '(line 675) AssertionError: Items in the second set but not the first:'}, {'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch', 'trace': '(line 675) AssertionError: Lists differ: [""sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.""] != [""sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.""]'}]}","{'multi': [{'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism', 'trace': '(line 675) AssertionError: Items in the second set but not the first:'}, {'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}, {'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch', 'trace': '(line 675) AssertionError: Lists differ: [""sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.""] != [""sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.""]'}, {'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_multiturn', 'trace': '(line 849) torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 1 has a total capacity of 22.18 GiB of which 6.50 MiB is free. Process 51940 has 22.17 GiB memory in use. Of the allocated memory 21.74 GiB is allocated by PyTorch, and 27.83 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)'}, {'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_w_audio', 'trace': '(line 1000) torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 1 has a total capacity of 22.18 GiB of which 8.50 MiB is free. Process 51940 has 22.17 GiB memory in use. Of the allocated memory 21.75 GiB is allocated by PyTorch, and 17.78 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)'}], 'single': [{'line': 'tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch', 'trace': '(line 675) AssertionError: Lists differ: [""sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.""] != [""sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.""]'}]}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447499993', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500491'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562375', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562289'}"
+ qwen2_5_vl,204,311,1,1,2,1,"{'single': [{'line': 'tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test', 'trace': ""(line 700) requests.exceptions.ConnectionError: HTTPSConnectionPool(host='qianwen-res.oss-accelerate-overseas.aliyuncs.com', port=443): Max retries exceeded with url: /Qwen2-VL/demo_small.jpg (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7b289312aad0>: Failed to establish a new connection: [Errno -2] Name or service not known'))""}, {'line': 'tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test_batch_different_resolutions', 'trace': ""(line 675) AssertionError: Lists differ: ['sys[314 chars]ion\\n addCriterion\\n\\n addCriterion\\n\\n addCri[75 chars]n\\n'] != ['sys[314 chars]ion\\nThe dog in the picture appears to be a La[81 chars] is']""}], 'multi': [{'line': 'tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test_batch_different_resolutions', 'trace': ""(line 675) AssertionError: Lists differ: ['sys[314 chars]ion\\n addCriterion\\n\\n addCriterion\\n\\n addCri[75 chars]n\\n'] != ['sys[314 chars]ion\\nThe dog in the picture appears to be a La[81 chars] is']""}]}","{'multi': [{'line': 'tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test_batch_different_resolutions', 'trace': ""(line 675) AssertionError: Lists differ: ['sys[314 chars]ion\\n addCriterion\\n\\n addCriterion\\n\\n addCri[75 chars]n\\n'] != ['sys[314 chars]ion\\nThe dog in the picture appears to be a La[81 chars] is']""}], 'single': [{'line': 'tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test_batch_different_resolutions', 'trace': ""(line 675) AssertionError: Lists differ: ['sys[314 chars]ion\\n addCriterion\\n\\n addCriterion\\n\\n addCri[75 chars]n\\n'] != ['sys[314 chars]ion\\nThe dog in the picture appears to be a La[81 chars] is']""}]}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447499984', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500447'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562382', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562290'}"
+ smolvlm,323,499,1,1,1,1,"{'multi': [{'line': 'tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}], 'single': [{'line': 'tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}]}","{'single': [{'line': 'tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}], 'multi': [{'line': 'tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500533', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500052'}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562675', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562798'}"
+ t5,254,592,4,3,3,2,"{'multi': [{'line': 'tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 130) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'""}, {'line': 'tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization', 'trace': ""(line 885) torch._dynamo.exc.TorchRuntimeError: Failed running call_function <built-in function add>(*(FakeTensor(..., size=(1, 8, 1, 1234)), FakeTensor(..., device='cuda:1', size=(1, 1, 1, 1234))), **{}):""}, {'line': 'tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_small_integration_test', 'trace': '(line 687) AssertionError: False is not true'}], 'single': [{'line': 'tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4125) KeyError: 'eager'""}, {'line': 'tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization', 'trace': ""(line 885) torch._dynamo.exc.TorchRuntimeError: Failed running call_function <built-in function add>(*(FakeTensor(..., size=(1, 8, 1, 1234)), FakeTensor(..., device='cuda:0', size=(1, 1, 1, 1234))), **{}):""}, {'line': 'tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_small_integration_test', 'trace': '(line 687) AssertionError: False is not true'}]}","{'multi': [{'line': 'tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'""}, {'line': 'tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization', 'trace': ""(line 687) AttributeError: 'dict' object has no attribute 'batch_size'""}], 'single': [{'line': 'tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization', 'trace': ""(line 687) AttributeError: 'dict' object has no attribute 'batch_size'""}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500560', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500103'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563047', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562939'}"
+ vit,135,217,0,0,0,0,{},{},"{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500654', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500177'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563537', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563397'}"
+ wav2vec2,0,672,0,4,0,4,{},"{'multi': [{'line': 'tests/models/wav2vec2/test_modeling_wav2vec2.py::Wav2Vec2ModelIntegrationTest::test_inference_mms_1b_all', 'trace': '(line 989) RuntimeError: Dataset scripts are no longer supported, but found common_voice_11_0.py'}, {'line': 'tests/models/wav2vec2/test_modeling_wav2vec2.py::Wav2Vec2ModelIntegrationTest::test_wav2vec2_with_lm', 'trace': '(line 989) RuntimeError: Dataset scripts are no longer supported, but found common_voice_11_0.py'}, {'line': 'tests/models/wav2vec2/test_modeling_wav2vec2.py::Wav2Vec2ModelIntegrationTest::test_wav2vec2_with_lm_invalid_pool', 'trace': '(line 675) AssertionError: Traceback (most recent call last):'}, {'line': 'tests/models/wav2vec2/test_modeling_wav2vec2.py::Wav2Vec2ModelIntegrationTest::test_wav2vec2_with_lm_pool', 'trace': '(line 989) RuntimeError: Dataset scripts are no longer supported, but found common_voice_11_0.py'}], 'single': [{'line': 'tests/models/wav2vec2/test_modeling_wav2vec2.py::Wav2Vec2ModelIntegrationTest::test_inference_mms_1b_all', 'trace': '(line 989) RuntimeError: Dataset scripts are no longer supported, but found common_voice_11_0.py'}, {'line': 'tests/models/wav2vec2/test_modeling_wav2vec2.py::Wav2Vec2ModelIntegrationTest::test_wav2vec2_with_lm', 'trace': '(line 989) RuntimeError: Dataset scripts are no longer supported, but found common_voice_11_0.py'}, {'line': 'tests/models/wav2vec2/test_modeling_wav2vec2.py::Wav2Vec2ModelIntegrationTest::test_wav2vec2_with_lm_invalid_pool', 'trace': '(line 675) AssertionError: Traceback (most recent call last):'}, {'line': 'tests/models/wav2vec2/test_modeling_wav2vec2.py::Wav2Vec2ModelIntegrationTest::test_wav2vec2_with_lm_pool', 'trace': '(line 989) RuntimeError: Dataset scripts are no longer supported, but found common_voice_11_0.py'}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500676', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500194'}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563711', 'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563582'}"
+ whisper,0,1010,0,11,0,8,{},"{'single': [{'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual', 'trace': '(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_longform_timestamps_generation', 'trace': '(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_longform_timestamps_generation', 'trace': '(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard', 'trace': '(line 675) AssertionError: Lists differ: ["" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!\'] != ["" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!\']'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond', 'trace': '(line 675) AssertionError: Lists differ: ["" Fo[422 chars]to a fisher shows in lip-nitsky attack that cu[7903 chars]le!""] != ["" Fo[422 chars]to a Fisher shows in lip-nitsky attack that cu[7918 chars]le.""]'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond', 'trace': '(line 675) AssertionError: Lists differ: ["" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.""] != ["" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.""]'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}], 'multi': [{'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'""}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_forced_decoder_ids', 'trace': '(line 713) requests.exceptions.ReadTimeout: (ReadTimeoutError(""HTTPSConnectionPool(host=\'huggingface.co\', port=443): Read timed out. (read timeout=10)""), \'(Request ID: 13cb0b08-c261-4ca3-a58f-91a2f3e327ed)\')'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual', 'trace': '(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_longform_timestamps_generation', 'trace': '(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_longform_timestamps_generation', 'trace': '(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard', 'trace': '(line 675) AssertionError: Lists differ: ["" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!\'] != ["" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!\']'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond', 'trace': '(line 675) AssertionError: Lists differ: ["" Fo[422 chars]to a fisher shows in lip-nitsky attack that cu[7903 chars]le!""] != ["" Fo[422 chars]to a Fisher shows in lip-nitsky attack that cu[7918 chars]le.""]'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond', 'trace': '(line 675) AssertionError: Lists differ: ["" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.""] != ["" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.""]'}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_eager_padding_matches_padding_free_with_position_ids', 'trace': ""(line 4140) KeyError: 'eager'""}, {'line': 'tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_multi_gpu_data_parallel_forward', 'trace': ""(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'""}]}","{'multi': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500690', 'single': 'https://github.com/huggingface/transformers/actions/runs/16433423306/job/46447500204'}","{'single': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563737', 'multi': 'https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563862'}"
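
Note that the `failures_*` and `job_link_*` columns in these rows are serialized Python dict literals (single quotes, nested lists), not JSON, so `json.loads` will not parse them; `ast.literal_eval` decodes them safely. A minimal loading sketch — the column names here are assumptions inferred from the rows above, not verified against the file's header row:

```python
# Hypothetical loader for sample_data.csv (column names inferred, not verified).
import ast

import pandas as pd

df = pd.read_csv("sample_data.csv", index_col=0)

# Decode the dict-valued columns from Python literals back into dicts.
for col in df.columns:
    if col.startswith(("failures_", "job_link_")):
        df[col] = df[col].apply(ast.literal_eval)

# Example: list the single-GPU failures recorded for llava on AMD hardware.
for failure in df.loc["llava", "failures_amd"]["single"]:
    print(failure["line"], "->", failure["trace"])
```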
summary_page.py ADDED
@@ -0,0 +1,138 @@
+ import matplotlib.pyplot as plt
+ import pandas as pd
+
+ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Figure:
+     """Create a summary page with model names and both AMD/NVIDIA test stats bars."""
+     if df.empty:
+         fig, ax = plt.subplots(figsize=(16, 8), facecolor='#000000')
+         ax.set_facecolor('#000000')
+         ax.text(0.5, 0.5, 'No data available',
+                 horizontalalignment='center', verticalalignment='center',
+                 transform=ax.transAxes, fontsize=20, color='#888888',
+                 fontfamily='monospace', weight='normal')
+         ax.axis('off')
+         return fig
+
+     fig, ax = plt.subplots(figsize=(16, len(available_models) * 2.5 + 2), facecolor='#000000')
+     ax.set_facecolor('#000000')
+
+     colors = {
+         'passed': '#4CAF50',
+         'failed': '#E53E3E',
+         'skipped': '#FFD54F',
+         'error': '#8B0000',
+         'empty': '#5B5B5B'
+     }
+
+     visible_model_count = 0
+     max_y = 0
+
+     for i, model_name in enumerate(available_models):
+         if model_name not in df.index:
+             continue
+
+         row = df.loc[model_name]
+
+         # Get values directly from dataframe
+         success_amd = int(row.get('success_amd', 0)) if pd.notna(row.get('success_amd', 0)) else 0
+         success_nvidia = int(row.get('success_nvidia', 0)) if pd.notna(row.get('success_nvidia', 0)) else 0
+         failed_multi_amd = int(row.get('failed_multi_no_amd', 0)) if pd.notna(row.get('failed_multi_no_amd', 0)) else 0
+         failed_multi_nvidia = int(row.get('failed_multi_no_nvidia', 0)) if pd.notna(row.get('failed_multi_no_nvidia', 0)) else 0
+         failed_single_amd = int(row.get('failed_single_no_amd', 0)) if pd.notna(row.get('failed_single_no_amd', 0)) else 0
+         failed_single_nvidia = int(row.get('failed_single_no_nvidia', 0)) if pd.notna(row.get('failed_single_no_nvidia', 0)) else 0
+
+         # Calculate stats
+         amd_stats = {
+             'passed': success_amd,
+             'failed': failed_multi_amd + failed_single_amd,
+             'skipped': 0,
+             'error': 0
+         }
+
+         nvidia_stats = {
+             'passed': success_nvidia,
+             'failed': failed_multi_nvidia + failed_single_nvidia,
+             'skipped': 0,
+             'error': 0
+         }
+
+         amd_total = sum(amd_stats.values())
+         nvidia_total = sum(nvidia_stats.values())
+
+         if amd_total == 0 and nvidia_total == 0:
+             continue
+
+         # Position for this model - use visible model count for spacing
+         y_base = (2.2 + visible_model_count) * 1.8
+         y_model_name = y_base  # Model name above AMD bar
+         y_amd_bar = y_base + 0.45  # AMD bar
+         y_nvidia_bar = y_base + 0.97  # NVIDIA bar
+         max_y = max(max_y, y_nvidia_bar + 0.5)
+
+         # Model name centered above the AMD bar
+         left_0 = 8
+         bar_length = 92
+         ax.text(bar_length / 2 + left_0, y_model_name, model_name.lower(),
+                 ha='center', va='center', color='#FFFFFF',
+                 fontsize=20, fontfamily='monospace', fontweight='bold')
+
+         # AMD label and bar on the same level
+         ax.text(left_0 - 2, y_amd_bar, "amd", ha='right', va='center', color='#CCCCCC', fontsize=18, fontfamily='monospace', fontweight='normal')
+         if amd_total > 0:
+             # AMD bar starts after labels
+             left = left_0
+             for category in ['passed', 'failed', 'skipped', 'error']:
+                 if amd_stats[category] > 0:
+                     width = amd_stats[category] / amd_total * bar_length
+                     ax.barh(y_amd_bar, width, left=left, height=0.405,
+                             color=colors[category], alpha=0.9)
+                     if width > 4:
+                         ax.text(left + width/2, y_amd_bar, str(amd_stats[category]),
+                                 ha='center', va='center', color='black',
+                                 fontweight='bold', fontsize=12, fontfamily='monospace')
+                     left += width
+         else:
+             ax.barh(y_amd_bar, bar_length, left=left_0, height=0.405, color=colors['empty'], alpha=0.9)
+             ax.text(left_0 + bar_length/2, y_amd_bar, "No data", ha='center', va='center', color='black', fontweight='bold', fontsize=12, fontfamily='monospace')
+
+         # NVIDIA label and bar on the same level
+         ax.text(left_0 - 2, y_nvidia_bar, "nvidia", ha='right', va='center', color='#CCCCCC', fontsize=18, fontfamily='monospace', fontweight='normal')
+
+         if nvidia_total > 0:
+             # NVIDIA bar starts after labels
+             left = left_0
+             for category in ['passed', 'failed', 'skipped', 'error']:
+                 if nvidia_stats[category] > 0:
+                     width = nvidia_stats[category] / nvidia_total * bar_length
+                     ax.barh(y_nvidia_bar, width, left=left, height=0.405,
+                             color=colors[category], alpha=0.9)
+                     if width > 4:
+                         ax.text(left + width/2, y_nvidia_bar, str(nvidia_stats[category]),
+                                 ha='center', va='center', color='black',
+                                 fontweight='bold', fontsize=12, fontfamily='monospace')
+                     left += width
+         else:
+             ax.barh(y_nvidia_bar, bar_length, left=left_0, height=0.405, color=colors['empty'], alpha=0.9)
+             ax.text(left_0 + bar_length/2, y_nvidia_bar, "No data", ha='center', va='center', color='black', fontweight='bold', fontsize=12, fontfamily='monospace')
+
+         # Increment counter for next visible model
+         visible_model_count += 1
+
+     # Style the axes to be completely invisible and span full width
+     ax.set_xlim(0, 100)
+     ax.set_ylim(0, max_y)
+     ax.set_xlabel('')
+     ax.set_ylabel('')
+     ax.spines['bottom'].set_visible(False)
+     ax.spines['left'].set_visible(False)
+     ax.spines['top'].set_visible(False)
+     ax.spines['right'].set_visible(False)
+     ax.set_xticks([])
+     ax.set_yticks([])
+     ax.yaxis.set_inverted(True)
+
+     # Remove all margins to make bars span full width
+     plt.tight_layout()
+     plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.02)
+
+     return fig
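
To exercise `create_summary_page` outside the app, a driver along these lines should suffice; the one-row DataFrame is invented for illustration, with only the column names and the llava counts taken from the sample data above:

```python
# Hypothetical smoke test: render a one-model summary and save it to disk.
import pandas as pd

from summary_page import create_summary_page

df = pd.DataFrame(
    {
        "success_amd": [201], "success_nvidia": [346],
        "failed_multi_no_amd": [5], "failed_multi_no_nvidia": [4],
        "failed_single_no_amd": [4], "failed_single_no_nvidia": [3],
    },
    index=["llava"],
)

fig = create_summary_page(df, available_models=["llava"])
fig.savefig("summary.png", facecolor=fig.get_facecolor())
```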
utils.py ADDED
@@ -0,0 +1,51 @@
+ import logging
+ import sys
+ from datetime import datetime
+
+
+ class TimestampFormatter(logging.Formatter):
+     """Custom formatter that matches the existing timestamp format used in print statements."""
+
+     def format(self, record):
+         # Create timestamp in the same format as existing print statements
+         timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+         # Format the message with timestamp prefix
+         if record.levelno == logging.WARNING:
+             return f"WARNING: {record.getMessage()}"
+         elif record.levelno == logging.ERROR:
+             return f"Error {record.getMessage()}"
+         else:
+             return f"[{timestamp}] {record.getMessage()}"
+
+
+ def setup_logger(name="tcid", level=logging.INFO):
+     """Set up logger with custom timestamp formatting to match existing print format."""
+     logger = logging.getLogger(name)
+
+     # Avoid adding multiple handlers if logger already exists
+     if logger.handlers:
+         return logger
+
+     logger.setLevel(level)
+
+     # Create console handler
+     handler = logging.StreamHandler(sys.stdout)
+     handler.setLevel(level)
+
+     # Set custom formatter
+     formatter = TimestampFormatter()
+     handler.setFormatter(formatter)
+
+     logger.addHandler(handler)
+
+     return logger
+
+
+ # Create default logger instance
+ logger = setup_logger()
+
+
+ def generate_underlined_line(text: str) -> str:
+     return text + "\n" + "─" * len(text) + "\n"
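
`setup_logger` is idempotent (it returns early when handlers are already attached), so every module can import the shared `logger` instance safely. A usage sketch, with the output shapes implied by the formatter above shown in comments:

```python
# Hypothetical usage of the shared logger and the underline helper.
from utils import generate_underlined_line, logger

print(generate_underlined_line("Loading CI results"))
logger.info("fetching latest AMD and NVIDIA runs")  # -> [2025-07-25 09:00:00] fetching latest AMD and NVIDIA runs
logger.warning("no AMD data for model bert")        # -> WARNING: no AMD data for model bert
```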