stillerman HF Staff committed on
Commit
d10cc3f
·
1 Parent(s): a379dd4

improved viewer tab

Browse files
Files changed (1) hide show
  1. src/components/viewer-tab.tsx +101 -18
src/components/viewer-tab.tsx CHANGED
@@ -15,6 +15,7 @@ import {
15
  SelectValue
16
  } from "@/components/ui/select";
17
  import { Run as ForceGraphRun } from "@/components/reasoning-trace";
 
18
 
19
  const models = {
20
  "Qwen3-14B": q3Results,
@@ -29,6 +30,18 @@ interface Run {
29
  result: string;
30
  }
31
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  export default function ViewerTab({
33
  handleTryRun,
34
  }: {
@@ -37,6 +50,7 @@ export default function ViewerTab({
37
  const [selectedRun, setSelectedRun] = useState<number | null>(null);
38
  const [runs, setRuns] = useState<Run[]>([]);
39
  const [selectedModel, setSelectedModel] = useState<string>("Qwen3-14B");
 
40
 
41
  useEffect(() => {
42
  // Convert the model data to the format expected by RunsList
@@ -52,6 +66,45 @@ export default function ViewerTab({
52
  result: run.result
53
  }));
54
  setRuns(convertedRuns);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }, [selectedModel]);
56
 
57
  const handleRunSelect = (runId: number) => {
@@ -73,24 +126,54 @@ export default function ViewerTab({
73
 
74
  return (
75
  <div className="grid grid-cols-1 md:grid-cols-12 gap-4 h-[calc(100vh-200px)] max-h-[calc(100vh-200px)] overflow-hidden p-2">
76
- <Card className="p-2 col-span-12 h-12 row-start-1">
77
- <div className="flex items-center justify-between h-full">
78
- <h3 className="text-sm font-medium text-muted-foreground flex-shrink-0">
79
- Models
80
- </h3>
81
- <Select value={selectedModel} onValueChange={setSelectedModel}>
82
- <SelectTrigger className="w-[180px]">
83
- <SelectValue placeholder="Select model" />
84
- </SelectTrigger>
85
- <SelectContent>
86
- {Object.keys(models).map((modelName) => (
87
- <SelectItem key={modelName} value={modelName}>
88
- {modelName}
89
- </SelectItem>
90
- ))}
91
- </SelectContent>
92
- </Select>
93
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  </Card>
95
  <div className="md:col-span-3 flex flex-col max-h-full overflow-hidden">
96
  <div className="bg-card rounded-lg p-3 border flex-grow overflow-hidden flex flex-col">
 
15
  SelectValue
16
  } from "@/components/ui/select";
17
  import { Run as ForceGraphRun } from "@/components/reasoning-trace";
18
+ import { Badge } from "@/components/ui/badge";
19
 
20
  const models = {
21
  "Qwen3-14B": q3Results,
 
30
  result: string;
31
  }
32
 
33
+ // Summary statistics for the selected model's runs; every step metric is computed over winning runs only and is 0 when there are none.
34
+ interface ModelStats {
35
+ winPercentage: number; // (wins / totalRuns) * 100, or 0 when there are no runs
36
+ avgSteps: number; // mean of steps.length across winning runs
37
+ stdDevSteps: number; // population standard deviation (variance divided by N, not N - 1) of winning-run step counts
38
+ totalRuns: number; // count of all converted runs for the model, wins and non-wins alike
39
+ wins: number; // count of runs whose result is exactly "win"
40
+ medianSteps: number; // median winning-run step count; average of the two middle values when the count is even
41
+ minSteps: number; // smallest winning-run step count
42
+ maxSteps: number; // largest winning-run step count
43
+ }
44
+
45
  export default function ViewerTab({
46
  handleTryRun,
47
  }: {
 
50
  const [selectedRun, setSelectedRun] = useState<number | null>(null);
51
  const [runs, setRuns] = useState<Run[]>([]);
52
  const [selectedModel, setSelectedModel] = useState<string>("Qwen3-14B");
53
+ const [modelStats, setModelStats] = useState<ModelStats | null>(null);
54
 
55
  useEffect(() => {
56
  // Convert the model data to the format expected by RunsList
 
66
  result: run.result
67
  }));
68
  setRuns(convertedRuns);
69
+
70
+ // Calculate model statistics
71
+ const winRuns = convertedRuns.filter(run => run.result === "win");
72
+ const totalRuns = convertedRuns.length;
73
+ const wins = winRuns.length;
74
+ const winPercentage = totalRuns > 0 ? (wins / totalRuns) * 100 : 0;
75
+
76
+ // Calculate steps statistics for winning runs
77
+ const stepCounts = winRuns.map(run => run.steps.length);
78
+ const avgSteps = stepCounts.length > 0
79
+ ? stepCounts.reduce((sum, count) => sum + count, 0) / stepCounts.length
80
+ : 0;
81
+
82
+ // Calculate standard deviation
83
+ const variance = stepCounts.length > 0
84
+ ? stepCounts.reduce((sum, count) => sum + Math.pow(count - avgSteps, 2), 0) / stepCounts.length
85
+ : 0;
86
+ const stdDevSteps = Math.sqrt(variance);
87
+
88
+ // Calculate median, min, max steps
89
+ const sortedSteps = [...stepCounts].sort((a, b) => a - b);
90
+ const medianSteps = stepCounts.length > 0
91
+ ? stepCounts.length % 2 === 0
92
+ ? (sortedSteps[stepCounts.length / 2 - 1] + sortedSteps[stepCounts.length / 2]) / 2
93
+ : sortedSteps[Math.floor(stepCounts.length / 2)]
94
+ : 0;
95
+ const minSteps = stepCounts.length > 0 ? Math.min(...stepCounts) : 0;
96
+ const maxSteps = stepCounts.length > 0 ? Math.max(...stepCounts) : 0;
97
+
98
+ setModelStats({
99
+ winPercentage,
100
+ avgSteps,
101
+ stdDevSteps,
102
+ totalRuns,
103
+ wins,
104
+ medianSteps,
105
+ minSteps,
106
+ maxSteps
107
+ });
108
  }, [selectedModel]);
109
 
110
  const handleRunSelect = (runId: number) => {
 
126
 
127
  return (
128
  <div className="grid grid-cols-1 md:grid-cols-12 gap-4 h-[calc(100vh-200px)] max-h-[calc(100vh-200px)] overflow-hidden p-2">
129
+ <Card className="p-3 col-span-12 row-start-1">
130
+ <div className="flex flex-col sm:flex-row items-start sm:items-center gap-3">
131
+ <div className="flex-shrink-0">
132
+ <Select value={selectedModel} onValueChange={setSelectedModel}>
133
+ <SelectTrigger className="w-[180px]">
134
+ <SelectValue placeholder="Select model" />
135
+ </SelectTrigger>
136
+ <SelectContent>
137
+ {Object.keys(models).map((modelName) => (
138
+ <SelectItem key={modelName} value={modelName}>
139
+ {modelName}
140
+ </SelectItem>
141
+ ))}
142
+ </SelectContent>
143
+ </Select>
144
+ </div>
145
+
146
+ {modelStats && (
147
+ <div className="flex flex-wrap gap-1.5 items-center">
148
+ <Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
149
+ <span className="text-xs font-medium">Success:</span>
150
+ <span className="text-xs font-semibold">{modelStats.winPercentage.toFixed(1)}%</span>
151
+ <span className="text-xs text-muted-foreground">({modelStats.wins}/{modelStats.totalRuns})</span>
152
+ </Badge>
153
+
154
+ <Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
155
+ <span className="text-xs font-medium">Mean:</span>
156
+ <span className="text-xs font-semibold">{modelStats.avgSteps.toFixed(1)}</span>
157
+ <span className="text-xs text-muted-foreground">±{modelStats.stdDevSteps.toFixed(1)}</span>
158
+ </Badge>
159
+
160
+ <Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
161
+ <span className="text-xs font-medium">Median:</span>
162
+ <span className="text-xs font-semibold">{modelStats.medianSteps.toFixed(1)}</span>
163
+ </Badge>
164
+
165
+ <Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
166
+ <span className="text-xs font-medium">Min:</span>
167
+ <span className="text-xs font-semibold">{modelStats.minSteps}</span>
168
+ </Badge>
169
+
170
+ <Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
171
+ <span className="text-xs font-medium">Max:</span>
172
+ <span className="text-xs font-semibold">{modelStats.maxSteps}</span>
173
+ </Badge>
174
+ </div>
175
+ )}
176
+ </div>
177
  </Card>
178
  <div className="md:col-span-3 flex flex-col max-h-full overflow-hidden">
179
  <div className="bg-card rounded-lg p-3 border flex-grow overflow-hidden flex flex-col">