Spaces:
Running
Running
Commit
·
d10cc3f
1
Parent(s):
a379dd4
improved viewer tab
Browse files- src/components/viewer-tab.tsx +101 -18
src/components/viewer-tab.tsx
CHANGED
@@ -15,6 +15,7 @@ import {
|
|
15 |
SelectValue
|
16 |
} from "@/components/ui/select";
|
17 |
import { Run as ForceGraphRun } from "@/components/reasoning-trace";
|
|
|
18 |
|
19 |
const models = {
|
20 |
"Qwen3-14B": q3Results,
|
@@ -29,6 +30,18 @@ interface Run {
|
|
29 |
result: string;
|
30 |
}
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
export default function ViewerTab({
|
33 |
handleTryRun,
|
34 |
}: {
|
@@ -37,6 +50,7 @@ export default function ViewerTab({
|
|
37 |
const [selectedRun, setSelectedRun] = useState<number | null>(null);
|
38 |
const [runs, setRuns] = useState<Run[]>([]);
|
39 |
const [selectedModel, setSelectedModel] = useState<string>("Qwen3-14B");
|
|
|
40 |
|
41 |
useEffect(() => {
|
42 |
// Convert the model data to the format expected by RunsList
|
@@ -52,6 +66,45 @@ export default function ViewerTab({
|
|
52 |
result: run.result
|
53 |
}));
|
54 |
setRuns(convertedRuns);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
}, [selectedModel]);
|
56 |
|
57 |
const handleRunSelect = (runId: number) => {
|
@@ -73,24 +126,54 @@ export default function ViewerTab({
|
|
73 |
|
74 |
return (
|
75 |
<div className="grid grid-cols-1 md:grid-cols-12 gap-4 h-[calc(100vh-200px)] max-h-[calc(100vh-200px)] overflow-hidden p-2">
|
76 |
-
<Card className="p-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
</Card>
|
95 |
<div className="md:col-span-3 flex flex-col max-h-full overflow-hidden">
|
96 |
<div className="bg-card rounded-lg p-3 border flex-grow overflow-hidden flex flex-col">
|
|
|
15 |
SelectValue
|
16 |
} from "@/components/ui/select";
|
17 |
import { Run as ForceGraphRun } from "@/components/reasoning-trace";
|
18 |
+
import { Badge } from "@/components/ui/badge";
|
19 |
|
20 |
const models = {
|
21 |
"Qwen3-14B": q3Results,
|
|
|
30 |
result: string;
|
31 |
}
|
32 |
|
33 |
+
// Aggregate statistics for the selected model's runs (step metrics cover winning runs only)
|
34 |
+
interface ModelStats {
|
35 |
+
winPercentage: number;
|
36 |
+
avgSteps: number;
|
37 |
+
stdDevSteps: number;
|
38 |
+
totalRuns: number;
|
39 |
+
wins: number;
|
40 |
+
medianSteps: number;
|
41 |
+
minSteps: number;
|
42 |
+
maxSteps: number;
|
43 |
+
}
|
44 |
+
|
45 |
export default function ViewerTab({
|
46 |
handleTryRun,
|
47 |
}: {
|
|
|
50 |
const [selectedRun, setSelectedRun] = useState<number | null>(null);
|
51 |
const [runs, setRuns] = useState<Run[]>([]);
|
52 |
const [selectedModel, setSelectedModel] = useState<string>("Qwen3-14B");
|
53 |
+
const [modelStats, setModelStats] = useState<ModelStats | null>(null);
|
54 |
|
55 |
useEffect(() => {
|
56 |
// Convert the model data to the format expected by RunsList
|
|
|
66 |
result: run.result
|
67 |
}));
|
68 |
setRuns(convertedRuns);
|
69 |
+
|
70 |
+
// Calculate model statistics
|
71 |
+
const winRuns = convertedRuns.filter(run => run.result === "win");
|
72 |
+
const totalRuns = convertedRuns.length;
|
73 |
+
const wins = winRuns.length;
|
74 |
+
const winPercentage = totalRuns > 0 ? (wins / totalRuns) * 100 : 0;
|
75 |
+
|
76 |
+
// Calculate step-count statistics over winning runs only
|
77 |
+
const stepCounts = winRuns.map(run => run.steps.length);
|
78 |
+
const avgSteps = stepCounts.length > 0
|
79 |
+
? stepCounts.reduce((sum, count) => sum + count, 0) / stepCounts.length
|
80 |
+
: 0;
|
81 |
+
|
82 |
+
// Calculate the population standard deviation (variance divides by N, not N - 1)
|
83 |
+
const variance = stepCounts.length > 0
|
84 |
+
? stepCounts.reduce((sum, count) => sum + Math.pow(count - avgSteps, 2), 0) / stepCounts.length
|
85 |
+
: 0;
|
86 |
+
const stdDevSteps = Math.sqrt(variance);
|
87 |
+
|
88 |
+
// Calculate median (mean of the two middle values for an even count), min, and max step counts
|
89 |
+
const sortedSteps = [...stepCounts].sort((a, b) => a - b);
|
90 |
+
const medianSteps = stepCounts.length > 0
|
91 |
+
? stepCounts.length % 2 === 0
|
92 |
+
? (sortedSteps[stepCounts.length / 2 - 1] + sortedSteps[stepCounts.length / 2]) / 2
|
93 |
+
: sortedSteps[Math.floor(stepCounts.length / 2)]
|
94 |
+
: 0;
|
95 |
+
const minSteps = stepCounts.length > 0 ? Math.min(...stepCounts) : 0;
|
96 |
+
const maxSteps = stepCounts.length > 0 ? Math.max(...stepCounts) : 0;
|
97 |
+
|
98 |
+
setModelStats({
|
99 |
+
winPercentage,
|
100 |
+
avgSteps,
|
101 |
+
stdDevSteps,
|
102 |
+
totalRuns,
|
103 |
+
wins,
|
104 |
+
medianSteps,
|
105 |
+
minSteps,
|
106 |
+
maxSteps
|
107 |
+
});
|
108 |
}, [selectedModel]);
|
109 |
|
110 |
const handleRunSelect = (runId: number) => {
|
|
|
126 |
|
127 |
return (
|
128 |
<div className="grid grid-cols-1 md:grid-cols-12 gap-4 h-[calc(100vh-200px)] max-h-[calc(100vh-200px)] overflow-hidden p-2">
|
129 |
+
<Card className="p-3 col-span-12 row-start-1">
|
130 |
+
<div className="flex flex-col sm:flex-row items-start sm:items-center gap-3">
|
131 |
+
<div className="flex-shrink-0">
|
132 |
+
<Select value={selectedModel} onValueChange={setSelectedModel}>
|
133 |
+
<SelectTrigger className="w-[180px]">
|
134 |
+
<SelectValue placeholder="Select model" />
|
135 |
+
</SelectTrigger>
|
136 |
+
<SelectContent>
|
137 |
+
{Object.keys(models).map((modelName) => (
|
138 |
+
<SelectItem key={modelName} value={modelName}>
|
139 |
+
{modelName}
|
140 |
+
</SelectItem>
|
141 |
+
))}
|
142 |
+
</SelectContent>
|
143 |
+
</Select>
|
144 |
+
</div>
|
145 |
+
|
146 |
+
{modelStats && (
|
147 |
+
<div className="flex flex-wrap gap-1.5 items-center">
|
148 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
149 |
+
<span className="text-xs font-medium">Success:</span>
|
150 |
+
<span className="text-xs font-semibold">{modelStats.winPercentage.toFixed(1)}%</span>
|
151 |
+
<span className="text-xs text-muted-foreground">({modelStats.wins}/{modelStats.totalRuns})</span>
|
152 |
+
</Badge>
|
153 |
+
|
154 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
155 |
+
<span className="text-xs font-medium">Mean:</span>
|
156 |
+
<span className="text-xs font-semibold">{modelStats.avgSteps.toFixed(1)}</span>
|
157 |
+
<span className="text-xs text-muted-foreground">±{modelStats.stdDevSteps.toFixed(1)}</span>
|
158 |
+
</Badge>
|
159 |
+
|
160 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
161 |
+
<span className="text-xs font-medium">Median:</span>
|
162 |
+
<span className="text-xs font-semibold">{modelStats.medianSteps.toFixed(1)}</span>
|
163 |
+
</Badge>
|
164 |
+
|
165 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
166 |
+
<span className="text-xs font-medium">Min:</span>
|
167 |
+
<span className="text-xs font-semibold">{modelStats.minSteps}</span>
|
168 |
+
</Badge>
|
169 |
+
|
170 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
171 |
+
<span className="text-xs font-medium">Max:</span>
|
172 |
+
<span className="text-xs font-semibold">{modelStats.maxSteps}</span>
|
173 |
+
</Badge>
|
174 |
+
</div>
|
175 |
+
)}
|
176 |
+
</div>
|
177 |
</Card>
|
178 |
<div className="md:col-span-3 flex flex-col max-h-full overflow-hidden">
|
179 |
<div className="bg-card rounded-lg p-3 border flex-grow overflow-hidden flex flex-col">
|