Lisa Dunlap committed on
Commit
c2053c9
·
1 Parent(s): 4862c84

added more data

Browse files
data/taubench_airline/cluster_scores.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c7b68afbfa350790133e01ba82752bad496f7e76922353a6f63add7fabc7b2
3
+ size 38460
data/taubench_airline/clustered_results_lightweight.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0280cfd7e9d335169015a74d6f5f98c5687dcd910c4796ac68b06186e898f4
3
+ size 179823400
data/taubench_airline/clusters.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec781a06055b97489a84d8186d0e6892e9d52e85aac556a82a624ff0e091d509
3
+ size 643694
data/taubench_airline/full_dataset.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae69c83d3e9b0512972e452da27185ff18b1f0397b0ce93d1c17640ea733138d
3
+ size 36503699
data/taubench_airline/model_cluster_scores.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3787032a5155496f062589f41b1f9c1c29a19a8d625bfb770c504726135f1b29
3
+ size 104366
data/taubench_airline/model_scores.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f338646c2efa8bc252c54091689ab62dcab0a28f2bb94f9ecd67c3aaac98be25
3
+ size 1268
data/taubench_airline/model_stats.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61677c44e1d4fbef6c5382b372b66c7740694517ef9d4b9bb9f312c1330d0604
3
+ size 103618
data/taubench_airline/parsed_properties.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e1d94d31a16129055c26c98b5568b115df2a8b70b2259e44043904f75bb1954
3
+ size 3047869
data/taubench_airline/parsing_stats.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdffc752beae578ad95619d45f152b14ececb26e3d19fd9462259eb4d75c5215
3
+ size 177
data/taubench_airline/summary.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LMM-Vibes Results Summary
2
+ ==================================================
3
+
4
+ Total conversations: 400
5
+ Total properties: 3648
6
+ Models analyzed: 1
7
+
8
+ Output files:
9
+ - raw_properties.jsonl: Raw LLM responses
10
+ - extraction_stats.json: Extraction statistics
11
+ - extraction_samples.jsonl: Sample inputs/outputs
12
+ - parsed_properties.jsonl: Parsed property objects
13
+ - parsing_stats.json: Parsing statistics
14
+ - parsing_failures.jsonl: Failed parsing attempts
15
+ - validated_properties.jsonl: Validated properties
16
+ - validation_stats.json: Validation statistics
17
+ - clustered_results.jsonl: Complete clustered data
18
+ - embeddings.parquet: Embeddings data
19
+ - clustered_results_lightweight.jsonl: Data without embeddings
20
+ - summary_table.jsonl: Clustering summary
21
+ - model_cluster_scores.json: Per model-cluster combination metrics
22
+ - cluster_scores.json: Per cluster metrics (aggregated across models)
23
+ - model_scores.json: Per model metrics (aggregated across clusters)
24
+ - full_dataset.json: Complete PropertyDataset (JSON format)
25
+ - full_dataset.parquet: Complete PropertyDataset (parquet format, or .jsonl if mixed data types)
26
+
27
+ Model Rankings (by average quality score):
28
+ 1. claude-sonnet-35: 0.460
29
+ 2. gpt-4o: 0.420
data/taubench_airline/summary_table.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2069433760a247d954a2494c6c7d07077536568abd6552eb6d31599e9eccec04
3
+ size 117991
data/taubench_airline/summary_table.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e95e94101ca9ad42238de760b611b92ad6571da6024417ee73d9dab3627c7fe
3
+ size 71375
data/taubench_airline/validated_properties.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e1d94d31a16129055c26c98b5568b115df2a8b70b2259e44043904f75bb1954
3
+ size 3047869
data/taubench_airline/validation_stats.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:393161bea62a99be4ebe21fd1045f6c75c1c69bfb27f2b5499fbf89eb15973e3
3
+ size 137