Ayanami0730 committed on
Commit
927e909
·
1 Parent(s): 0abc04f

Add DeepResearch Bench application with LFS support

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. .gitignore +11 -0
  3. Dockerfile +11 -0
  4. README.md +25 -12
  5. __pycache__/create_leaderboard.cpython-38.pyc +0 -0
  6. __pycache__/create_leaderboard.cpython-39.pyc +0 -0
  7. __pycache__/gradio.cpython-310.pyc +0 -0
  8. __pycache__/gradio.cpython-39.pyc +0 -0
  9. app.py +16 -0
  10. create_leaderboard.py +91 -0
  11. data/data_viewer.jsonl +3 -0
  12. data/leaderboard.csv +17 -0
  13. data/raw_data/claude-3-5-sonnet-with-search.jsonl +3 -0
  14. data/raw_data/claude-3-7-sonnet-with-search.jsonl +3 -0
  15. data/raw_data/gemini-2.5-flash-with-grounding.jsonl +3 -0
  16. data/raw_data/gemini-2.5-pro-deepresearch.jsonl +3 -0
  17. data/raw_data/gemini-2.5-pro-with-grounding.jsonl +3 -0
  18. data/raw_data/gpt-4.1-mini-with-search.jsonl +3 -0
  19. data/raw_data/gpt-4.1-with-search.jsonl +3 -0
  20. data/raw_data/gpt-4o-mini-search-preview.jsonl +3 -0
  21. data/raw_data/gpt-4o-search-preview.jsonl +3 -0
  22. data/raw_data/grok-deeper-search.jsonl +3 -0
  23. data/raw_data/openai-deepresearch.jsonl +3 -0
  24. data/raw_data/perplexity-Research.jsonl +3 -0
  25. data/raw_data/perplexity-sonar-pro.jsonl +3 -0
  26. data/raw_data/perplexity-sonar-reasoning-pro.jsonl +3 -0
  27. data/raw_data/perplexity-sonar-reasoning.jsonl +3 -0
  28. data/raw_data/perplexity-sonar.jsonl +3 -0
  29. data/raw_results/claude-3-5-sonnet-with-search.jsonl +3 -0
  30. data/raw_results/claude-3-7-sonnet-with-search.jsonl +3 -0
  31. data/raw_results/gemini-2.5-flash-with-grounding.jsonl +3 -0
  32. data/raw_results/gemini-2.5-pro-deepresearch.jsonl +3 -0
  33. data/raw_results/gemini-2.5-pro-with-grounding.jsonl +3 -0
  34. data/raw_results/gpt-4.1-mini-with-search.jsonl +3 -0
  35. data/raw_results/gpt-4.1-with-search.jsonl +3 -0
  36. data/raw_results/gpt-4o-mini-search-preview.jsonl +3 -0
  37. data/raw_results/gpt-4o-search-preview.jsonl +3 -0
  38. data/raw_results/grok-deeper-search.jsonl +3 -0
  39. data/raw_results/openai-deepresearch.jsonl +3 -0
  40. data/raw_results/perplexity-Research.jsonl +3 -0
  41. data/raw_results/perplexity-sonar-pro.jsonl +3 -0
  42. data/raw_results/perplexity-sonar-reasoning-pro.jsonl +3 -0
  43. data/raw_results/perplexity-sonar-reasoning.jsonl +3 -0
  44. data/raw_results/perplexity-sonar.jsonl +3 -0
  45. requirements.txt +6 -0
  46. tabs/__pycache__/data_viewer_side_by_side_tab.cpython-38.pyc +0 -0
  47. tabs/__pycache__/data_viewer_side_by_side_tab.cpython-39.pyc +0 -0
  48. tabs/__pycache__/data_viewer_tab.cpython-38.pyc +0 -0
  49. tabs/__pycache__/data_viewer_tab.cpython-39.pyc +0 -0
  50. tabs/__pycache__/leaderboard_tab.cpython-38.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.jsonl filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ .env
5
+ .venv
6
+ env/
7
+ venv/
8
+ ENV/
9
+ .DS_Store
10
+ *.log
11
+ data/data_viewer.jsonl
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /code
4
+
5
+ COPY . /code/
6
+
7
+ RUN pip install --no-cache-dir --upgrade pip && \
8
+ pip install --no-cache-dir -r requirements.txt
9
+
10
+ # Default run command
11
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,25 @@
1
- ---
2
- title: DeepResearch Bench
3
- emoji: 🏆
4
- colorFrom: green
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.31.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepResearch Bench
2
+
3
+ **DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents**
4
+
5
+ This application showcases comprehensive evaluation results for Deep Research Agents. The app includes:
6
+
7
+ - 🏆 **Leaderboard** - View overall performance metrics across all evaluated models
8
+ - 🔍 **Data Viewer** - Explore detailed results for individual research tasks
9
+ - 📊 **Side-by-Side Comparison** - Compare different models' responses to the same research questions
10
+
11
+ Visit our [project website](https://deepresearch-bench.github.io) for more information.
12
+
13
+ ## Citation
14
+ ```bibtex
15
+ @article{du2025deepresearch,
16
+ author = {Mingxuan Du and Benfeng Xu and Chiwei Zhu and Xiaorui Wang and Zhendong Mao},
17
+ title = {DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents},
18
+ journal = {arXiv preprint},
19
+ year = {2025},
20
+ }
21
+ ```
22
+
23
+ ## Hugging Face Space Details
24
+ - SDK: Gradio
25
+ - SDK Version: 3.50.0
__pycache__/create_leaderboard.cpython-38.pyc ADDED
Binary file (2.35 kB). View file
 
__pycache__/create_leaderboard.cpython-39.pyc ADDED
Binary file (2.52 kB). View file
 
__pycache__/gradio.cpython-310.pyc ADDED
Binary file (422 Bytes). View file
 
__pycache__/gradio.cpython-39.pyc ADDED
Binary file (420 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Entry point for the DeepResearch Bench Hugging Face Space.
"""

from __future__ import annotations
from create_leaderboard import demo

# Launch the UI when executed as a script (the Dockerfile runs `python app.py`).
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind to all interfaces so the app is reachable from outside
        share=False,            # HF Spaces already provide sharing; no extra share link needed
        show_api=False,         # hide the API documentation page
    )
create_leaderboard.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Gradio UI – v2.1 (Leaderboard · Data Viewer · Prompt-to-Leaderboard)

Builds the module-level ``demo`` Blocks app that ``app.py`` imports and launches.
"""

from __future__ import annotations
from pathlib import Path
import gradio as gr

# ---- Tab components ----
from tabs.leaderboard_tab import create_leaderboard_tab
from tabs.data_viewer_tab import create_data_viewer_tab
from tabs.data_viewer_side_by_side_tab import create_data_viewer_side_by_side_tab

# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------

with gr.Blocks(title="DeepResearch Bench") as demo:

    # ========= Global CSS (only targets the custom title & intro blocks) =========
    gr.HTML("""
    <style>
    .title-block{
        /* Gradient text effect.
           Vendor-prefixed fallbacks come first and use the legacy direction
           keyword (`left`, i.e. start at the left edge); the standard
           declaration comes last so it wins wherever it is supported. */
        background: -webkit-linear-gradient(left, #009CFF, #823AFF);
        background: -moz-linear-gradient(left, #009CFF, #823AFF);
        background: linear-gradient(to right, #009CFF, #823AFF);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        background-clip: text;
        color: transparent;

        text-align: center;
        font-size: 2.1rem;
        font-weight: 700;
        margin: 0 0 1rem 0;
        padding-bottom: 0.2rem;
        display: inline-block; /* important: needed for the gradient to render correctly */
        width: 100%;           /* keeps the title centered */
    }
    .intro-block{
        text-align:center;
        margin-bottom:1.25rem;
        line-height:2;
    }
    .intro-block a{
        color:#0a58ca;
        text-decoration:none;
        margin:0 .3rem;
    }
    .intro-block a:hover{ text-decoration:underline; }
    </style>
    """)

    # ========= Top title & intro (deliberately not using Markdown heading syntax) =========
    # NOTE(review): Code / Paper / Eval Dataset are still "#" placeholders — fill in
    # once the real URLs are known. The Website URL is the one given in README.md.
    gr.HTML("""
    <div class="title-block">
        DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents
    </div>

    <div class="intro-block">
        The research aims to comprehensively evaluate the capabilities of Deep Research Agents.<br>
        <a href="#">Code</a> |
        <a href="https://deepresearch-bench.github.io" target="_blank" rel="noopener noreferrer">Website</a> |
        <a href="#">Paper</a> |
        <a href="#">Eval Dataset</a> |
        Total models: 16 | Last Update: 28 May 2025
    </div>
    """)

    # ========= Main tabs =========
    with gr.Tabs():
        create_leaderboard_tab()               # 🏆 Leaderboard
        create_data_viewer_tab()               # 🔍 Data Viewer
        create_data_viewer_side_by_side_tab()  # side-by-side data viewer

        # Placeholder tab until the feature is implemented.
        with gr.Tab("💬Prompt-to-Leaderboard"):
            gr.Markdown(
                """
                🚧 **Prompt-to-Leaderboard** module not implemented yet.
                Planned: inspect how individual prompts affect overall model ranking.
                """
            )

# ---------------------------------------------------------------------------
# Entrypoint
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    demo.launch()
data/data_viewer.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7ab11f250f4ffd6bf9c74ff8dc1e68f86d7abbf4f6319164bb476177ad7bf6e
3
+ size 28044256
data/leaderboard.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model,overall_score,comprehensiveness,insight,instruction_following,readability,citation_accuracy,effective_citations
2
+ gemini-2.5-pro-deepresearch,48.88,48.53,48.50,49.18,49.44,81.44,111.21
3
+ openai-deepresearch,46.98,46.87,45.25,49.27,47.14,77.96,40.79
4
+ perplexity-Research,42.25,40.69,39.39,46.40,44.28,90.24,31.26
5
+ claude-3-7-sonnet-with-search,40.67,38.99,37.66,45.77,41.46,93.68,32.48
6
+ grok-deeper-search,40.24,37.97,35.37,46.30,44.05,83.59,8.15
7
+ perplexity-sonar-reasoning-pro,40.22,37.38,36.11,45.66,44.74,39.36,8.35
8
+ perplexity-sonar-reasoning,40.18,37.14,36.73,45.15,44.35,48.67,11.34
9
+ perplexity-sonar-pro,38.93,36.38,34.26,44.70,43.35,78.66,14.74
10
+ gemini-2.5-pro-with-grounding,35.12,34.06,29.79,41.67,37.16,81.81,32.88
11
+ gpt-4o-search-preview,35.10,31.99,27.57,43.17,41.23,88.41,4.79
12
+ perplexity-sonar,34.54,30.95,27.51,42.33,41.60,74.42,8.67
13
+ gpt-4.1-with-search,33.46,29.42,25.38,42.33,40.77,87.83,4.42
14
+ gemini-2.5-flash-preview-04-17,32.39,31.63,26.73,38.82,34.48,81.92,31.08
15
+ gpt-4o-mini-search-preview,31.55,27.38,22.64,40.67,39.91,84.98,4.95
16
+ gpt-4.1-mini-with-search,30.26,26.05,20.75,39.65,39.33,84.58,4.35
17
+ claude-3-5-sonnet-with-search,28.48,24.82,22.82,35.12,35.08,94.04,9.78
data/raw_data/claude-3-5-sonnet-with-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8769fc2e0cf4f059da6e34839f9df09a6fdab9e2872faa467eafa1aa42316a69
3
+ size 505860
data/raw_data/claude-3-7-sonnet-with-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc16f997d3ecd09bccf6d9e756d9ad36d2834d2ed0827b8f39579b6321b98837
3
+ size 2281964
data/raw_data/gemini-2.5-flash-with-grounding.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:908295fc145ac2f8833396b56eac8913726d93288ad6b93a9c01a69cbdbbf78a
3
+ size 1016172
data/raw_data/gemini-2.5-pro-deepresearch.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c5d28e76595f22fae1b0fbbe2700958bfe707dafe53f7c5842d3067ccfddef
3
+ size 8523353
data/raw_data/gemini-2.5-pro-with-grounding.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b409b1031cff2876fd20cd8e9fc95501f2a95ad0154d3634b6538c165373447
3
+ size 1050267
data/raw_data/gpt-4.1-mini-with-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf809806c294364bb45cb337355d360b0e5e023c8e4ffdbf9557880a02137bab
3
+ size 463012
data/raw_data/gpt-4.1-with-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0228624b09e9d6c25c72156f4dd7f5702e3adcdd71a1f309094c2913eb50639
3
+ size 492406
data/raw_data/gpt-4o-mini-search-preview.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd49e75b1e7eb6ff40cd4c030032459d727987dd298863b488b9657ae18815a1
3
+ size 541532
data/raw_data/gpt-4o-search-preview.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb9de873345d6789197013f0cd60fb2d888957bc123447f2a8486e81c296f04
3
+ size 565183
data/raw_data/grok-deeper-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea6428dcf2e729d84f019c302fb3862a85cefbea08282b5ffcc5c400306ab077
3
+ size 1149933
data/raw_data/openai-deepresearch.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77d31b8ea1abd9aa8e924303451dc6a0f334f2e9d4d61ec71847c4db004ac62a
3
+ size 6903938
data/raw_data/perplexity-Research.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f27cb31cbab84f60efc3286592e84690fd117355dd84f9e4a9299108245c2a5
3
+ size 1747979
data/raw_data/perplexity-sonar-pro.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d577c0a208b35eb2c0454c00c70b12759cd8a1687f730f2133d8f392c1831ee
3
+ size 750234
data/raw_data/perplexity-sonar-reasoning-pro.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08c6c4094bbf0aa1749e7b1a45e856a6635b2df6afdf0de8eeafea99e7477fc
3
+ size 495156
data/raw_data/perplexity-sonar-reasoning.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aecbee30882b3ccd2d65470526fe48c7c016869f00593933a35e7096fe4fb74
3
+ size 659883
data/raw_data/perplexity-sonar.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc0ef26282e404b700d56e158644f44228c49a3d5126fa12c8068e053444131e
3
+ size 574856
data/raw_results/claude-3-5-sonnet-with-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0c47d1bab126886420bd53bb41a8905cdfb97f105711bcc2f5a27e3d53652ea
3
+ size 1992421
data/raw_results/claude-3-7-sonnet-with-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3a6bf74400c89d24fa47853ab034ed3696ee0694c2d190ba83c3f5dcd8a0ef
3
+ size 2002379
data/raw_results/gemini-2.5-flash-with-grounding.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43b9f71819babb5c00f65f0dd71d707323fb803c585bd74976f49cdc34ab80aa
3
+ size 1951481
data/raw_results/gemini-2.5-pro-deepresearch.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2fc53c99697e3276c98d735ed630df6fa49d2972c70a5409adc1958ecaa7b7
3
+ size 1937730
data/raw_results/gemini-2.5-pro-with-grounding.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e911a18cf8b8a8207eb45584ac650e4640f79db7352055ca5e92356de37f911
3
+ size 1944815
data/raw_results/gpt-4.1-mini-with-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:948a403d12bcf6b0e3ce6664f83afeb95413684ab0b7912003ed756a4df15c5e
3
+ size 1992345
data/raw_results/gpt-4.1-with-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:908a5989af337e381bf2bce6795438edd21966f313b5194f532feb1f47e5b812
3
+ size 2090582
data/raw_results/gpt-4o-mini-search-preview.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4277a9a91fcdaaeff1afe948c1088095d5f01092404fcd1a62407b7a58b7906e
3
+ size 2074673
data/raw_results/gpt-4o-search-preview.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7adcd70d49d3b5dd6050201aa4fcd31f51288945f4a23de14432a301cbf295a7
3
+ size 2063854
data/raw_results/grok-deeper-search.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b19fb7ec93872317eae94abeb02ed9c19912057acfa82600167ca853b750f476
3
+ size 1968989
data/raw_results/openai-deepresearch.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae45c25f5b5c56a772331543e4eefe7c80e63f33b441dfe83cb4a5c830c88a35
3
+ size 2007501
data/raw_results/perplexity-Research.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7715271d17cc344873653464ae3fef884e0f3c6bec89deee347ed7a0651beb9
3
+ size 2030483
data/raw_results/perplexity-sonar-pro.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a453f5b29492f684f53364121e7c79eeb81aee2737a383e2748830a4e4453afb
3
+ size 1975770
data/raw_results/perplexity-sonar-reasoning-pro.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:669a4a14232c63c716de766af7be050f8712f74a6d5437cc8fa637ded39f3c40
3
+ size 1957092
data/raw_results/perplexity-sonar-reasoning.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bceb5637a9d0092af5ddcca49557a4f8f3604be9ebb430be32e820fa4d6723b3
3
+ size 1951258
data/raw_results/perplexity-sonar.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36ecd1540447863f66bfe1a43905070f9c9b0d40de803348c3450a396df3d8fc
3
+ size 2016838
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=3.50.0
2
+ pandas
3
+ numpy
4
+ plotly
5
+ pathlib
6
+ requests
tabs/__pycache__/data_viewer_side_by_side_tab.cpython-38.pyc ADDED
Binary file (8.31 kB). View file
 
tabs/__pycache__/data_viewer_side_by_side_tab.cpython-39.pyc ADDED
Binary file (8.41 kB). View file
 
tabs/__pycache__/data_viewer_tab.cpython-38.pyc ADDED
Binary file (6.95 kB). View file
 
tabs/__pycache__/data_viewer_tab.cpython-39.pyc ADDED
Binary file (6.98 kB). View file
 
tabs/__pycache__/leaderboard_tab.cpython-38.pyc ADDED
Binary file (3.2 kB). View file