Commit
·
4dcba74
1
Parent(s):
3489875
add '-'
Browse files
- app.py +2 -2
- content.py +6 -4
app.py
CHANGED
@@ -12,10 +12,10 @@ from content import format_error, format_warning, format_log, TITLE, LINKS, INTR
|
|
12 |
|
13 |
TOKEN = os.environ.get("TOKEN", None)
|
14 |
|
15 |
-
OWNER="Online
|
16 |
# api = HfApi()
|
17 |
|
18 |
-
YEAR_VERSION = "
|
19 |
|
20 |
LOCAL_DEBUG = True
|
21 |
|
|
|
12 |
|
13 |
TOKEN = os.environ.get("TOKEN", None)
|
14 |
|
15 |
+
OWNER="Online-Mind2Web"
|
16 |
# api = HfApi()
|
17 |
|
18 |
+
YEAR_VERSION = "2025"
|
19 |
|
20 |
LOCAL_DEBUG = True
|
21 |
|
content.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
TITLE = """<h1 align="center" id="space-title">🏆 Online
|
2 |
LINKS = """
|
3 |
<div align="center">
|
4 |
<a href="https://tiancixue.notion.site/An-Illusion-of-Progress-Assessing-the-Current-State-of-Web-Agents-1ac6cd2b9aac80719cd6f68374aaf4b4?pvs=4">Blog</a> |
|
@@ -9,7 +9,7 @@ LINKS = """
|
|
9 |
"""
|
10 |
|
11 |
INTRODUCTION_TEXT = """
|
12 |
-
Online
|
13 |
Based on the number of steps required by human annotators, tasks are divided into three difficulty levels: Easy (1–5 steps), Medium (6–10 steps), and Hard (11+ steps).
|
14 |
"""
|
15 |
|
@@ -53,6 +53,7 @@ CITATION_BUTTON_TEXT = r"""
|
|
53 |
|
54 |
SUBMIT_INTRODUCTION = """
|
55 |
## ⚠ Please submit the trajectory file with the following format:
|
|
|
56 |
Each task is stored in a folder named after its `task_id`, containing:
|
57 |
|
58 |
- `trajectory/`: Stores screenshots of each step.
|
@@ -79,10 +80,11 @@ main_directory/
|
|
79 |
```
|
80 |
Please send your agent's name, model family, and organization via email to [email protected], along with the trajectory directory attached.
|
81 |
|
82 |
-
|
|
|
83 |
|
84 |
"""
|
85 |
-
DATA_DATASET = """## More Statistics for Online
|
86 |
"""
|
87 |
|
88 |
|
|
|
1 |
+
TITLE = """<h1 align="center" id="space-title">🏆 Online-Mind2Web Leaderboard</h1>"""
|
2 |
LINKS = """
|
3 |
<div align="center">
|
4 |
<a href="https://tiancixue.notion.site/An-Illusion-of-Progress-Assessing-the-Current-State-of-Web-Agents-1ac6cd2b9aac80719cd6f68374aaf4b4?pvs=4">Blog</a> |
|
|
|
9 |
"""
|
10 |
|
11 |
INTRODUCTION_TEXT = """
|
12 |
+
Online-Mind2Web is a benchmark designed to evaluate the real-world performance of web agents on live websites, featuring 300 tasks across 136 popular sites in diverse domains.
|
13 |
Based on the number of steps required by human annotators, tasks are divided into three difficulty levels: Easy (1–5 steps), Medium (6–10 steps), and Hard (11+ steps).
|
14 |
"""
|
15 |
|
|
|
53 |
|
54 |
SUBMIT_INTRODUCTION = """
|
55 |
## ⚠ Please submit the trajectory file with the following format:
|
56 |
+
|
57 |
Each task is stored in a folder named after its `task_id`, containing:
|
58 |
|
59 |
- `trajectory/`: Stores screenshots of each step.
|
|
|
80 |
```
|
81 |
Please send your agent's name, model family, and organization via email to [email protected], along with the trajectory directory attached.
|
82 |
|
83 |
+
Here is an [example](https://github.com/OSU-NLP-Group/Online-Mind2Web/tree/main/data/example/fb7b4f784cfde003e2548fdf4e8d6b4f) of the format. We encourage you to use the script provided in our GitHub repository to obtain evaluation results and submit them. To ensure the authenticity and reliability of the reported results, we will also conduct a verification.
|
84 |
+
If you have conducted your own human evaluation, please also attach your human eval results—we will spot-check these before adding them to the human-eval table.
|
85 |
|
86 |
"""
|
87 |
+
DATA_DATASET = """## More Statistics for Online-Mind2Web Benchmark
|
88 |
"""
|
89 |
|
90 |
|