David Pomerenke committed on
Commit
3dc9ba2
·
1 Parent(s): 9983b5f

Add GH Action for nightly evals

Browse files
.github/workflows/huggingface-upload.yml CHANGED
@@ -3,6 +3,8 @@ name: Upload to Hugging Face
3
  on:
4
  push:
5
  branches: [ main, master ]
 
 
6
 
7
  jobs:
8
  upload:
@@ -23,7 +25,7 @@ jobs:
23
  - name: Upload to Hugging Face
24
  env:
25
  HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}
26
- COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
27
  run: |
28
  python -c '
29
  from huggingface_hub import upload_folder
 
3
  on:
4
  push:
5
  branches: [ main, master ]
6
+ repository_dispatch:
7
+ types: [trigger-hf-upload]
8
 
9
  jobs:
10
  upload:
 
25
  - name: Upload to Hugging Face
26
  env:
27
  HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}
28
+ COMMIT_MESSAGE: ${{ github.event.head_commit.message || 'Update from nightly evaluation' }}
29
  run: |
30
  python -c '
31
  from huggingface_hub import upload_folder
.github/workflows/nightly-evals.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Nightly evaluation workflow.
# Runs the evaluation script, commits the refreshed result files, and then
# sends a `trigger-hf-upload` repository_dispatch event (the event type the
# Hugging Face upload workflow subscribes to).
name: Nightly Evaluation Run

on:
  schedule:
    - cron: '0 3 * * *'  # Run at 3am UTC every day
  workflow_dispatch:  # Allow manual triggering

# This job pushes a commit and sends a repository_dispatch event, both using
# GITHUB_TOKEN. Grant write access explicitly so the workflow does not depend
# on the repository-level default token permissions (which may be read-only).
permissions:
  contents: write

jobs:
  run-evals:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # requirements.txt pins numpy==2.2.3, which requires Python >= 3.10;
          # installing it on 3.9 fails. Quoted so YAML does not read 3.10
          # as the float 3.1.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run evaluations
        env:
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
        run: |
          cd evals
          python main.py

      - name: Commit changes
        # Each step starts in the workspace root, so the paths below are
        # relative to the repository root, not to evals/.
        # NOTE(review): confirm main.py writes these three files there.
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add results.json models.json languages.json
          git commit -m "Update evaluation results [skip ci]" || echo "No changes to commit"
          git push

      - name: Trigger Hugging Face upload
        # Explicit dispatch: the commit message above carries [skip ci], so
        # the upload is requested through this event instead of the push.
        if: success()
        uses: peter-evans/repository-dispatch@v2
        with:
          event-type: trigger-hf-upload
          token: ${{ secrets.GITHUB_TOKEN }}
requirements.txt CHANGED
@@ -1,168 +1,55 @@
1
  # This file was autogenerated by uv via the following command:
2
  # uv pip compile pyproject.toml -o requirements.txt
3
- aiofiles==23.2.1
4
- # via gradio
5
  annotated-types==0.7.0
6
  # via pydantic
7
  anyio==4.8.0
8
- # via
9
- # gradio
10
- # httpx
11
- # starlette
12
- certifi==2025.1.31
13
- # via
14
- # httpcore
15
- # httpx
16
- # requests
17
- charset-normalizer==3.4.1
18
- # via requests
19
  click==8.1.8
20
- # via
21
- # typer
22
- # uvicorn
23
  exceptiongroup==1.2.2
24
  # via anyio
25
  fastapi==0.115.8
26
- # via gradio
27
- ffmpy==0.5.0
28
- # via gradio
29
- filelock==3.17.0
30
- # via huggingface-hub
31
- fsspec==2025.2.0
32
- # via
33
- # gradio-client
34
- # huggingface-hub
35
- gradio==5.16.2
36
- # via
37
- # languagebench (pyproject.toml)
38
- # gradio-rangeslider
39
- gradio-client==1.7.1
40
- # via gradio
41
- gradio-rangeslider==0.0.8
42
  # via languagebench (pyproject.toml)
43
  h11==0.14.0
44
- # via
45
- # httpcore
46
- # uvicorn
47
- httpcore==1.0.7
48
- # via httpx
49
- httpx==0.28.1
50
- # via
51
- # gradio
52
- # gradio-client
53
- # safehttpx
54
- huggingface-hub==0.29.1
55
- # via
56
- # gradio
57
- # gradio-client
58
  idna==3.10
59
- # via
60
- # anyio
61
- # httpx
62
- # requests
63
- jinja2==3.1.5
64
- # via gradio
65
  language-data==1.3.0
66
  # via languagebench (pyproject.toml)
67
  marisa-trie==1.2.1
68
  # via language-data
69
- markdown-it-py==3.0.0
70
- # via rich
71
- markupsafe==2.1.5
72
- # via
73
- # gradio
74
- # jinja2
75
- mdurl==0.1.2
76
- # via markdown-it-py
77
- narwhals==1.27.1
78
- # via plotly
79
  numpy==2.2.3
80
  # via
81
- # gradio
82
  # pandas
83
- orjson==3.10.15
84
- # via gradio
85
- packaging==24.2
86
- # via
87
- # gradio
88
- # gradio-client
89
- # huggingface-hub
90
- # plotly
91
  pandas==2.2.3
92
- # via
93
- # languagebench (pyproject.toml)
94
- # gradio
95
- pillow==11.1.0
96
- # via gradio
97
- plotly==6.0.0
98
- # via languagebench (pyproject.toml)
99
- pycountry==24.6.1
100
  # via languagebench (pyproject.toml)
101
  pydantic==2.10.6
102
- # via
103
- # fastapi
104
- # gradio
105
  pydantic-core==2.27.2
106
  # via pydantic
107
- pydub==0.25.1
108
- # via gradio
109
- pygments==2.19.1
110
- # via rich
111
  python-dateutil==2.9.0.post0
112
  # via pandas
113
- python-multipart==0.0.20
114
- # via gradio
115
  pytz==2025.1
116
  # via pandas
117
- pyyaml==6.0.2
118
- # via
119
- # gradio
120
- # huggingface-hub
121
- requests==2.32.3
122
- # via huggingface-hub
123
- rich==13.9.4
124
- # via typer
125
- ruff==0.9.7
126
- # via gradio
127
- safehttpx==0.1.6
128
- # via gradio
129
- semantic-version==2.10.0
130
- # via gradio
131
  setuptools==75.8.2
132
  # via marisa-trie
133
- shellingham==1.5.4
134
- # via typer
135
  six==1.17.0
136
  # via python-dateutil
137
  sniffio==1.3.1
138
  # via anyio
139
  starlette==0.45.3
140
- # via
141
- # fastapi
142
- # gradio
143
- tomlkit==0.13.2
144
- # via gradio
145
- tqdm==4.67.1
146
- # via huggingface-hub
147
- typer==0.15.1
148
- # via gradio
149
  typing-extensions==4.12.2
150
  # via
151
  # anyio
152
  # fastapi
153
- # gradio
154
- # gradio-client
155
- # huggingface-hub
156
  # pydantic
157
  # pydantic-core
158
- # rich
159
- # typer
160
  # uvicorn
161
  tzdata==2025.1
162
  # via pandas
163
- urllib3==2.3.0
164
- # via requests
165
  uvicorn==0.34.0
166
- # via gradio
167
- websockets==14.2
168
- # via gradio-client
 
1
  # This file was autogenerated by uv via the following command:
2
  # uv pip compile pyproject.toml -o requirements.txt
 
 
3
  annotated-types==0.7.0
4
  # via pydantic
5
  anyio==4.8.0
6
+ # via starlette
 
 
 
 
 
 
 
 
 
 
7
  click==8.1.8
8
+ # via uvicorn
 
 
9
  exceptiongroup==1.2.2
10
  # via anyio
11
  fastapi==0.115.8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # via languagebench (pyproject.toml)
13
  h11==0.14.0
14
+ # via uvicorn
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  idna==3.10
16
+ # via anyio
17
+ joblib==1.5.0
18
+ # via languagebench (pyproject.toml)
 
 
 
19
  language-data==1.3.0
20
  # via languagebench (pyproject.toml)
21
  marisa-trie==1.2.1
22
  # via language-data
 
 
 
 
 
 
 
 
 
 
23
  numpy==2.2.3
24
  # via
25
+ # languagebench (pyproject.toml)
26
  # pandas
 
 
 
 
 
 
 
 
27
  pandas==2.2.3
 
 
 
 
 
 
 
 
28
  # via languagebench (pyproject.toml)
29
  pydantic==2.10.6
30
+ # via fastapi
 
 
31
  pydantic-core==2.27.2
32
  # via pydantic
 
 
 
 
33
  python-dateutil==2.9.0.post0
34
  # via pandas
 
 
35
  pytz==2025.1
36
  # via pandas
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  setuptools==75.8.2
38
  # via marisa-trie
 
 
39
  six==1.17.0
40
  # via python-dateutil
41
  sniffio==1.3.1
42
  # via anyio
43
  starlette==0.45.3
44
+ # via fastapi
 
 
 
 
 
 
 
 
45
  typing-extensions==4.12.2
46
  # via
47
  # anyio
48
  # fastapi
 
 
 
49
  # pydantic
50
  # pydantic-core
 
 
51
  # uvicorn
52
  tzdata==2025.1
53
  # via pandas
 
 
54
  uvicorn==0.34.0
55
+ # via languagebench (pyproject.toml)