diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..e2fdd3c78bfef7d609c3b755310e6439737447f2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -25,7 +25,6 @@ *.safetensors filter=lfs diff=lfs merge=lfs -text saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text *.tflite filter=lfs diff=lfs merge=lfs -text *.tgz filter=lfs diff=lfs merge=lfs -text *.wasm filter=lfs diff=lfs merge=lfs -text @@ -33,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a8de302f89b0af22888ac234e3b0d1a6f208a591 --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +auto_evals/ +venv/ +__pycache__/ +.env +.ipynb_checkpoints +*ipynb +.vscode/ + +#eval-queue/ +#eval-results/ +#eval-queue-bk/ +#eval-results-bk/ +logs/ +.idea/ + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0710dad252bda2ac9fd5b7e4e2e4dc0afeff43cf --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,53 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +default_language_version: + python: python3 + +ci: + autofix_prs: true + autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions' + autoupdate_schedule: quarterly + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-yaml + - id: check-case-conflict + - id: detect-private-key + - id: check-added-large-files + args: ['--maxkb=1000'] + - id: requirements-txt-fixer + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + name: Format imports + + - repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black + name: Format code + additional_dependencies: ['click==8.0.2'] + + - repo: https://github.com/charliermarsh/ruff-pre-commit + # Ruff version. + rev: 'v0.0.267' + hooks: + - id: ruff diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b5685772804c8af4235a8504dc6752bfc9ae5d1d --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +.PHONY: style quality + + +style: + python -m black --line-length 119 . + python -m isort . + ruff check --fix . + + +quality: + python -m black --check --line-length 119 . + python -m isort --check-only . + ruff check . 
diff --git a/README.md b/README.md index 44a674d5ac1dbe83021c43798376f3053504b5b9..c55e90b54a9280558aaadccb94f743cc2ee48b8e 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ --- -title: OmniGenomeLeaderboard -emoji: 🐠 -colorFrom: indigo -colorTo: purple +title: OmniGenomeBench +emoji: 🥇 +colorFrom: green +colorTo: indigo sdk: gradio -sdk_version: 5.14.0 +sdk_version: 4.36.1 app_file: app.py -pinned: false -license: apache-2.0 +pinned: true +license: mit --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..7e18c3fa4b51124de631038d7ba61a77a8fa32c3 --- /dev/null +++ b/app.py @@ -0,0 +1,228 @@ +import gradio as gr +from apscheduler.schedulers.background import BackgroundScheduler +from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns +from huggingface_hub import snapshot_download + +from src.about import ( + CITATION_BUTTON_LABEL, + CITATION_BUTTON_TEXT, + EVALUATION_QUEUE_TEXT, + INTRODUCTION_TEXT, + LLM_BENCHMARKS_TEXT, + TITLE, +) +from src.display.css_html_js import custom_css +from src.display.utils import ( + RGB_BENCHMARK_COLS, PGB_BENCHMARK_COLS, + GUE_BENCHMARK_COLS, GB_BENCHMARK_COLS, + RGB_COLS, PGB_COLS, GUE_COLS, GB_COLS, + EVAL_COLS, + EVAL_TYPES, + AutoEvalColumnRGB, AutoEvalColumnPGB, + AutoEvalColumnGUE, AutoEvalColumnGB, + ModelType, + Precision, + WeightType, + fields, +) +from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN +from src.populate import get_evaluation_queue_df, get_leaderboard_df +from src.submission.submit import add_new_eval + + +def restart_space(): + API.restart_space(repo_id=REPO_ID) + + +### Space initialisation +# try: +# print(EVAL_REQUESTS_PATH) +# snapshot_download( +# repo_id=QUEUE_REPO, +# local_dir=EVAL_REQUESTS_PATH, +# repo_type="dataset", +# tqdm_class=None, +# etag_timeout=30, +# token=TOKEN, +# ) 
+# except Exception: +# restart_space() +# try: +# print(EVAL_RESULTS_PATH) +# snapshot_download( +# repo_id=RESULTS_REPO, +# local_dir=EVAL_RESULTS_PATH, +# repo_type="dataset", +# tqdm_class=None, +# etag_timeout=30, +# token=TOKEN, +# ) +# except Exception: +# restart_space() + +RGB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/RGB/", EVAL_REQUESTS_PATH+"/RGB/", RGB_COLS, RGB_BENCHMARK_COLS) +PGB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/PGB/", EVAL_REQUESTS_PATH+"/PGB/", PGB_COLS, PGB_BENCHMARK_COLS) +GUE_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/GUE/", EVAL_REQUESTS_PATH+"/GUE/", GUE_COLS, GUE_BENCHMARK_COLS) +GB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/GB/", EVAL_REQUESTS_PATH+"/GB/", GB_COLS, GB_BENCHMARK_COLS) + +( + finished_eval_queue_df, + running_eval_queue_df, + pending_eval_queue_df, +) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS) + + +def init_leaderboard(dataframe, AutoEvalColumn): + if dataframe is None or dataframe.empty: + raise ValueError("Leaderboard DataFrame is empty or None.") + return Leaderboard( + value=dataframe, + datatype=[c.type for c in fields(AutoEvalColumn)], + select_columns=SelectColumns( + default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default], + cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden], + label="Select Columns to Display:", + ), + search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name], + hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden], + filter_columns=[ + ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"), + ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"), + ColumnFilter( + AutoEvalColumn.params.name, + type="slider", + min=0, + max=2000, + label="Select the number of parameters (M)", + ), + # ColumnFilter( + # AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", 
default=True + # ), + ], + # bool_checkboxgroup_label="Hide models", + # interactive=False, + ) + + +demo = gr.Blocks(css=custom_css) +with demo: + gr.HTML(TITLE) + gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text") + + with gr.Tabs(elem_classes="tab-buttons") as tabs: + with gr.TabItem("RGB", elem_id="rgb-benchmark-tab-table", id=0): + leaderboard = init_leaderboard(RGB_LEADERBOARD_DF, AutoEvalColumnRGB) + + with gr.TabItem("PGB", elem_id="pgb-benchmark-tab-table", id=1): + leaderboard2 = init_leaderboard(PGB_LEADERBOARD_DF, AutoEvalColumnPGB) + + with gr.TabItem("GUE", elem_id="gue-benchmark-tab-table", id=2): + leaderboard3 = init_leaderboard(GUE_LEADERBOARD_DF, AutoEvalColumnGUE) + + with gr.TabItem("GB", elem_id="gb-benchmark-tab-table", id=3): + leaderboard4 = init_leaderboard(GB_LEADERBOARD_DF, AutoEvalColumnGB) + + with gr.TabItem("📝 About", elem_id="rgb-benchmark-tab-table", id=4): + gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text") + + with gr.TabItem("🚀 Submit here! 
", elem_id="rgb-benchmark-tab-table", id=5): + with gr.Column(): + with gr.Row(): + gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") + + with gr.Column(): + with gr.Accordion( + f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", + open=False, + ): + with gr.Row(): + finished_eval_table = gr.components.Dataframe( + value=finished_eval_queue_df, + headers=EVAL_COLS, + datatype=EVAL_TYPES, + row_count=5, + ) + with gr.Accordion( + f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", + open=False, + ): + with gr.Row(): + running_eval_table = gr.components.Dataframe( + value=running_eval_queue_df, + headers=EVAL_COLS, + datatype=EVAL_TYPES, + row_count=5, + ) + + with gr.Accordion( + f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", + open=False, + ): + with gr.Row(): + pending_eval_table = gr.components.Dataframe( + value=pending_eval_queue_df, + headers=EVAL_COLS, + datatype=EVAL_TYPES, + row_count=5, + ) + with gr.Row(): + gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text") + + with gr.Row(): + with gr.Column(): + model_name_textbox = gr.Textbox(label="Model name") + revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main") + model_type = gr.Dropdown( + choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown], + label="Model type", + multiselect=False, + value=None, + interactive=True, + ) + + with gr.Column(): + precision = gr.Dropdown( + choices=[i.value.name for i in Precision if i != Precision.Unknown], + label="Precision", + multiselect=False, + value="float16", + interactive=True, + ) + weight_type = gr.Dropdown( + choices=[i.value.name for i in WeightType], + label="Weights type", + multiselect=False, + value="Original", + interactive=True, + ) + base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)") + + submit_button = gr.Button("Submit Eval") + submission_result = gr.Markdown() + submit_button.click( + add_new_eval, + [ + 
model_name_textbox, + base_model_name_textbox, + revision_name_textbox, + precision, + weight_type, + model_type, + ], + submission_result, + ) + + with gr.Row(): + with gr.Accordion("📙 Citation", open=False): + citation_button = gr.Textbox( + value=CITATION_BUTTON_TEXT, + label=CITATION_BUTTON_LABEL, + lines=20, + elem_id="citation-button", + show_copy_button=True, + ) + +scheduler = BackgroundScheduler() +scheduler.add_job(restart_space, "interval", seconds=1800) +scheduler.start() +demo.queue(default_concurrency_limit=40).launch() diff --git a/bak/eval-queue/.gitattributes b/bak/eval-queue/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..28df5f900b358436f0267334b3e3e9af33f917ba --- /dev/null +++ b/bak/eval-queue/.gitattributes @@ -0,0 +1,55 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.lz4 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text 
+*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +# Audio files - uncompressed +*.pcm filter=lfs diff=lfs merge=lfs -text +*.sam filter=lfs diff=lfs merge=lfs -text +*.raw filter=lfs diff=lfs merge=lfs -text +# Audio files - compressed +*.aac filter=lfs diff=lfs merge=lfs -text +*.flac filter=lfs diff=lfs merge=lfs -text +*.mp3 filter=lfs diff=lfs merge=lfs -text +*.ogg filter=lfs diff=lfs merge=lfs -text +*.wav filter=lfs diff=lfs merge=lfs -text +# Image files - uncompressed +*.bmp filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.tiff filter=lfs diff=lfs merge=lfs -text +# Image files - compressed +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text diff --git a/bak/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc --- /dev/null +++ b/bak/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": 
"custom"} \ No newline at end of file diff --git a/bak/eval-queue/GB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/bak/eval-queue/GB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GB/README.md b/bak/eval-queue/GB/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/bak/eval-queue/GB/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/bak/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a --- /dev/null +++ b/bak/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"} \ No newline at end of file 
diff --git a/bak/eval-queue/GB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/bak/eval-queue/GB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/bak/eval-queue/GB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/bak/eval-queue/GB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": 
false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/bak/eval-queue/GB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc --- /dev/null +++ b/bak/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GUE/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json 
b/bak/eval-queue/GUE/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/bak/eval-queue/GUE/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GUE/README.md b/bak/eval-queue/GUE/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/bak/eval-queue/GUE/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/bak/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a --- /dev/null +++ b/bak/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GUE/multimolecule/splicebert_eval_request_False_bfloat16_Original.json 
b/bak/eval-queue/GUE/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/bak/eval-queue/GUE/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GUE/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GUE/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/bak/eval-queue/GUE/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GUE/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GUE/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/bak/eval-queue/GUE/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", 
"submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GUE/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GUE/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/bak/eval-queue/GUE/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json b/bak/eval-queue/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..576be2fb71b8a4784aabcba74c89753ce508b229 --- /dev/null +++ b/bak/eval-queue/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "GleghornLab/cdsBERT", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 420, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json b/bak/eval-queue/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..5082721283bbd97ebdb90d70d72fe7448cad8087 --- /dev/null +++ 
b/bak/eval-queue/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/agro-nucleotide-transformer-1b", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 985, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/bak/eval-queue/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/bak/eval-queue/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..576be2fb71b8a4784aabcba74c89753ce508b229 --- /dev/null +++ b/bak/eval-queue/PGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "GleghornLab/cdsBERT", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 420, "license": "custom"} \ No newline at end of file diff --git 
a/bak/eval-queue/PGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..5082721283bbd97ebdb90d70d72fe7448cad8087 --- /dev/null +++ b/bak/eval-queue/PGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/agro-nucleotide-transformer-1b", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 985, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc --- /dev/null +++ b/bak/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 
0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/bak/eval-queue/PGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/README.md b/bak/eval-queue/PGB/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/bak/eval-queue/PGB/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/bak/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a --- /dev/null +++ b/bak/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/multimolecule/rnabert_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/multimolecule/rnabert_eval_request_False_bfloat16_Original.json new file mode 100644 index 
0000000000000000000000000000000000000000..026a555399655334d70dcb4fc55e56669ff8c00b --- /dev/null +++ b/bak/eval-queue/PGB/multimolecule/rnabert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/rnabert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 0.48, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/multimolecule/rnafm_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/multimolecule/rnafm_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ea2ffc0dab27a753c373703fccfd7eb88ea57716 --- /dev/null +++ b/bak/eval-queue/PGB/multimolecule/rnafm_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/rnafm", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 99.52, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/multimolecule/rnamsm_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/multimolecule/rnamsm_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7095daa6152aea1c8b87abe6e9e25c3ac3fb74ab --- /dev/null +++ b/bak/eval-queue/PGB/multimolecule/rnamsm_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/rnamsm", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96.5, "license": "custom"} \ No newline at end of file diff --git 
a/bak/eval-queue/PGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/bak/eval-queue/PGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/bak/eval-queue/PGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/bak/eval-queue/PGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": 
false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/PGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/PGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/bak/eval-queue/PGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/README.md b/bak/eval-queue/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/bak/eval-queue/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/bak/eval-queue/RGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json b/bak/eval-queue/RGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..576be2fb71b8a4784aabcba74c89753ce508b229 --- /dev/null +++ b/bak/eval-queue/RGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "GleghornLab/cdsBERT", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 420, "license": "custom"} \ No newline at end of file diff --git 
a/bak/eval-queue/RGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json b/bak/eval-queue/RGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..5082721283bbd97ebdb90d70d72fe7448cad8087 --- /dev/null +++ b/bak/eval-queue/RGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/agro-nucleotide-transformer-1b", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 985, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/bak/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..b253088dd1195bd80fc63e2184ecd9231d02cc14 --- /dev/null +++ b/bak/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 100, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/RGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/bak/eval-queue/RGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 
0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/bak/eval-queue/RGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/RGB/README.md b/bak/eval-queue/RGB/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/bak/eval-queue/RGB/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/bak/eval-queue/RGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/bak/eval-queue/RGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/bak/eval-queue/RGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/RGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/bak/eval-queue/RGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/bak/eval-queue/RGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 
@@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/RGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/RGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/bak/eval-queue/RGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/RGB/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/RGB/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..4076dc7827c8ae12fc87cc4670e462df5f98968a --- /dev/null +++ b/bak/eval-queue/RGB/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-52M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 52, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/RGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json 
b/bak/eval-queue/RGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/bak/eval-queue/RGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/bak/eval-queue/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/bak/eval-queue/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/bak/eval-queue/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/bak/eval-queue/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": 
"2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/bak/eval-queue/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..4076dc7827c8ae12fc87cc4670e462df5f98968a --- /dev/null +++ b/bak/eval-queue/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-52M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 52, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-queue/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/bak/eval-queue/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ 
b/bak/eval-queue/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/bak/eval-results/.gitattributes b/bak/eval-results/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..28df5f900b358436f0267334b3e3e9af33f917ba --- /dev/null +++ b/bak/eval-results/.gitattributes @@ -0,0 +1,55 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.lz4 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs 
diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +# Audio files - uncompressed +*.pcm filter=lfs diff=lfs merge=lfs -text +*.sam filter=lfs diff=lfs merge=lfs -text +*.raw filter=lfs diff=lfs merge=lfs -text +# Audio files - compressed +*.aac filter=lfs diff=lfs merge=lfs -text +*.flac filter=lfs diff=lfs merge=lfs -text +*.mp3 filter=lfs diff=lfs merge=lfs -text +*.ogg filter=lfs diff=lfs merge=lfs -text +*.wav filter=lfs diff=lfs merge=lfs -text +# Image files - uncompressed +*.bmp filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.tiff filter=lfs diff=lfs merge=lfs -text +# Image files - compressed +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text diff --git a/bak/eval-results/GB/3UTRBERT.json b/bak/eval-results/GB/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..ce357a624ac6d43f25bc2ecdbcc16c11fe9787e3 --- /dev/null +++ b/bak/eval-results/GB/3UTRBERT.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.8950 + }, + "DOW":{ + "F1":0.9022 + }, + "DRE":{ + "F1":0.7435 + }, + "DME":{ + "F1":0.8014 + }, + 
"HCE":{ + "F1":0.7023 + }, + "HEE":{ + "F1":0.7633 + }, + "HRE":{ + "F1":0.9847 + }, + "HNP":{ + "F1":0.8249 + }, + "HOR":{ + "F1":0.6678 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GB/Caduceus.json b/bak/eval-results/GB/Caduceus.json new file mode 100644 index 0000000000000000000000000000000000000000..4240022d6b17dd8de28fc9e7dc9c5a1873760630 --- /dev/null +++ b/bak/eval-results/GB/Caduceus.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9213 + }, + "DOW":{ + "F1":0.9474 + }, + "DRE":{ + "F1":0.7203 + }, + "DME":{ + "F1":0.7561 + }, + "HCE":{ + "F1":0.7020 + }, + "HEE":{ + "F1":0.7647 + }, + "HRE":{ + "F1":0.7916 + }, + "HNP":{ + "F1":0.8436 + }, + "HOR":{ + "F1":0.6317 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GB/DNABERT-2-117M.json b/bak/eval-results/GB/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..dc6e4754bd33a1a1da61ab44d2c2bbb87dc1b96a --- /dev/null +++ b/bak/eval-results/GB/DNABERT-2-117M.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + 
"description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9267 + }, + "DOW":{ + "F1":0.9517 + }, + "DRE":{ + "F1":0.4377 + }, + "DME":{ + "F1":0.7721 + }, + "HCE":{ + "F1":0.7558 + }, + "HEE":{ + "F1":0.8066 + }, + "HRE":{ + "F1":0.7814 + }, + "HNP":{ + "F1":0.8580 + }, + "HOR":{ + "F1":0.6803 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GB/HyenaDNA.json b/bak/eval-results/GB/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..e16541883eeac9d04b2bff41acbb2800fbb61d17 --- /dev/null +++ b/bak/eval-results/GB/HyenaDNA.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.8821 + }, + "DOW":{ + "F1":0.9413 + }, + "DRE":{ + "F1":0.7011 + }, + "DME":{ + "F1":0.7644 + }, + "HCE":{ + "F1":0.7038 + }, + "HEE":{ + "F1":0.7958 + }, + "HRE":{ + "F1":0.9633 + }, + "HNP":{ + "F1":0.8599 + }, + "HOR":{ + "F1":0.6703 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GB/NT-V2-100M.json b/bak/eval-results/GB/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..04e8c8e3026dee318fba5ff3f68d2b32f1cfe7f7 --- /dev/null +++ b/bak/eval-results/GB/NT-V2-100M.json @@ -0,0 +1,48 @@ +{ + "config":{ + 
"model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9166 + }, + "DOW":{ + "F1":0.9432 + }, + "DRE":{ + "F1":0.7820 + }, + "DME":{ + "F1":0.8172 + }, + "HCE":{ + "F1":0.7198 + }, + "HEE":{ + "F1":0.7985 + }, + "HRE":{ + "F1":0.9330 + }, + "HNP":{ + "F1":0.8530 + }, + "HOR":{ + "F1":0.6853 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GB/OmniGenome186M.json b/bak/eval-results/GB/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..4f53aeb07e97ab71bbc8f78e16fa87545fb0c43c --- /dev/null +++ b/bak/eval-results/GB/OmniGenome186M.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9416 + }, + "DOW":{ + "F1":0.9349 + }, + "DRE":{ + "F1":0.7717 + }, + "DME":{ + "F1":0.8034 + }, + "HCE":{ + "F1":0.7351 + }, + "HEE":{ + "F1":0.8223 + }, + "HRE":{ + "F1":0.9566 + }, + "HNP":{ + "F1":0.8787 + }, + "HOR":{ 
+ "F1":0.6897 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GB/SpliceBERT.json b/bak/eval-results/GB/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..ea4b1c7c66306ef5379832c7558d16a550a55561 --- /dev/null +++ b/bak/eval-results/GB/SpliceBERT.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9472 + }, + "DOW":{ + "F1":0.9642 + }, + "DRE":{ + "F1":0.7229 + }, + "DME":{ + "F1":0.7470 + }, + "HCE":{ + "F1":0.7350 + }, + "HEE":{ + "F1":0.7960 + }, + "HRE":{ + "F1":0.9523 + }, + "HNP":{ + "F1":0.8957 + }, + "HOR":{ + "F1":0.6889 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GUE/3UTRBERT.json b/bak/eval-results/GUE/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..95d23ee36786021be7ec587e5b278036700807fc --- /dev/null +++ b/bak/eval-results/GUE/3UTRBERT.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + 
"Yeast EMP":{ + "F1":0.7189 + }, + "Mouse TF-M":{ + "F1":0.7146 + }, + "Virus CVC":{ + "F1":0.6871 + }, + "Human TF-H":{ + "F1":0.7485 + }, + "Human PD":{ + "F1":0.8237 + }, + "Human CPD":{ + "F1":0.9051 + }, + "Human SSP":{ + "F1":0.8195 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GUE/Caduceus.json b/bak/eval-results/GUE/Caduceus.json new file mode 100644 index 0000000000000000000000000000000000000000..5efcd074500e32eb68a3417305640a6e65ad74bb --- /dev/null +++ b/bak/eval-results/GUE/Caduceus.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7349 + }, + "Mouse TF-M":{ + "F1":0.7818 + }, + "Virus CVC":{ + "F1":0.4909 + }, + "Human TF-H":{ + "F1":0.7956 + }, + "Human PD":{ + "F1":0.8913 + }, + "Human CPD":{ + "F1":0.8509 + }, + "Human SSP":{ + "F1":0.8182 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GUE/DNABERT-2-117M.json b/bak/eval-results/GUE/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..9ec4062de7f854995b73467107515d49ae9c4fb6 --- /dev/null +++ b/bak/eval-results/GUE/DNABERT-2-117M.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + 
"no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7585 + }, + "Mouse TF-M":{ + "F1":0.8623 + }, + "Virus CVC":{ + "F1":0.6890 + }, + "Human TF-H":{ + "F1":0.8180 + }, + "Human PD":{ + "F1":0.9017 + }, + "Human CPD":{ + "F1":0.8257 + }, + "Human SSP":{ + "F1":0.8521 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GUE/HyenaDNA.json b/bak/eval-results/GUE/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..2e8f09165601c1cccda8918d51e6785afef7cc75 --- /dev/null +++ b/bak/eval-results/GUE/HyenaDNA.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7308 + }, + "Mouse TF-M":{ + "F1":0.7344 + }, + "Virus CVC":{ + "F1":0.6637 + }, + "Human TF-H":{ + "F1":0.7762 + }, + "Human PD":{ + "F1":0.9119 + }, + "Human CPD":{ + "F1":0.8431 + }, + "Human SSP":{ + "F1":0.8334 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GUE/NT-V2-100M.json b/bak/eval-results/GUE/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..2b3a39d97cf155bf7888c9cdd54c03c6bddfd8b9 --- /dev/null +++ b/bak/eval-results/GUE/NT-V2-100M.json @@ -0,0 +1,42 
@@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7493 + }, + "Mouse TF-M":{ + "F1":0.7810 + }, + "Virus CVC":{ + "F1":0.5923 + }, + "Human TF-H":{ + "F1":0.7912 + }, + "Human PD":{ + "F1":0.9087 + }, + "Human CPD":{ + "F1":0.8470 + }, + "Human SSP":{ + "F1":0.8413 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GUE/OmniGenome186M.json b/bak/eval-results/GUE/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..7a8f30ed4659430d7155800e51113eb113cea5ea --- /dev/null +++ b/bak/eval-results/GUE/OmniGenome186M.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7851 + }, + "Mouse TF-M":{ + "F1":0.8472 + }, + "Virus CVC":{ + "F1":0.7472 + }, + "Human TF-H":{ + "F1":0.8173 + }, + "Human PD":{ + "F1":0.9004 + }, + "Human CPD":{ + "F1":0.8522 + }, + "Human SSP":{ + 
"F1":0.9039 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/GUE/SpliceBERT.json b/bak/eval-results/GUE/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..e68ed40e24cf3d44c7e36b5e0126543abdc237ac --- /dev/null +++ b/bak/eval-results/GUE/SpliceBERT.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7766 + }, + "Mouse TF-M":{ + "F1":0.8497 + }, + "Virus CVC":{ + "F1":0.5624 + }, + "Human TF-H":{ + "F1":0.8277 + }, + "Human PD":{ + "F1":0.9224 + }, + "Human CPD":{ + "F1":0.8396 + }, + "Human SSP":{ + "F1":0.9381 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/3UTRBERT.json b/bak/eval-results/PGB/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..095b1f90d8ed20191ab55b722ed6a4ec48c26e48 --- /dev/null +++ b/bak/eval-results/PGB/3UTRBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, +"results":{ + "PolyA":{ + 
"F1":0.7648 + }, + "LncRNA":{ + "F1":0.7075 + }, + "Chrom Acc":{ + "F1":0.6371 + }, + "Prom Str":{ + "RMSE":1.04 + }, + "Term Str":{ + "RMSE":0.36 + }, + "Splice":{ + "F1":0.9444 + }, + "Gene Exp":{ + "RMSE":14.87 + }, + "Enhancer":{ + "F1":0.7167 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/Agro-NT.json b/bak/eval-results/PGB/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..9c1cf5acde27b681f67c9ca8e23b5bd3b9c6e63e --- /dev/null +++ b/bak/eval-results/PGB/Agro-NT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, +"results":{ + "PolyA":{ + "F1":0.7889 + }, + "LncRNA":{ + "F1":0.6724 + }, + "Chrom Acc":{ + "F1":0.6327 + }, + "Prom Str":{ + "RMSE":0.94 + }, + "Term Str":{ + "RMSE":0.78 + }, + "Splice":{ + "F1":0.8845 + }, + "Gene Exp":{ + "RMSE":15.56 + }, + "Enhancer":{ + "F1":0.6283 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/CDSBERT.json b/bak/eval-results/PGB/CDSBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..92365f0f58bc175190a3ff95cbc24d2093494f09 --- /dev/null +++ b/bak/eval-results/PGB/CDSBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + 
"description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.3972 + }, + "LncRNA":{ + "F1":0.3306 + }, + "Chrom Acc":{ + "F1":0.4895 + }, + "Prom Str":{ + "RMSE":2.19 + }, + "Term Str":{ + "RMSE":0.59 + }, + "Splice":{ + "F1":0.5220 + }, + "Gene Exp":{ + "RMSE":14.77 + }, + "Enhancer":{ + "F1":0.3393 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/Caduceus.json b/bak/eval-results/PGB/Caduceus.json new file mode 100644 index 0000000000000000000000000000000000000000..80cfc4a525127aff7dbc5d0a71224d432319bbc6 --- /dev/null +++ b/bak/eval-results/PGB/Caduceus.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7089 + }, + "LncRNA":{ + "F1":0.6840 + }, + "Chrom Acc":{ + "F1":0.6453 + }, + "Prom Str":{ + "RMSE":0.91 + }, + "Term Str":{ + "RMSE":0.26 + }, + "Splice":{ + "F1":0.7951 + }, + "Gene Exp":{ + "RMSE":14.72 + }, + "Enhancer":{ + "F1":0.6083 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/DNABERT-2-117M.json b/bak/eval-results/PGB/DNABERT-2-117M.json new file mode 100644 index 
0000000000000000000000000000000000000000..3906893d2ce05c38ed29efe43ba07f466bdc0e58 --- /dev/null +++ b/bak/eval-results/PGB/DNABERT-2-117M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.4135 + }, + "LncRNA":{ + "F1":0.7255 + }, + "Chrom Acc":{ + "F1":0.6149 + }, + "Prom Str":{ + "RMSE":0.99 + }, + "Term Str":{ + "RMSE":0.24 + }, + "Splice":{ + "F1":0.4534 + }, + "Gene Exp":{ + "RMSE":14.78 + }, + "Enhancer":{ + "F1":0.3640 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/HyenaDNA.json b/bak/eval-results/PGB/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..af033dd0115d50a3bdebed9eb98038b3850d3c41 --- /dev/null +++ b/bak/eval-results/PGB/HyenaDNA.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8311 + }, + "LncRNA":{ + "F1":0.5821 + }, + "Chrom Acc":{ + "F1":0.5220 + }, + "Prom Str":{ + "RMSE":0.88 + }, + "Term Str":{ 
+ "RMSE":0.26 + }, + "Splice":{ + "F1":0.9028 + }, + "Gene Exp":{ + "RMSE":14.76 + }, + "Enhancer":{ + "F1":0.6617 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/NT-V2-100M.json b/bak/eval-results/PGB/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..9dbcaf073a15ce2d3e1c89b16d7b65d074e97a89 --- /dev/null +++ b/bak/eval-results/PGB/NT-V2-100M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7126 + }, + "LncRNA":{ + "F1":0.7308 + }, + "Chrom Acc":{ + "F1":0.6571 + }, + "Prom Str":{ + "RMSE":0.81 + }, + "Term Str":{ + "RMSE":0.27 + }, + "Splice":{ + "F1":0.9505 + }, + "Gene Exp":{ + "RMSE":14.69 + }, + "Enhancer":{ + "F1":0.7389 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/OmniGenome186M.json b/bak/eval-results/PGB/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..06b0c51eead662453dcfb7fe0c41ae7a8297046b --- /dev/null +++ b/bak/eval-results/PGB/OmniGenome186M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", 
"Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8755 + }, + "LncRNA":{ + "F1":0.7796 + }, + "Chrom Acc":{ + "F1":0.6769 + }, + "Prom Str":{ + "RMSE":0.59 + }, + "Term Str":{ + "RMSE":0.18 + }, + "Splice":{ + "F1":0.9841 + }, + "Gene Exp":{ + "RMSE":14.71 + }, + "Enhancer":{ + "F1":0.7977 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/RNA-BERT.json b/bak/eval-results/PGB/RNA-BERT.json new file mode 100644 index 0000000000000000000000000000000000000000..dc87b9183fd0d44c57fc5f39596f9670be1d7b88 --- /dev/null +++ b/bak/eval-results/PGB/RNA-BERT.json @@ -0,0 +1,46 @@ +{ + "config":{ + "model":"multimolecule/rnabert", + "model_args":"pretrained=multimolecule/rnabert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"0.48M", "Pretraining Data":"76,237 human ncRNA sequences", "Species":"Human", "Nucleic Acid":"ncRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnabert", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7854 + }, + "LncRNA":{ + "F1":0.6199 + }, + "Chrom Acc":{ + "F1":0.4894 + }, + "Prom Str":{ + "RMSE":1.81 + }, + "Term Str":{ + "RMSE":0.38 + }, + "Splice":{ + "F1":0.9445 + }, + "Gene Exp":{ + "RMSE":14.89 + }, + "Enhancer":{ + "F1":0.5761 + } + + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/RNA-FM.json b/bak/eval-results/PGB/RNA-FM.json new file mode 100644 index 0000000000000000000000000000000000000000..73cc0c1f5125e649358e73058bb3310733f164d3 --- /dev/null +++ b/bak/eval-results/PGB/RNA-FM.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/rnafm", + 
"model_args":"pretrained=multimolecule/rnafm,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"99.52M", "Pretraining Data":"23.7 million non-redundant RNA sequences", "Species":"Multi-Species", "Nucleic Acid":"ncRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnafm", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8494 + }, + "LncRNA":{ + "F1":0.6875 + }, + "Chrom Acc":{ + "F1":0.5492 + }, + "Prom Str":{ + "RMSE":0.95 + }, + "Term Str":{ + "RMSE":0.27 + }, + "Splice":{ + "F1":0.9595 + }, + "Gene Exp":{ + "RMSE":14.83 + }, + "Enhancer":{ + "F1":0.5714 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/RNA-MSM.json b/bak/eval-results/PGB/RNA-MSM.json new file mode 100644 index 0000000000000000000000000000000000000000..282ad3a11ed1f200ccd93c78fcd961d94c4364de --- /dev/null +++ b/bak/eval-results/PGB/RNA-MSM.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/rnamsm", + "model_args":"pretrained=multimolecule/rnamsm,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"96.5M", "Pretraining Data":"3,932 RNA families", "Species":"Multi-Species", "Nucleic Acid":"RNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnamsm", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8425 + }, + "LncRNA":{ + "F1":0.6749 + }, + "Chrom Acc":{ + "F1":0.5352 + }, + "Prom Str":{ + "RMSE":1.28 + }, + "Term Str":{ + "RMSE":0.28 + }, + "Splice":{ + "F1":0.9549 + }, + "Gene Exp":{ + "RMSE":14.87 + }, + "Enhancer":{ + "F1":0.6145 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/PGB/SpliceBERT.json b/bak/eval-results/PGB/SpliceBERT.json new 
file mode 100644 index 0000000000000000000000000000000000000000..f52106504f5a96e28e24a29e9ef7019115a7f8ab --- /dev/null +++ b/bak/eval-results/PGB/SpliceBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.6523 + }, + "LncRNA":{ + "F1":0.7188 + }, + "Chrom Acc":{ + "F1":0.6362 + }, + "Prom Str":{ + "RMSE":0.75 + }, + "Term Str":{ + "RMSE":0.22 + }, + "Splice":{ + "F1":0.9645 + }, + "Gene Exp":{ + "RMSE":14.70 + }, + "Enhancer":{ + "F1":0.6971 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/README.md b/bak/eval-results/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/bak/eval-results/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/bak/eval-results/RGB/yangheng/3UTRBERT.json b/bak/eval-results/RGB/yangheng/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..1adec75827e33722413afddd29d504c61349c815 --- /dev/null +++ b/bak/eval-results/RGB/yangheng/3UTRBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", 
"Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7772 + }, + "SNMD":{ + "AUC":0.5002 + }, + "SNMR":{ + "F1":0.2401 + }, + "ArchiveII":{ + "F1":0.7898 + }, + "bpRNA":{ + "F1":0.5693 + }, + "RNAStralign":{ + "F1":0.9203 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/RGB/yangheng/Agro-NT.json b/bak/eval-results/RGB/yangheng/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..846f30ec63c16f794989b0db1a154c59e1f3697c --- /dev/null +++ b/bak/eval-results/RGB/yangheng/Agro-NT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7830 + }, + "SNMD":{ + "AUC":0.4999 + }, + "SNMR":{ + "F1":0.2638 + }, + "ArchiveII":{ + "F1":0.7013 + }, + "bpRNA":{ + "F1":0.4871 + }, + "RNAStralign":{ + "F1":0.7521 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/RGB/yangheng/CDSBERT.json b/bak/eval-results/RGB/yangheng/CDSBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..3c4b2d0bacaf8c5ada6c773d2f2fbf5ed1b05fa4 --- /dev/null +++ b/bak/eval-results/RGB/yangheng/CDSBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + 
], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7468 + }, + "SNMD":{ + "AUC":0.5503 + }, + "SNMR":{ + "F1":0.3616 + }, + "ArchiveII":{ + "F1":0.8934 + }, + "bpRNA":{ + "F1":0.7001 + }, + "RNAStralign":{ + "F1":0.9715 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/RGB/yangheng/DNABERT-2-117M.json b/bak/eval-results/RGB/yangheng/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..d5420bb518b4cc9bbfaddbc533c30c0d9f49bb06 --- /dev/null +++ b/bak/eval-results/RGB/yangheng/DNABERT-2-117M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8158 + }, + "SNMD":{ + "AUC":0.4994 + }, + "SNMR":{ + "F1":0.1586 + }, + "ArchiveII":{ + "F1":0.5982 + }, + "bpRNA":{ + "F1":0.4340 + }, + "RNAStralign":{ + "F1":0.6549 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/RGB/yangheng/HyenaDNA.json b/bak/eval-results/RGB/yangheng/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..64a6a4fc545031813e11311068ee7011518c6410 --- /dev/null +++ b/bak/eval-results/RGB/yangheng/HyenaDNA.json @@ -0,0 +1,39 @@ +{ + "config":{ + 
"model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8056 + }, + "SNMD":{ + "AUC":0.5332 + }, + "SNMR":{ + "F1":0.3980 + }, + "ArchiveII":{ + "F1":0.8423 + }, + "bpRNA":{ + "F1":0.5662 + }, + "RNAStralign":{ + "F1":0.9542 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/RGB/yangheng/NT-V2-100M.json b/bak/eval-results/RGB/yangheng/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..f22a639787974b5149a8cd30360c249cf9409922 --- /dev/null +++ b/bak/eval-results/RGB/yangheng/NT-V2-100M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7826 + }, + "SNMD":{ + "AUC":0.5049 + }, + "SNMR":{ + "F1":0.2601 + }, + "ArchiveII":{ + "F1":0.7990 + }, + "bpRNA":{ + "F1":0.5660 + }, + "RNAStralign":{ + "F1":0.9084 + } + } +} \ No newline at end of file diff --git 
a/bak/eval-results/RGB/yangheng/OmniGenome186M.json b/bak/eval-results/RGB/yangheng/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..ec7663dc48867a7bc15c972ae4c11fba5fdb5e37 --- /dev/null +++ b/bak/eval-results/RGB/yangheng/OmniGenome186M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":0.6381 + }, + "SNMR":{ + "F1":0.4980 + }, + "ArchiveII":{ + "F1":0.9520 + }, + "bpRNA":{ + "F1":0.8248 + }, + "RNAStralign":{ + "F1":0.9912 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/RGB/yangheng/SpliceBERT.json b/bak/eval-results/RGB/yangheng/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..8ce43c5282635c6aabfd4a5e3b31435f198451af --- /dev/null +++ b/bak/eval-results/RGB/yangheng/SpliceBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7340 + }, + "SNMD":{ + "AUC":0.5811 + 
}, + "SNMR":{ + "F1":0.4644 + }, + "ArchiveII":{ + "F1":0.8905 + }, + "bpRNA":{ + "F1":0.6910 + }, + "RNAStralign":{ + "F1":0.9697 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/RGB/yangheng/results_OmniGenome-52M.json b/bak/eval-results/RGB/yangheng/results_OmniGenome-52M.json new file mode 100644 index 0000000000000000000000000000000000000000..c786546cc3bf47cc0eb0b208341a403f97563f90 --- /dev/null +++ b/bak/eval-results/RGB/yangheng/results_OmniGenome-52M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-52M", + "model_args":"pretrained=yangheng/omnigenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + "F1":0.9901 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/MODEL/3UTRBERT.json b/bak/eval-results/yangheng/MODEL/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..1adec75827e33722413afddd29d504c61349c815 --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/3UTRBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", 
"Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7772 + }, + "SNMD":{ + "AUC":0.5002 + }, + "SNMR":{ + "F1":0.2401 + }, + "ArchiveII":{ + "F1":0.7898 + }, + "bpRNA":{ + "F1":0.5693 + }, + "RNAStralign":{ + "F1":0.9203 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/MODEL/Agro-NT.json b/bak/eval-results/yangheng/MODEL/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..846f30ec63c16f794989b0db1a154c59e1f3697c --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/Agro-NT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7830 + }, + "SNMD":{ + "AUC":0.4999 + }, + "SNMR":{ + "F1":0.2638 + }, + "ArchiveII":{ + "F1":0.7013 + }, + "bpRNA":{ + "F1":0.4871 + }, + "RNAStralign":{ + "F1":0.7521 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/MODEL/CDSBERT.json b/bak/eval-results/yangheng/MODEL/CDSBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..3c4b2d0bacaf8c5ada6c773d2f2fbf5ed1b05fa4 --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/CDSBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + 
"batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7468 + }, + "SNMD":{ + "AUC":0.5503 + }, + "SNMR":{ + "F1":0.3616 + }, + "ArchiveII":{ + "F1":0.8934 + }, + "bpRNA":{ + "F1":0.7001 + }, + "RNAStralign":{ + "F1":0.9715 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/MODEL/DNABERT-2-117M.json b/bak/eval-results/yangheng/MODEL/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..d5420bb518b4cc9bbfaddbc533c30c0d9f49bb06 --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/DNABERT-2-117M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8158 + }, + "SNMD":{ + "AUC":0.4994 + }, + "SNMR":{ + "F1":0.1586 + }, + "ArchiveII":{ + "F1":0.5982 + }, + "bpRNA":{ + "F1":0.4340 + }, + "RNAStralign":{ + "F1":0.6549 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/MODEL/HyenaDNA.json b/bak/eval-results/yangheng/MODEL/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..64a6a4fc545031813e11311068ee7011518c6410 --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/HyenaDNA.json @@ -0,0 +1,39 @@ +{ + 
"config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8056 + }, + "SNMD":{ + "AUC":0.5332 + }, + "SNMR":{ + "F1":0.3980 + }, + "ArchiveII":{ + "F1":0.8423 + }, + "bpRNA":{ + "F1":0.5662 + }, + "RNAStralign":{ + "F1":0.9542 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/MODEL/NT-V2-100M.json b/bak/eval-results/yangheng/MODEL/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..f22a639787974b5149a8cd30360c249cf9409922 --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/NT-V2-100M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7826 + }, + "SNMD":{ + "AUC":0.5049 + }, + "SNMR":{ + "F1":0.2601 + }, + "ArchiveII":{ + "F1":0.7990 + }, + "bpRNA":{ + "F1":0.5660 + }, + "RNAStralign":{ + "F1":0.9084 + } + } +} \ No newline at end of file diff --git 
a/bak/eval-results/yangheng/MODEL/OmniGenome186M.json b/bak/eval-results/yangheng/MODEL/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..ec7663dc48867a7bc15c972ae4c11fba5fdb5e37 --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/OmniGenome186M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":0.6381 + }, + "SNMR":{ + "F1":0.4980 + }, + "ArchiveII":{ + "F1":0.9520 + }, + "bpRNA":{ + "F1":0.8248 + }, + "RNAStralign":{ + "F1":0.9912 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/MODEL/SpliceBERT.json b/bak/eval-results/yangheng/MODEL/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..8ce43c5282635c6aabfd4a5e3b31435f198451af --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/SpliceBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7340 + }, + "SNMD":{ + 
"AUC":0.5811 + }, + "SNMR":{ + "F1":0.4644 + }, + "ArchiveII":{ + "F1":0.8905 + }, + "bpRNA":{ + "F1":0.6910 + }, + "RNAStralign":{ + "F1":0.9697 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/MODEL/results_OmniGenome-52M.json b/bak/eval-results/yangheng/MODEL/results_OmniGenome-52M.json new file mode 100644 index 0000000000000000000000000000000000000000..c786546cc3bf47cc0eb0b208341a403f97563f90 --- /dev/null +++ b/bak/eval-results/yangheng/MODEL/results_OmniGenome-52M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-52M", + "model_args":"pretrained=yangheng/omnigenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + "F1":0.9901 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/OmniGenome/OmniGenome186MResults b/bak/eval-results/yangheng/OmniGenome/OmniGenome186MResults new file mode 100644 index 0000000000000000000000000000000000000000..2b994e7353ee07a544096f72681d9dfdc2dc8361 --- /dev/null +++ b/bak/eval-results/yangheng/OmniGenome/OmniGenome186MResults @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"omnigenome-186M", + "model_args":"pretrained=yangheng/OmniGenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining 
Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"OmniGenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":63.81 + }, + "SNMR":{ + "F1":49.80 + }, + "ArchiveII":{ + "F1":95.20 + }, + "bpRNA":{ + "F1":82.48 + }, + "RNAStralign":{ + "F1":99.12 + } + } +} \ No newline at end of file diff --git a/bak/eval-results/yangheng/OmniGenome/OmniGenomeResults.json b/bak/eval-results/yangheng/OmniGenome/OmniGenomeResults.json new file mode 100644 index 0000000000000000000000000000000000000000..fc9145f04a604469be222e27404786d6aae6e7c7 --- /dev/null +++ b/bak/eval-results/yangheng/OmniGenome/OmniGenomeResults.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"omnigenome-52M", + "model_args":"pretrained=yangheng/OmniGenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"OmniGenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + "F1":0.9901 + } + } +} \ No newline at end of file diff --git a/benchmarks/BEACON.zip b/benchmarks/BEACON.zip new file mode 100644 index 0000000000000000000000000000000000000000..d16051ee647889adea3b97aca963cf6b6fe5530e --- /dev/null +++ b/benchmarks/BEACON.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9361286a93bfb3178d6c14b29dc2f95cbeb903f166d485b8c23b6347b1653f +size 50173667 diff --git a/benchmarks/GB.zip b/benchmarks/GB.zip new file mode 100644 index 
0000000000000000000000000000000000000000..49ef2aca1b6e23065faae0aa28a904617c1345da --- /dev/null +++ b/benchmarks/GB.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6335335f544cc4a3b8bddffa446130ab6dfb9ab455c0bc2340b2487e40c1a40 +size 85312423 diff --git a/benchmarks/GUE.zip b/benchmarks/GUE.zip new file mode 100644 index 0000000000000000000000000000000000000000..c2c6c96d3ebc5280945beb73042097b55d7c7669 --- /dev/null +++ b/benchmarks/GUE.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85710e9285725d4b05d14780a8d2df9f59301145a932257bce4fcc5c1329da27 +size 77623068 diff --git a/benchmarks/PGB.zip b/benchmarks/PGB.zip new file mode 100644 index 0000000000000000000000000000000000000000..4d8c6aca6299f3ab8a83522f9a4bd79dc599218d --- /dev/null +++ b/benchmarks/PGB.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:818bd2e26027195d9aea19fca943403f84cfbccfb3dbb245901e352b25aa774b +size 123544404 diff --git a/benchmarks/RGB.zip b/benchmarks/RGB.zip new file mode 100644 index 0000000000000000000000000000000000000000..dd5be642d7ca60294cf8fd03f081945f59328507 --- /dev/null +++ b/benchmarks/RGB.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c0909498826104ba22123f211b6a2badac0b3fdc65e4e4060eb774fc971e7d0 +size 34252498 diff --git a/benchmarks_info.json b/benchmarks_info.json new file mode 100644 index 0000000000000000000000000000000000000000..9903506acec9cf5c130f07d05474f51c86d39262 --- /dev/null +++ b/benchmarks_info.json @@ -0,0 +1,52 @@ +{ + "RGB": { + "filename": "RGB.zip", + "genome": "RNA", + "species": "multi-species", + "task number": "", + "task type": "classification, regression", + "url": "", + "author": "", + "license": "" + }, + "PGB": { + "filename": "PGB.zip", + "genome": "DNA", + "species": "multi-species", + "task number": "", + "task type": "classification, regression", + "url": "", + "author": "", + "license": "" + }, + "PGB_Full": { + "filename": 
"PGB_Full.zip", + "genome": "DNA", + "species": "multi-species", + "task number": "", + "task type": "classification, regression", + "url": "", + "author": "", + "license": "" + }, + "GUE": { + "filename": "GUE.zip", + "genome": "DNA", + "species": "multi-species", + "task number": "", + "task type": "classification", + "url": "", + "author": "", + "license": "" + }, + "GB": { + "filename": "GB.zip", + "genome": "DNA", + "species": "multi-species", + "task number": "", + "task type": "classification", + "url": "", + "author": "", + "license": "" + } +} diff --git a/eval-queue/.gitattributes b/eval-queue/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..28df5f900b358436f0267334b3e3e9af33f917ba --- /dev/null +++ b/eval-queue/.gitattributes @@ -0,0 +1,55 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.lz4 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text 
+*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +# Audio files - uncompressed +*.pcm filter=lfs diff=lfs merge=lfs -text +*.sam filter=lfs diff=lfs merge=lfs -text +*.raw filter=lfs diff=lfs merge=lfs -text +# Audio files - compressed +*.aac filter=lfs diff=lfs merge=lfs -text +*.flac filter=lfs diff=lfs merge=lfs -text +*.mp3 filter=lfs diff=lfs merge=lfs -text +*.ogg filter=lfs diff=lfs merge=lfs -text +*.wav filter=lfs diff=lfs merge=lfs -text +# Image files - uncompressed +*.bmp filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.tiff filter=lfs diff=lfs merge=lfs -text +# Image files - compressed +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text diff --git a/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc --- /dev/null +++ b/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"} \ No 
newline at end of file diff --git a/eval-queue/GB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/eval-queue/GB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/eval-queue/GB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GB/README.md b/eval-queue/GB/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/eval-queue/GB/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a --- /dev/null +++ b/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"} \ No newline at end of file diff --git 
a/eval-queue/GB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/eval-queue/GB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/eval-queue/GB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/eval-queue/GB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/eval-queue/GB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/eval-queue/GB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/eval-queue/GB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": 
"Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/eval-queue/GB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/eval-queue/GB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc --- /dev/null +++ b/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GUE/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json 
new file mode 100644 index 0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/eval-queue/GUE/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GUE/README.md b/eval-queue/GUE/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/eval-queue/GUE/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a --- /dev/null +++ b/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GUE/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 
0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/eval-queue/GUE/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GUE/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/eval-queue/GUE/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GUE/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/eval-queue/GUE/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end 
of file diff --git a/eval-queue/GUE/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/eval-queue/GUE/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json b/eval-queue/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..576be2fb71b8a4784aabcba74c89753ce508b229 --- /dev/null +++ b/eval-queue/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "GleghornLab/cdsBERT", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 420, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json b/eval-queue/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..5082721283bbd97ebdb90d70d72fe7448cad8087 --- /dev/null +++ b/eval-queue/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/agro-nucleotide-transformer-1b", "base_model": "", "revision": "main", 
"private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 985, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/eval-queue/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/eval-queue/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..576be2fb71b8a4784aabcba74c89753ce508b229 --- /dev/null +++ b/eval-queue/PGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "GleghornLab/cdsBERT", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 420, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json new file mode 100644 index 
0000000000000000000000000000000000000000..5082721283bbd97ebdb90d70d72fe7448cad8087 --- /dev/null +++ b/eval-queue/PGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/agro-nucleotide-transformer-1b", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 985, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc --- /dev/null +++ b/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/eval-queue/PGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": 
"bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/README.md b/eval-queue/PGB/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/eval-queue/PGB/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a --- /dev/null +++ b/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/multimolecule/rnabert_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/multimolecule/rnabert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..026a555399655334d70dcb4fc55e56669ff8c00b --- /dev/null +++ b/eval-queue/PGB/multimolecule/rnabert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/rnabert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : 
pretrained", "likes": 0, "params": 0.48, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/multimolecule/rnafm_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/multimolecule/rnafm_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ea2ffc0dab27a753c373703fccfd7eb88ea57716 --- /dev/null +++ b/eval-queue/PGB/multimolecule/rnafm_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/rnafm", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 99.52, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/multimolecule/rnamsm_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/multimolecule/rnamsm_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7095daa6152aea1c8b87abe6e9e25c3ac3fb74ab --- /dev/null +++ b/eval-queue/PGB/multimolecule/rnamsm_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/rnamsm", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96.5, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/eval-queue/PGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", 
"revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/eval-queue/PGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/eval-queue/PGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/PGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 
0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/eval-queue/PGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/README.md b/eval-queue/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/eval-queue/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/eval-queue/RGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..576be2fb71b8a4784aabcba74c89753ce508b229 --- /dev/null +++ b/eval-queue/RGB/GleghornLab/cdsBERT_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "GleghornLab/cdsBERT", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 420, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/RGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..5082721283bbd97ebdb90d70d72fe7448cad8087 --- /dev/null +++ b/eval-queue/RGB/InstaDeepAI/agro-nucleotide-transformer-1b_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": 
"InstaDeepAI/agro-nucleotide-transformer-1b", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 985, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..b253088dd1195bd80fc63e2184ecd9231d02cc14 --- /dev/null +++ b/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 100, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/RGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..005daf5291c7bd4b370b71fbda580cec7d169b80 --- /dev/null +++ b/eval-queue/RGB/LongSafari/hyenadna-large-1m-seqlen-hf_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "LongSafari/hyenadna-large-1m-seqlen-hf", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 47, "license": "custom"} \ No newline at end of file diff 
--git a/eval-queue/RGB/README.md b/eval-queue/RGB/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/eval-queue/RGB/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/eval-queue/RGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/eval-queue/RGB/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/RGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/eval-queue/RGB/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/RGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 
0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/eval-queue/RGB/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/RGB/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..4076dc7827c8ae12fc87cc4670e462df5f98968a --- /dev/null +++ b/eval-queue/RGB/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-52M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 52, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/RGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/eval-queue/RGB/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file 
diff --git a/eval-queue/multimolecule/splicebert_eval_request_False_bfloat16_Original.json b/eval-queue/multimolecule/splicebert_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..658b35d504d33099cd421e629b53295fd3e20568 --- /dev/null +++ b/eval-queue/multimolecule/splicebert_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/splicebert", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 19.7, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json b/eval-queue/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..7f886f5471d1d50db29f073ad7cd8ce2fd20c91e --- /dev/null +++ b/eval-queue/multimolecule/utrbert-4mer_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "multimolecule/utrbert-4mer", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 86, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json b/eval-queue/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..c429db5ae1ff77b18ac6e4f8f0c58dcc5f4892a4 --- /dev/null +++ b/eval-queue/yangheng/omnigenome-186M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-186M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", 
"status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 186, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json b/eval-queue/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..4076dc7827c8ae12fc87cc4670e462df5f98968a --- /dev/null +++ b/eval-queue/yangheng/omnigenome-52M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "yangheng/omnigenome-52M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 52, "license": "custom"} \ No newline at end of file diff --git a/eval-queue/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json b/eval-queue/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json new file mode 100644 index 0000000000000000000000000000000000000000..ce39c9bd5427e85973ceadcd79c009dbe2d8dc49 --- /dev/null +++ b/eval-queue/zhihan1996/DNABERT-2-117M_eval_request_False_bfloat16_Original.json @@ -0,0 +1 @@ +{"model": "zhihan1996/DNABERT-2-117M", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 117, "license": "custom"} \ No newline at end of file diff --git a/eval-results/.gitattributes b/eval-results/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..28df5f900b358436f0267334b3e3e9af33f917ba --- /dev/null +++ b/eval-results/.gitattributes @@ -0,0 +1,55 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs 
-text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.lz4 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +# Audio files - uncompressed +*.pcm filter=lfs diff=lfs merge=lfs -text +*.sam filter=lfs diff=lfs merge=lfs -text +*.raw filter=lfs diff=lfs merge=lfs -text +# Audio files - compressed +*.aac filter=lfs diff=lfs merge=lfs -text +*.flac filter=lfs diff=lfs merge=lfs -text +*.mp3 filter=lfs diff=lfs merge=lfs -text +*.ogg filter=lfs diff=lfs merge=lfs -text +*.wav filter=lfs diff=lfs merge=lfs -text +# Image files - uncompressed +*.bmp filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.png 
filter=lfs diff=lfs merge=lfs -text +*.tiff filter=lfs diff=lfs merge=lfs -text +# Image files - compressed +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text diff --git a/eval-results/GB/3UTRBERT.json b/eval-results/GB/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..ce357a624ac6d43f25bc2ecdbcc16c11fe9787e3 --- /dev/null +++ b/eval-results/GB/3UTRBERT.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.8950 + }, + "DOW":{ + "F1":0.9022 + }, + "DRE":{ + "F1":0.7435 + }, + "DME":{ + "F1":0.8014 + }, + "HCE":{ + "F1":0.7023 + }, + "HEE":{ + "F1":0.7633 + }, + "HRE":{ + "F1":0.9847 + }, + "HNP":{ + "F1":0.8249 + }, + "HOR":{ + "F1":0.6678 + } + } +} \ No newline at end of file diff --git a/eval-results/GB/Caduceus.json b/eval-results/GB/Caduceus.json new file mode 100644 index 0000000000000000000000000000000000000000..4240022d6b17dd8de28fc9e7dc9c5a1873760630 --- /dev/null +++ b/eval-results/GB/Caduceus.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of 
Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9213 + }, + "DOW":{ + "F1":0.9474 + }, + "DRE":{ + "F1":0.7203 + }, + "DME":{ + "F1":0.7561 + }, + "HCE":{ + "F1":0.7020 + }, + "HEE":{ + "F1":0.7647 + }, + "HRE":{ + "F1":0.7916 + }, + "HNP":{ + "F1":0.8436 + }, + "HOR":{ + "F1":0.6317 + } + } +} \ No newline at end of file diff --git a/eval-results/GB/DNABERT-2-117M.json b/eval-results/GB/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..dc6e4754bd33a1a1da61ab44d2c2bbb87dc1b96a --- /dev/null +++ b/eval-results/GB/DNABERT-2-117M.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9267 + }, + "DOW":{ + "F1":0.9517 + }, + "DRE":{ + "F1":0.4377 + }, + "DME":{ + "F1":0.7721 + }, + "HCE":{ + "F1":0.7558 + }, + "HEE":{ + "F1":0.8066 + }, + "HRE":{ + "F1":0.7814 + }, + "HNP":{ + "F1":0.8580 + }, + "HOR":{ + "F1":0.6803 + } + } +} \ No newline at end of file diff --git a/eval-results/GB/HyenaDNA.json b/eval-results/GB/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..e16541883eeac9d04b2bff41acbb2800fbb61d17 --- /dev/null +++ b/eval-results/GB/HyenaDNA.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + 
"model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.8821 + }, + "DOW":{ + "F1":0.9413 + }, + "DRE":{ + "F1":0.7011 + }, + "DME":{ + "F1":0.7644 + }, + "HCE":{ + "F1":0.7038 + }, + "HEE":{ + "F1":0.7958 + }, + "HRE":{ + "F1":0.9633 + }, + "HNP":{ + "F1":0.8599 + }, + "HOR":{ + "F1":0.6703 + } + } +} \ No newline at end of file diff --git a/eval-results/GB/NT-V2-100M.json b/eval-results/GB/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..04e8c8e3026dee318fba5ff3f68d2b32f1cfe7f7 --- /dev/null +++ b/eval-results/GB/NT-V2-100M.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9166 + }, + "DOW":{ + "F1":0.9432 + }, + "DRE":{ + "F1":0.7820 + }, + "DME":{ + "F1":0.8172 + }, + "HCE":{ + "F1":0.7198 + }, + "HEE":{ + "F1":0.7985 + }, + "HRE":{ + "F1":0.9330 + }, + "HNP":{ + "F1":0.8530 + }, + "HOR":{ + "F1":0.6853 + } + } +} \ No newline at end of file diff 
--git a/eval-results/GB/OmniGenome186M.json b/eval-results/GB/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..4f53aeb07e97ab71bbc8f78e16fa87545fb0c43c --- /dev/null +++ b/eval-results/GB/OmniGenome186M.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9416 + }, + "DOW":{ + "F1":0.9349 + }, + "DRE":{ + "F1":0.7717 + }, + "DME":{ + "F1":0.8034 + }, + "HCE":{ + "F1":0.7351 + }, + "HEE":{ + "F1":0.8223 + }, + "HRE":{ + "F1":0.9566 + }, + "HNP":{ + "F1":0.8787 + }, + "HOR":{ + "F1":0.6897 + } + } +} \ No newline at end of file diff --git a/eval-results/GB/SpliceBERT.json b/eval-results/GB/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..ea4b1c7c66306ef5379832c7558d16a550a55561 --- /dev/null +++ b/eval-results/GB/SpliceBERT.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9472 + }, + "DOW":{ + "F1":0.9642 + }, + "DRE":{ + 
"F1":0.7229 + }, + "DME":{ + "F1":0.7470 + }, + "HCE":{ + "F1":0.7350 + }, + "HEE":{ + "F1":0.7960 + }, + "HRE":{ + "F1":0.9523 + }, + "HNP":{ + "F1":0.8957 + }, + "HOR":{ + "F1":0.6889 + } + } +} \ No newline at end of file diff --git a/eval-results/GUE/3UTRBERT.json b/eval-results/GUE/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..95d23ee36786021be7ec587e5b278036700807fc --- /dev/null +++ b/eval-results/GUE/3UTRBERT.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7189 + }, + "Mouse TF-M":{ + "F1":0.7146 + }, + "Virus CVC":{ + "F1":0.6871 + }, + "Human TF-H":{ + "F1":0.7485 + }, + "Human PD":{ + "F1":0.8237 + }, + "Human CPD":{ + "F1":0.9051 + }, + "Human SSP":{ + "F1":0.8195 + } + } +} \ No newline at end of file diff --git a/eval-results/GUE/Caduceus.json b/eval-results/GUE/Caduceus.json new file mode 100644 index 0000000000000000000000000000000000000000..5efcd074500e32eb68a3417305640a6e65ad74bb --- /dev/null +++ b/eval-results/GUE/Caduceus.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", 
"Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7349 + }, + "Mouse TF-M":{ + "F1":0.7818 + }, + "Virus CVC":{ + "F1":0.4909 + }, + "Human TF-H":{ + "F1":0.7956 + }, + "Human PD":{ + "F1":0.8913 + }, + "Human CPD":{ + "F1":0.8509 + }, + "Human SSP":{ + "F1":0.8182 + } + } +} \ No newline at end of file diff --git a/eval-results/GUE/DNABERT-2-117M.json b/eval-results/GUE/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..9ec4062de7f854995b73467107515d49ae9c4fb6 --- /dev/null +++ b/eval-results/GUE/DNABERT-2-117M.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7585 + }, + "Mouse TF-M":{ + "F1":0.8623 + }, + "Virus CVC":{ + "F1":0.6890 + }, + "Human TF-H":{ + "F1":0.8180 + }, + "Human PD":{ + "F1":0.9017 + }, + "Human CPD":{ + "F1":0.8257 + }, + "Human SSP":{ + "F1":0.8521 + } + } +} \ No newline at end of file diff --git a/eval-results/GUE/HyenaDNA.json b/eval-results/GUE/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..2e8f09165601c1cccda8918d51e6785afef7cc75 --- /dev/null +++ b/eval-results/GUE/HyenaDNA.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + 
"model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7308 + }, + "Mouse TF-M":{ + "F1":0.7344 + }, + "Virus CVC":{ + "F1":0.6637 + }, + "Human TF-H":{ + "F1":0.7762 + }, + "Human PD":{ + "F1":0.9119 + }, + "Human CPD":{ + "F1":0.8431 + }, + "Human SSP":{ + "F1":0.8334 + } + } +} \ No newline at end of file diff --git a/eval-results/GUE/NT-V2-100M.json b/eval-results/GUE/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..2b3a39d97cf155bf7888c9cdd54c03c6bddfd8b9 --- /dev/null +++ b/eval-results/GUE/NT-V2-100M.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7493 + }, + "Mouse TF-M":{ + "F1":0.7810 + }, + "Virus CVC":{ + "F1":0.5923 + }, + "Human TF-H":{ + "F1":0.7912 + }, + "Human PD":{ + "F1":0.9087 + }, + "Human CPD":{ + "F1":0.8470 + }, + "Human SSP":{ + "F1":0.8413 + } + } +} \ No newline at end of file diff --git 
a/eval-results/GUE/OmniGenome186M.json b/eval-results/GUE/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..7a8f30ed4659430d7155800e51113eb113cea5ea --- /dev/null +++ b/eval-results/GUE/OmniGenome186M.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7851 + }, + "Mouse TF-M":{ + "F1":0.8472 + }, + "Virus CVC":{ + "F1":0.7472 + }, + "Human TF-H":{ + "F1":0.8173 + }, + "Human PD":{ + "F1":0.9004 + }, + "Human CPD":{ + "F1":0.8522 + }, + "Human SSP":{ + "F1":0.9039 + } + } +} \ No newline at end of file diff --git a/eval-results/GUE/SpliceBERT.json b/eval-results/GUE/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..e68ed40e24cf3d44c7e36b5e0126543abdc237ac --- /dev/null +++ b/eval-results/GUE/SpliceBERT.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7766 + }, + "Mouse TF-M":{ + "F1":0.8497 + }, + "Virus CVC":{ + 
"F1":0.5624 + }, + "Human TF-H":{ + "F1":0.8277 + }, + "Human PD":{ + "F1":0.9224 + }, + "Human CPD":{ + "F1":0.8396 + }, + "Human SSP":{ + "F1":0.9381 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/3UTRBERT.json b/eval-results/PGB/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..095b1f90d8ed20191ab55b722ed6a4ec48c26e48 --- /dev/null +++ b/eval-results/PGB/3UTRBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, +"results":{ + "PolyA":{ + "F1":0.7648 + }, + "LncRNA":{ + "F1":0.7075 + }, + "Chrom Acc":{ + "F1":0.6371 + }, + "Prom Str":{ + "RMSE":1.04 + }, + "Term Str":{ + "RMSE":0.36 + }, + "Splice":{ + "F1":0.9444 + }, + "Gene Exp":{ + "RMSE":14.87 + }, + "Enhancer":{ + "F1":0.7167 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/Agro-NT.json b/eval-results/PGB/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..9c1cf5acde27b681f67c9ca8e23b5bd3b9c6e63e --- /dev/null +++ b/eval-results/PGB/Agro-NT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible 
Plants", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, +"results":{ + "PolyA":{ + "F1":0.7889 + }, + "LncRNA":{ + "F1":0.6724 + }, + "Chrom Acc":{ + "F1":0.6327 + }, + "Prom Str":{ + "RMSE":0.94 + }, + "Term Str":{ + "RMSE":0.78 + }, + "Splice":{ + "F1":0.8845 + }, + "Gene Exp":{ + "RMSE":15.56 + }, + "Enhancer":{ + "F1":0.6283 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/CDSBERT.json b/eval-results/PGB/CDSBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..92365f0f58bc175190a3ff95cbc24d2093494f09 --- /dev/null +++ b/eval-results/PGB/CDSBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.3972 + }, + "LncRNA":{ + "F1":0.3306 + }, + "Chrom Acc":{ + "F1":0.4895 + }, + "Prom Str":{ + "RMSE":2.19 + }, + "Term Str":{ + "RMSE":0.59 + }, + "Splice":{ + "F1":0.5220 + }, + "Gene Exp":{ + "RMSE":14.77 + }, + "Enhancer":{ + "F1":0.3393 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/Caduceus.json b/eval-results/PGB/Caduceus.json new file mode 100644 index 0000000000000000000000000000000000000000..80cfc4a525127aff7dbc5d0a71224d432319bbc6 --- /dev/null +++ b/eval-results/PGB/Caduceus.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= 
kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7089 + }, + "LncRNA":{ + "F1":0.6840 + }, + "Chrom Acc":{ + "F1":0.6453 + }, + "Prom Str":{ + "RMSE":0.91 + }, + "Term Str":{ + "RMSE":0.26 + }, + "Splice":{ + "F1":0.7951 + }, + "Gene Exp":{ + "RMSE":14.72 + }, + "Enhancer":{ + "F1":0.6083 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/DNABERT-2-117M.json b/eval-results/PGB/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..3906893d2ce05c38ed29efe43ba07f466bdc0e58 --- /dev/null +++ b/eval-results/PGB/DNABERT-2-117M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.4135 + }, + "LncRNA":{ + "F1":0.7255 + }, + "Chrom Acc":{ + "F1":0.6149 + }, + "Prom Str":{ + "RMSE":0.99 + }, + "Term Str":{ + "RMSE":0.24 + }, + "Splice":{ + "F1":0.4534 + }, + "Gene Exp":{ + "RMSE":14.78 + }, + "Enhancer":{ + "F1":0.3640 + } + } +} \ No newline at end of file diff --git 
a/eval-results/PGB/HyenaDNA.json b/eval-results/PGB/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..af033dd0115d50a3bdebed9eb98038b3850d3c41 --- /dev/null +++ b/eval-results/PGB/HyenaDNA.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8311 + }, + "LncRNA":{ + "F1":0.5821 + }, + "Chrom Acc":{ + "F1":0.5220 + }, + "Prom Str":{ + "RMSE":0.88 + }, + "Term Str":{ + "RMSE":0.26 + }, + "Splice":{ + "F1":0.9028 + }, + "Gene Exp":{ + "RMSE":14.76 + }, + "Enhancer":{ + "F1":0.6617 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/NT-V2-100M.json b/eval-results/PGB/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..9dbcaf073a15ce2d3e1c89b16d7b65d074e97a89 --- /dev/null +++ b/eval-results/PGB/NT-V2-100M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + 
"model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7126 + }, + "LncRNA":{ + "F1":0.7308 + }, + "Chrom Acc":{ + "F1":0.6571 + }, + "Prom Str":{ + "RMSE":0.81 + }, + "Term Str":{ + "RMSE":0.27 + }, + "Splice":{ + "F1":0.9505 + }, + "Gene Exp":{ + "RMSE":14.69 + }, + "Enhancer":{ + "F1":0.7389 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/OmniGenome186M.json b/eval-results/PGB/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..06b0c51eead662453dcfb7fe0c41ae7a8297046b --- /dev/null +++ b/eval-results/PGB/OmniGenome186M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8755 + }, + "LncRNA":{ + "F1":0.7796 + }, + "Chrom Acc":{ + "F1":0.6769 + }, + "Prom Str":{ + "RMSE":0.59 + }, + "Term Str":{ + "RMSE":0.18 + }, + "Splice":{ + "F1":0.9841 + }, + "Gene Exp":{ + "RMSE":14.71 + }, + "Enhancer":{ + "F1":0.7977 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/RNA-BERT.json b/eval-results/PGB/RNA-BERT.json new file mode 100644 index 0000000000000000000000000000000000000000..dc87b9183fd0d44c57fc5f39596f9670be1d7b88 --- /dev/null +++ b/eval-results/PGB/RNA-BERT.json @@ -0,0 +1,46 @@ +{ + "config":{ + "model":"multimolecule/rnabert", + "model_args":"pretrained=multimolecule/rnabert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + 
"description_dict":{"Tokenization":"SNT", "# of Params":"0.48M", "Pretraining Data":"76,237 human ncRNA sequences", "Species":"Human", "Nucleic Acid":"ncRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnabert", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7854 + }, + "LncRNA":{ + "F1":0.6199 + }, + "Chrom Acc":{ + "F1":0.4894 + }, + "Prom Str":{ + "RMSE":1.81 + }, + "Term Str":{ + "RMSE":0.38 + }, + "Splice":{ + "F1":0.9445 + }, + "Gene Exp":{ + "RMSE":14.89 + }, + "Enhancer":{ + "F1":0.5761 + } + + } +} \ No newline at end of file diff --git a/eval-results/PGB/RNA-FM.json b/eval-results/PGB/RNA-FM.json new file mode 100644 index 0000000000000000000000000000000000000000..73cc0c1f5125e649358e73058bb3310733f164d3 --- /dev/null +++ b/eval-results/PGB/RNA-FM.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/rnafm", + "model_args":"pretrained=multimolecule/rnafm,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"99.52M", "Pretraining Data":"23.7 million non-redundant RNA sequences", "Species":"Multi-Species", "Nucleic Acid":"ncRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnafm", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8494 + }, + "LncRNA":{ + "F1":0.6875 + }, + "Chrom Acc":{ + "F1":0.5492 + }, + "Prom Str":{ + "RMSE":0.95 + }, + "Term Str":{ + "RMSE":0.27 + }, + "Splice":{ + "F1":0.9595 + }, + "Gene Exp":{ + "RMSE":14.83 + }, + "Enhancer":{ + "F1":0.5714 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/RNA-MSM.json b/eval-results/PGB/RNA-MSM.json new file mode 100644 index 0000000000000000000000000000000000000000..282ad3a11ed1f200ccd93c78fcd961d94c4364de --- /dev/null +++ b/eval-results/PGB/RNA-MSM.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/rnamsm", + 
"model_args":"pretrained=multimolecule/rnamsm,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"96.5M", "Pretraining Data":"3,932 RNA families", "Species":"Multi-Species", "Nucleic Acid":"RNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnamsm", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8425 + }, + "LncRNA":{ + "F1":0.6749 + }, + "Chrom Acc":{ + "F1":0.5352 + }, + "Prom Str":{ + "RMSE":1.28 + }, + "Term Str":{ + "RMSE":0.28 + }, + "Splice":{ + "F1":0.9549 + }, + "Gene Exp":{ + "RMSE":14.87 + }, + "Enhancer":{ + "F1":0.6145 + } + } +} \ No newline at end of file diff --git a/eval-results/PGB/SpliceBERT.json b/eval-results/PGB/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..f52106504f5a96e28e24a29e9ef7019115a7f8ab --- /dev/null +++ b/eval-results/PGB/SpliceBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.6523 + }, + "LncRNA":{ + "F1":0.7188 + }, + "Chrom Acc":{ + "F1":0.6362 + }, + "Prom Str":{ + "RMSE":0.75 + }, + "Term Str":{ + "RMSE":0.22 + }, + "Splice":{ + "F1":0.9645 + }, + "Gene Exp":{ + "RMSE":14.70 + }, + "Enhancer":{ + "F1":0.6971 + } + } +} \ No newline at end of file diff --git a/eval-results/README.md b/eval-results/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/eval-results/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/eval-results/RGB/yangheng/3UTRBERT.json b/eval-results/RGB/yangheng/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..1adec75827e33722413afddd29d504c61349c815 --- /dev/null +++ b/eval-results/RGB/yangheng/3UTRBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7772 + }, + "SNMD":{ + "AUC":0.5002 + }, + "SNMR":{ + "F1":0.2401 + }, + "ArchiveII":{ + "F1":0.7898 + }, + "bpRNA":{ + "F1":0.5693 + }, + "RNAStralign":{ + "F1":0.9203 + } + } +} \ No newline at end of file diff --git a/eval-results/RGB/yangheng/Agro-NT.json b/eval-results/RGB/yangheng/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..846f30ec63c16f794989b0db1a154c59e1f3697c --- /dev/null +++ b/eval-results/RGB/yangheng/Agro-NT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"}, + 
"model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7830 + }, + "SNMD":{ + "AUC":0.4999 + }, + "SNMR":{ + "F1":0.2638 + }, + "ArchiveII":{ + "F1":0.7013 + }, + "bpRNA":{ + "F1":0.4871 + }, + "RNAStralign":{ + "F1":0.7521 + } + } +} \ No newline at end of file diff --git a/eval-results/RGB/yangheng/CDSBERT.json b/eval-results/RGB/yangheng/CDSBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..3c4b2d0bacaf8c5ada6c773d2f2fbf5ed1b05fa4 --- /dev/null +++ b/eval-results/RGB/yangheng/CDSBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7468 + }, + "SNMD":{ + "AUC":0.5503 + }, + "SNMR":{ + "F1":0.3616 + }, + "ArchiveII":{ + "F1":0.8934 + }, + "bpRNA":{ + "F1":0.7001 + }, + "RNAStralign":{ + "F1":0.9715 + } + } +} \ No newline at end of file diff --git a/eval-results/RGB/yangheng/DNABERT-2-117M.json b/eval-results/RGB/yangheng/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..d5420bb518b4cc9bbfaddbc533c30c0d9f49bb06 --- /dev/null +++ b/eval-results/RGB/yangheng/DNABERT-2-117M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + 
"description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8158 + }, + "SNMD":{ + "AUC":0.4994 + }, + "SNMR":{ + "F1":0.1586 + }, + "ArchiveII":{ + "F1":0.5982 + }, + "bpRNA":{ + "F1":0.4340 + }, + "RNAStralign":{ + "F1":0.6549 + } + } +} \ No newline at end of file diff --git a/eval-results/RGB/yangheng/HyenaDNA.json b/eval-results/RGB/yangheng/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..64a6a4fc545031813e11311068ee7011518c6410 --- /dev/null +++ b/eval-results/RGB/yangheng/HyenaDNA.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8056 + }, + "SNMD":{ + "AUC":0.5332 + }, + "SNMR":{ + "F1":0.3980 + }, + "ArchiveII":{ + "F1":0.8423 + }, + "bpRNA":{ + "F1":0.5662 + }, + "RNAStralign":{ + "F1":0.9542 + } + } +} \ No newline at end of file diff --git a/eval-results/RGB/yangheng/NT-V2-100M.json b/eval-results/RGB/yangheng/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..f22a639787974b5149a8cd30360c249cf9409922 --- /dev/null +++ b/eval-results/RGB/yangheng/NT-V2-100M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + 
"model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7826 + }, + "SNMD":{ + "AUC":0.5049 + }, + "SNMR":{ + "F1":0.2601 + }, + "ArchiveII":{ + "F1":0.7990 + }, + "bpRNA":{ + "F1":0.5660 + }, + "RNAStralign":{ + "F1":0.9084 + } + } +} \ No newline at end of file diff --git a/eval-results/RGB/yangheng/OmniGenome186M.json b/eval-results/RGB/yangheng/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..ec7663dc48867a7bc15c972ae4c11fba5fdb5e37 --- /dev/null +++ b/eval-results/RGB/yangheng/OmniGenome186M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":0.6381 + }, + "SNMR":{ + "F1":0.4980 + }, + "ArchiveII":{ + "F1":0.9520 + }, + "bpRNA":{ + "F1":0.8248 + }, + "RNAStralign":{ + "F1":0.9912 + } + } +} \ No newline at end of file diff --git a/eval-results/RGB/yangheng/SpliceBERT.json b/eval-results/RGB/yangheng/SpliceBERT.json new file mode 100644 index 
0000000000000000000000000000000000000000..8ce43c5282635c6aabfd4a5e3b31435f198451af --- /dev/null +++ b/eval-results/RGB/yangheng/SpliceBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7340 + }, + "SNMD":{ + "AUC":0.5811 + }, + "SNMR":{ + "F1":0.4644 + }, + "ArchiveII":{ + "F1":0.8905 + }, + "bpRNA":{ + "F1":0.6910 + }, + "RNAStralign":{ + "F1":0.9697 + } + } +} \ No newline at end of file diff --git a/eval-results/RGB/yangheng/results_OmniGenome-52M.json b/eval-results/RGB/yangheng/results_OmniGenome-52M.json new file mode 100644 index 0000000000000000000000000000000000000000..c786546cc3bf47cc0eb0b208341a403f97563f90 --- /dev/null +++ b/eval-results/RGB/yangheng/results_OmniGenome-52M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-52M", + "model_args":"pretrained=yangheng/omnigenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + 
"F1":0.9901 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/.gitattributes b/eval-results/yangheng/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..28df5f900b358436f0267334b3e3e9af33f917ba --- /dev/null +++ b/eval-results/yangheng/.gitattributes @@ -0,0 +1,55 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.lz4 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +# Audio files - uncompressed +*.pcm filter=lfs diff=lfs merge=lfs -text 
+*.sam filter=lfs diff=lfs merge=lfs -text +*.raw filter=lfs diff=lfs merge=lfs -text +# Audio files - compressed +*.aac filter=lfs diff=lfs merge=lfs -text +*.flac filter=lfs diff=lfs merge=lfs -text +*.mp3 filter=lfs diff=lfs merge=lfs -text +*.ogg filter=lfs diff=lfs merge=lfs -text +*.wav filter=lfs diff=lfs merge=lfs -text +# Image files - uncompressed +*.bmp filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.tiff filter=lfs diff=lfs merge=lfs -text +# Image files - compressed +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text diff --git a/eval-results/yangheng/GB/3UTRBERT.json b/eval-results/yangheng/GB/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..ce357a624ac6d43f25bc2ecdbcc16c11fe9787e3 --- /dev/null +++ b/eval-results/yangheng/GB/3UTRBERT.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.8950 + }, + "DOW":{ + "F1":0.9022 + }, + "DRE":{ + "F1":0.7435 + }, + "DME":{ + "F1":0.8014 + }, + "HCE":{ + "F1":0.7023 + }, + "HEE":{ + "F1":0.7633 + }, + "HRE":{ + "F1":0.9847 + }, + "HNP":{ + "F1":0.8249 + }, + "HOR":{ + "F1":0.6678 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GB/Caduceus.json b/eval-results/yangheng/GB/Caduceus.json new file mode 100644 index 
0000000000000000000000000000000000000000..4240022d6b17dd8de28fc9e7dc9c5a1873760630 --- /dev/null +++ b/eval-results/yangheng/GB/Caduceus.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9213 + }, + "DOW":{ + "F1":0.9474 + }, + "DRE":{ + "F1":0.7203 + }, + "DME":{ + "F1":0.7561 + }, + "HCE":{ + "F1":0.7020 + }, + "HEE":{ + "F1":0.7647 + }, + "HRE":{ + "F1":0.7916 + }, + "HNP":{ + "F1":0.8436 + }, + "HOR":{ + "F1":0.6317 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GB/DNABERT-2-117M.json b/eval-results/yangheng/GB/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..dc6e4754bd33a1a1da61ab44d2c2bbb87dc1b96a --- /dev/null +++ b/eval-results/yangheng/GB/DNABERT-2-117M.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9267 + }, + 
"DOW":{ + "F1":0.9517 + }, + "DRE":{ + "F1":0.4377 + }, + "DME":{ + "F1":0.7721 + }, + "HCE":{ + "F1":0.7558 + }, + "HEE":{ + "F1":0.8066 + }, + "HRE":{ + "F1":0.7814 + }, + "HNP":{ + "F1":0.8580 + }, + "HOR":{ + "F1":0.6803 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GB/HyenaDNA.json b/eval-results/yangheng/GB/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..e16541883eeac9d04b2bff41acbb2800fbb61d17 --- /dev/null +++ b/eval-results/yangheng/GB/HyenaDNA.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.8821 + }, + "DOW":{ + "F1":0.9413 + }, + "DRE":{ + "F1":0.7011 + }, + "DME":{ + "F1":0.7644 + }, + "HCE":{ + "F1":0.7038 + }, + "HEE":{ + "F1":0.7958 + }, + "HRE":{ + "F1":0.9633 + }, + "HNP":{ + "F1":0.8599 + }, + "HOR":{ + "F1":0.6703 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GB/NT-V2-100M.json b/eval-results/yangheng/GB/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..04e8c8e3026dee318fba5ff3f68d2b32f1cfe7f7 --- /dev/null +++ b/eval-results/yangheng/GB/NT-V2-100M.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + 
"limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9166 + }, + "DOW":{ + "F1":0.9432 + }, + "DRE":{ + "F1":0.7820 + }, + "DME":{ + "F1":0.8172 + }, + "HCE":{ + "F1":0.7198 + }, + "HEE":{ + "F1":0.7985 + }, + "HRE":{ + "F1":0.9330 + }, + "HNP":{ + "F1":0.8530 + }, + "HOR":{ + "F1":0.6853 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GB/OmniGenome186M.json b/eval-results/yangheng/GB/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..4f53aeb07e97ab71bbc8f78e16fa87545fb0c43c --- /dev/null +++ b/eval-results/yangheng/GB/OmniGenome186M.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9416 + }, + "DOW":{ + "F1":0.9349 + }, + "DRE":{ + "F1":0.7717 + }, + "DME":{ + "F1":0.8034 + }, + "HCE":{ + "F1":0.7351 + }, + "HEE":{ + "F1":0.8223 + }, + "HRE":{ + "F1":0.9566 + }, + "HNP":{ + "F1":0.8787 + }, + "HOR":{ + "F1":0.6897 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GB/SpliceBERT.json b/eval-results/yangheng/GB/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..ea4b1c7c66306ef5379832c7558d16a550a55561 --- 
/dev/null +++ b/eval-results/yangheng/GB/SpliceBERT.json @@ -0,0 +1,48 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "DEM":{ + "F1":0.9472 + }, + "DOW":{ + "F1":0.9642 + }, + "DRE":{ + "F1":0.7229 + }, + "DME":{ + "F1":0.7470 + }, + "HCE":{ + "F1":0.7350 + }, + "HEE":{ + "F1":0.7960 + }, + "HRE":{ + "F1":0.9523 + }, + "HNP":{ + "F1":0.8957 + }, + "HOR":{ + "F1":0.6889 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GUE/3UTRBERT.json b/eval-results/yangheng/GUE/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..95d23ee36786021be7ec587e5b278036700807fc --- /dev/null +++ b/eval-results/yangheng/GUE/3UTRBERT.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7189 + }, + "Mouse TF-M":{ + "F1":0.7146 + }, + "Virus CVC":{ + "F1":0.6871 + }, + "Human TF-H":{ + "F1":0.7485 + }, + "Human PD":{ + "F1":0.8237 + }, + "Human CPD":{ + "F1":0.9051 + }, + "Human SSP":{ + 
"F1":0.8195 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GUE/Caduceus.json b/eval-results/yangheng/GUE/Caduceus.json new file mode 100644 index 0000000000000000000000000000000000000000..5efcd074500e32eb68a3417305640a6e65ad74bb --- /dev/null +++ b/eval-results/yangheng/GUE/Caduceus.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7349 + }, + "Mouse TF-M":{ + "F1":0.7818 + }, + "Virus CVC":{ + "F1":0.4909 + }, + "Human TF-H":{ + "F1":0.7956 + }, + "Human PD":{ + "F1":0.8913 + }, + "Human CPD":{ + "F1":0.8509 + }, + "Human SSP":{ + "F1":0.8182 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GUE/DNABERT-2-117M.json b/eval-results/yangheng/GUE/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..9ec4062de7f854995b73467107515d49ae9c4fb6 --- /dev/null +++ b/eval-results/yangheng/GUE/DNABERT-2-117M.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", 
"Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7585 + }, + "Mouse TF-M":{ + "F1":0.8623 + }, + "Virus CVC":{ + "F1":0.6890 + }, + "Human TF-H":{ + "F1":0.8180 + }, + "Human PD":{ + "F1":0.9017 + }, + "Human CPD":{ + "F1":0.8257 + }, + "Human SSP":{ + "F1":0.8521 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GUE/HyenaDNA.json b/eval-results/yangheng/GUE/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..2e8f09165601c1cccda8918d51e6785afef7cc75 --- /dev/null +++ b/eval-results/yangheng/GUE/HyenaDNA.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7308 + }, + "Mouse TF-M":{ + "F1":0.7344 + }, + "Virus CVC":{ + "F1":0.6637 + }, + "Human TF-H":{ + "F1":0.7762 + }, + "Human PD":{ + "F1":0.9119 + }, + "Human CPD":{ + "F1":0.8431 + }, + "Human SSP":{ + "F1":0.8334 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GUE/NT-V2-100M.json b/eval-results/yangheng/GUE/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..2b3a39d97cf155bf7888c9cdd54c03c6bddfd8b9 --- /dev/null +++ b/eval-results/yangheng/GUE/NT-V2-100M.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + 
"model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7493 + }, + "Mouse TF-M":{ + "F1":0.7810 + }, + "Virus CVC":{ + "F1":0.5923 + }, + "Human TF-H":{ + "F1":0.7912 + }, + "Human PD":{ + "F1":0.9087 + }, + "Human CPD":{ + "F1":0.8470 + }, + "Human SSP":{ + "F1":0.8413 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/GUE/OmniGenome186M.json b/eval-results/yangheng/GUE/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..7a8f30ed4659430d7155800e51113eb113cea5ea --- /dev/null +++ b/eval-results/yangheng/GUE/OmniGenome186M.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7851 + }, + "Mouse TF-M":{ + "F1":0.8472 + }, + "Virus CVC":{ + "F1":0.7472 + }, + "Human TF-H":{ + "F1":0.8173 + }, + "Human PD":{ + "F1":0.9004 + }, + "Human CPD":{ + "F1":0.8522 + }, + "Human SSP":{ + "F1":0.9039 + } + } +} \ No newline at end of file diff --git 
a/eval-results/yangheng/GUE/SpliceBERT.json b/eval-results/yangheng/GUE/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..e68ed40e24cf3d44c7e36b5e0126543abdc237ac --- /dev/null +++ b/eval-results/yangheng/GUE/SpliceBERT.json @@ -0,0 +1,42 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "Yeast EMP":{ + "F1":0.7766 + }, + "Mouse TF-M":{ + "F1":0.8497 + }, + "Virus CVC":{ + "F1":0.5624 + }, + "Human TF-H":{ + "F1":0.8277 + }, + "Human PD":{ + "F1":0.9224 + }, + "Human CPD":{ + "F1":0.8396 + }, + "Human SSP":{ + "F1":0.9381 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/3UTRBERT.json b/eval-results/yangheng/MODEL/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..1adec75827e33722413afddd29d504c61349c815 --- /dev/null +++ b/eval-results/yangheng/MODEL/3UTRBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7772 + }, + "SNMD":{ 
+ "AUC":0.5002 + }, + "SNMR":{ + "F1":0.2401 + }, + "ArchiveII":{ + "F1":0.7898 + }, + "bpRNA":{ + "F1":0.5693 + }, + "RNAStralign":{ + "F1":0.9203 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/Agro-NT.json b/eval-results/yangheng/MODEL/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..846f30ec63c16f794989b0db1a154c59e1f3697c --- /dev/null +++ b/eval-results/yangheng/MODEL/Agro-NT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7830 + }, + "SNMD":{ + "AUC":0.4999 + }, + "SNMR":{ + "F1":0.2638 + }, + "ArchiveII":{ + "F1":0.7013 + }, + "bpRNA":{ + "F1":0.4871 + }, + "RNAStralign":{ + "F1":0.7521 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/CDSBERT.json b/eval-results/yangheng/MODEL/CDSBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..3c4b2d0bacaf8c5ada6c773d2f2fbf5ed1b05fa4 --- /dev/null +++ b/eval-results/yangheng/MODEL/CDSBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic 
Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7468 + }, + "SNMD":{ + "AUC":0.5503 + }, + "SNMR":{ + "F1":0.3616 + }, + "ArchiveII":{ + "F1":0.8934 + }, + "bpRNA":{ + "F1":0.7001 + }, + "RNAStralign":{ + "F1":0.9715 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/DNABERT-2-117M.json b/eval-results/yangheng/MODEL/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..d5420bb518b4cc9bbfaddbc533c30c0d9f49bb06 --- /dev/null +++ b/eval-results/yangheng/MODEL/DNABERT-2-117M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8158 + }, + "SNMD":{ + "AUC":0.4994 + }, + "SNMR":{ + "F1":0.1586 + }, + "ArchiveII":{ + "F1":0.5982 + }, + "bpRNA":{ + "F1":0.4340 + }, + "RNAStralign":{ + "F1":0.6549 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/HyenaDNA.json b/eval-results/yangheng/MODEL/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..64a6a4fc545031813e11311068ee7011518c6410 --- /dev/null +++ b/eval-results/yangheng/MODEL/HyenaDNA.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + 
"limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8056 + }, + "SNMD":{ + "AUC":0.5332 + }, + "SNMR":{ + "F1":0.3980 + }, + "ArchiveII":{ + "F1":0.8423 + }, + "bpRNA":{ + "F1":0.5662 + }, + "RNAStralign":{ + "F1":0.9542 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/NT-V2-100M.json b/eval-results/yangheng/MODEL/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..f22a639787974b5149a8cd30360c249cf9409922 --- /dev/null +++ b/eval-results/yangheng/MODEL/NT-V2-100M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7826 + }, + "SNMD":{ + "AUC":0.5049 + }, + "SNMR":{ + "F1":0.2601 + }, + "ArchiveII":{ + "F1":0.7990 + }, + "bpRNA":{ + "F1":0.5660 + }, + "RNAStralign":{ + "F1":0.9084 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/OmniGenome186M.json b/eval-results/yangheng/MODEL/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..ec7663dc48867a7bc15c972ae4c11fba5fdb5e37 --- /dev/null +++ b/eval-results/yangheng/MODEL/OmniGenome186M.json @@ 
-0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":0.6381 + }, + "SNMR":{ + "F1":0.4980 + }, + "ArchiveII":{ + "F1":0.9520 + }, + "bpRNA":{ + "F1":0.8248 + }, + "RNAStralign":{ + "F1":0.9912 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/SpliceBERT.json b/eval-results/yangheng/MODEL/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..8ce43c5282635c6aabfd4a5e3b31435f198451af --- /dev/null +++ b/eval-results/yangheng/MODEL/SpliceBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7340 + }, + "SNMD":{ + "AUC":0.5811 + }, + "SNMR":{ + "F1":0.4644 + }, + "ArchiveII":{ + "F1":0.8905 + }, + "bpRNA":{ + "F1":0.6910 + }, + "RNAStralign":{ + "F1":0.9697 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/MODEL/results_OmniGenome-52M.json 
b/eval-results/yangheng/MODEL/results_OmniGenome-52M.json new file mode 100644 index 0000000000000000000000000000000000000000..c786546cc3bf47cc0eb0b208341a403f97563f90 --- /dev/null +++ b/eval-results/yangheng/MODEL/results_OmniGenome-52M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-52M", + "model_args":"pretrained=yangheng/omnigenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + "F1":0.9901 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/OmniGenome/OmniGenome186MResults b/eval-results/yangheng/OmniGenome/OmniGenome186MResults new file mode 100644 index 0000000000000000000000000000000000000000..2b994e7353ee07a544096f72681d9dfdc2dc8361 --- /dev/null +++ b/eval-results/yangheng/OmniGenome/OmniGenome186MResults @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"omnigenome-186M", + "model_args":"pretrained=yangheng/OmniGenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"OmniGenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":63.81 + }, + "SNMR":{ + "F1":49.80 + }, + 
"ArchiveII":{ + "F1":95.20 + }, + "bpRNA":{ + "F1":82.48 + }, + "RNAStralign":{ + "F1":99.12 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/OmniGenome/OmniGenomeResults.json b/eval-results/yangheng/OmniGenome/OmniGenomeResults.json new file mode 100644 index 0000000000000000000000000000000000000000..fc9145f04a604469be222e27404786d6aae6e7c7 --- /dev/null +++ b/eval-results/yangheng/OmniGenome/OmniGenomeResults.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"omnigenome-52M", + "model_args":"pretrained=yangheng/OmniGenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"OmniGenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + "F1":0.9901 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/3UTRBERT.json b/eval-results/yangheng/PGB/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..095b1f90d8ed20191ab55b722ed6a4ec48c26e48 --- /dev/null +++ b/eval-results/yangheng/PGB/3UTRBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + 
"model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, +"results":{ + "PolyA":{ + "F1":0.7648 + }, + "LncRNA":{ + "F1":0.7075 + }, + "Chrom Acc":{ + "F1":0.6371 + }, + "Prom Str":{ + "RMSE":1.04 + }, + "Term Str":{ + "RMSE":0.36 + }, + "Splice":{ + "F1":0.9444 + }, + "Gene Exp":{ + "RMSE":14.87 + }, + "Enhancer":{ + "F1":0.7167 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/Agro-NT.json b/eval-results/yangheng/PGB/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..9c1cf5acde27b681f67c9ca8e23b5bd3b9c6e63e --- /dev/null +++ b/eval-results/yangheng/PGB/Agro-NT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, +"results":{ + "PolyA":{ + "F1":0.7889 + }, + "LncRNA":{ + "F1":0.6724 + }, + "Chrom Acc":{ + "F1":0.6327 + }, + "Prom Str":{ + "RMSE":0.94 + }, + "Term Str":{ + "RMSE":0.78 + }, + "Splice":{ + "F1":0.8845 + }, + "Gene Exp":{ + "RMSE":15.56 + }, + "Enhancer":{ + "F1":0.6283 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/CDSBERT.json b/eval-results/yangheng/PGB/CDSBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..92365f0f58bc175190a3ff95cbc24d2093494f09 --- /dev/null +++ b/eval-results/yangheng/PGB/CDSBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + 
"batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.3972 + }, + "LncRNA":{ + "F1":0.3306 + }, + "Chrom Acc":{ + "F1":0.4895 + }, + "Prom Str":{ + "RMSE":2.19 + }, + "Term Str":{ + "RMSE":0.59 + }, + "Splice":{ + "F1":0.5220 + }, + "Gene Exp":{ + "RMSE":14.77 + }, + "Enhancer":{ + "F1":0.3393 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/Caduceus.json b/eval-results/yangheng/PGB/Caduceus.json new file mode 100644 index 0000000000000000000000000000000000000000..80cfc4a525127aff7dbc5d0a71224d432319bbc6 --- /dev/null +++ b/eval-results/yangheng/PGB/Caduceus.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7089 + }, + "LncRNA":{ + "F1":0.6840 + }, + "Chrom Acc":{ + "F1":0.6453 + }, + "Prom Str":{ + "RMSE":0.91 + }, + "Term Str":{ + "RMSE":0.26 + }, + "Splice":{ + "F1":0.7951 + }, + "Gene Exp":{ + "RMSE":14.72 + }, + "Enhancer":{ + "F1":0.6083 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/DNABERT-2-117M.json 
b/eval-results/yangheng/PGB/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..3906893d2ce05c38ed29efe43ba07f466bdc0e58 --- /dev/null +++ b/eval-results/yangheng/PGB/DNABERT-2-117M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.4135 + }, + "LncRNA":{ + "F1":0.7255 + }, + "Chrom Acc":{ + "F1":0.6149 + }, + "Prom Str":{ + "RMSE":0.99 + }, + "Term Str":{ + "RMSE":0.24 + }, + "Splice":{ + "F1":0.4534 + }, + "Gene Exp":{ + "RMSE":14.78 + }, + "Enhancer":{ + "F1":0.3640 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/HyenaDNA.json b/eval-results/yangheng/PGB/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..af033dd0115d50a3bdebed9eb98038b3850d3c41 --- /dev/null +++ b/eval-results/yangheng/PGB/HyenaDNA.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8311 + }, + "LncRNA":{ + 
"F1":0.5821 + }, + "Chrom Acc":{ + "F1":0.5220 + }, + "Prom Str":{ + "RMSE":0.88 + }, + "Term Str":{ + "RMSE":0.26 + }, + "Splice":{ + "F1":0.9028 + }, + "Gene Exp":{ + "RMSE":14.76 + }, + "Enhancer":{ + "F1":0.6617 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/NT-V2-100M.json b/eval-results/yangheng/PGB/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..9dbcaf073a15ce2d3e1c89b16d7b65d074e97a89 --- /dev/null +++ b/eval-results/yangheng/PGB/NT-V2-100M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7126 + }, + "LncRNA":{ + "F1":0.7308 + }, + "Chrom Acc":{ + "F1":0.6571 + }, + "Prom Str":{ + "RMSE":0.81 + }, + "Term Str":{ + "RMSE":0.27 + }, + "Splice":{ + "F1":0.9505 + }, + "Gene Exp":{ + "RMSE":14.69 + }, + "Enhancer":{ + "F1":0.7389 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/OmniGenome186M.json b/eval-results/yangheng/PGB/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..06b0c51eead662453dcfb7fe0c41ae7a8297046b --- /dev/null +++ b/eval-results/yangheng/PGB/OmniGenome186M.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + 
"device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8755 + }, + "LncRNA":{ + "F1":0.7796 + }, + "Chrom Acc":{ + "F1":0.6769 + }, + "Prom Str":{ + "RMSE":0.59 + }, + "Term Str":{ + "RMSE":0.18 + }, + "Splice":{ + "F1":0.9841 + }, + "Gene Exp":{ + "RMSE":14.71 + }, + "Enhancer":{ + "F1":0.7977 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/RNA-BERT.json b/eval-results/yangheng/PGB/RNA-BERT.json new file mode 100644 index 0000000000000000000000000000000000000000..dc87b9183fd0d44c57fc5f39596f9670be1d7b88 --- /dev/null +++ b/eval-results/yangheng/PGB/RNA-BERT.json @@ -0,0 +1,46 @@ +{ + "config":{ + "model":"multimolecule/rnabert", + "model_args":"pretrained=multimolecule/rnabert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"0.48M", "Pretraining Data":"76,237 human ncRNA sequences", "Species":"Human", "Nucleic Acid":"ncRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnabert", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.7854 + }, + "LncRNA":{ + "F1":0.6199 + }, + "Chrom Acc":{ + "F1":0.4894 + }, + "Prom Str":{ + "RMSE":1.81 + }, + "Term Str":{ + "RMSE":0.38 + }, + "Splice":{ + "F1":0.9445 + }, + "Gene Exp":{ + "RMSE":14.89 + }, + "Enhancer":{ + "F1":0.5761 + } + + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/RNA-FM.json b/eval-results/yangheng/PGB/RNA-FM.json new file mode 100644 index 0000000000000000000000000000000000000000..73cc0c1f5125e649358e73058bb3310733f164d3 --- /dev/null 
+++ b/eval-results/yangheng/PGB/RNA-FM.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/rnafm", + "model_args":"pretrained=multimolecule/rnafm,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"99.52M", "Pretraining Data":"23.7 million non-redundant RNA sequences", "Species":"Multi-Species", "Nucleic Acid":"ncRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnafm", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8494 + }, + "LncRNA":{ + "F1":0.6875 + }, + "Chrom Acc":{ + "F1":0.5492 + }, + "Prom Str":{ + "RMSE":0.95 + }, + "Term Str":{ + "RMSE":0.27 + }, + "Splice":{ + "F1":0.9595 + }, + "Gene Exp":{ + "RMSE":14.83 + }, + "Enhancer":{ + "F1":0.5714 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/RNA-MSM.json b/eval-results/yangheng/PGB/RNA-MSM.json new file mode 100644 index 0000000000000000000000000000000000000000..282ad3a11ed1f200ccd93c78fcd961d94c4364de --- /dev/null +++ b/eval-results/yangheng/PGB/RNA-MSM.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/rnamsm", + "model_args":"pretrained=multimolecule/rnamsm,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"96.5M", "Pretraining Data":"3,932 RNA families", "Species":"Multi-Species", "Nucleic Acid":"RNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/rnamsm", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.8425 + }, + "LncRNA":{ + "F1":0.6749 + }, + "Chrom Acc":{ + "F1":0.5352 + }, + "Prom Str":{ + "RMSE":1.28 + }, + "Term Str":{ + "RMSE":0.28 + }, + "Splice":{ + "F1":0.9549 + }, + "Gene Exp":{ + "RMSE":14.87 + }, + "Enhancer":{ + "F1":0.6145 + } 
+ } +} \ No newline at end of file diff --git a/eval-results/yangheng/PGB/SpliceBERT.json b/eval-results/yangheng/PGB/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..f52106504f5a96e28e24a29e9ef7019115a7f8ab --- /dev/null +++ b/eval-results/yangheng/PGB/SpliceBERT.json @@ -0,0 +1,45 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "PolyA":{ + "F1":0.6523 + }, + "LncRNA":{ + "F1":0.7188 + }, + "Chrom Acc":{ + "F1":0.6362 + }, + "Prom Str":{ + "RMSE":0.75 + }, + "Term Str":{ + "RMSE":0.22 + }, + "Splice":{ + "F1":0.9645 + }, + "Gene Exp":{ + "RMSE":14.70 + }, + "Enhancer":{ + "F1":0.6971 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/README.md b/eval-results/yangheng/README.md new file mode 100644 index 0000000000000000000000000000000000000000..32897cd3e640101ba184f8c4ccd896981de3804a --- /dev/null +++ b/eval-results/yangheng/README.md @@ -0,0 +1,3 @@ +--- +license: mit +--- diff --git a/eval-results/yangheng/RGB/yangheng/3UTRBERT.json b/eval-results/yangheng/RGB/yangheng/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..1adec75827e33722413afddd29d504c61349c815 --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/3UTRBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + 
"no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7772 + }, + "SNMD":{ + "AUC":0.5002 + }, + "SNMR":{ + "F1":0.2401 + }, + "ArchiveII":{ + "F1":0.7898 + }, + "bpRNA":{ + "F1":0.5693 + }, + "RNAStralign":{ + "F1":0.9203 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/RGB/yangheng/Agro-NT.json b/eval-results/yangheng/RGB/yangheng/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..846f30ec63c16f794989b0db1a154c59e1f3697c --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/Agro-NT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7830 + }, + "SNMD":{ + "AUC":0.4999 + }, + "SNMR":{ + "F1":0.2638 + }, + "ArchiveII":{ + "F1":0.7013 + }, + "bpRNA":{ + "F1":0.4871 + }, + "RNAStralign":{ + "F1":0.7521 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/RGB/yangheng/CDSBERT.json b/eval-results/yangheng/RGB/yangheng/CDSBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..3c4b2d0bacaf8c5ada6c773d2f2fbf5ed1b05fa4 --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/CDSBERT.json @@ -0,0 
+1,39 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7468 + }, + "SNMD":{ + "AUC":0.5503 + }, + "SNMR":{ + "F1":0.3616 + }, + "ArchiveII":{ + "F1":0.8934 + }, + "bpRNA":{ + "F1":0.7001 + }, + "RNAStralign":{ + "F1":0.9715 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/RGB/yangheng/DNABERT-2-117M.json b/eval-results/yangheng/RGB/yangheng/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..d5420bb518b4cc9bbfaddbc533c30c0d9f49bb06 --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/DNABERT-2-117M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8158 + }, + "SNMD":{ + "AUC":0.4994 + }, + "SNMR":{ + "F1":0.1586 + }, + "ArchiveII":{ + "F1":0.5982 + }, + "bpRNA":{ + "F1":0.4340 + }, + "RNAStralign":{ + "F1":0.6549 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/RGB/yangheng/HyenaDNA.json b/eval-results/yangheng/RGB/yangheng/HyenaDNA.json new file 
mode 100644 index 0000000000000000000000000000000000000000..64a6a4fc545031813e11311068ee7011518c6410 --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/HyenaDNA.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8056 + }, + "SNMD":{ + "AUC":0.5332 + }, + "SNMR":{ + "F1":0.3980 + }, + "ArchiveII":{ + "F1":0.8423 + }, + "bpRNA":{ + "F1":0.5662 + }, + "RNAStralign":{ + "F1":0.9542 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/RGB/yangheng/NT-V2-100M.json b/eval-results/yangheng/RGB/yangheng/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..f22a639787974b5149a8cd30360c249cf9409922 --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/NT-V2-100M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7826 + }, + "SNMD":{ + 
"AUC":0.5049 + }, + "SNMR":{ + "F1":0.2601 + }, + "ArchiveII":{ + "F1":0.7990 + }, + "bpRNA":{ + "F1":0.5660 + }, + "RNAStralign":{ + "F1":0.9084 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/RGB/yangheng/OmniGenome186M.json b/eval-results/yangheng/RGB/yangheng/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..ec7663dc48867a7bc15c972ae4c11fba5fdb5e37 --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/OmniGenome186M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":0.6381 + }, + "SNMR":{ + "F1":0.4980 + }, + "ArchiveII":{ + "F1":0.9520 + }, + "bpRNA":{ + "F1":0.8248 + }, + "RNAStralign":{ + "F1":0.9912 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/RGB/yangheng/SpliceBERT.json b/eval-results/yangheng/RGB/yangheng/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..8ce43c5282635c6aabfd4a5e3b31435f198451af --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/SpliceBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion 
nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7340 + }, + "SNMD":{ + "AUC":0.5811 + }, + "SNMR":{ + "F1":0.4644 + }, + "ArchiveII":{ + "F1":0.8905 + }, + "bpRNA":{ + "F1":0.6910 + }, + "RNAStralign":{ + "F1":0.9697 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/RGB/yangheng/results_OmniGenome-52M.json b/eval-results/yangheng/RGB/yangheng/results_OmniGenome-52M.json new file mode 100644 index 0000000000000000000000000000000000000000..c786546cc3bf47cc0eb0b208341a403f97563f90 --- /dev/null +++ b/eval-results/yangheng/RGB/yangheng/results_OmniGenome-52M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-52M", + "model_args":"pretrained=yangheng/omnigenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + "F1":0.9901 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/3UTRBERT.json b/eval-results/yangheng/yangheng/MODEL/3UTRBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..1adec75827e33722413afddd29d504c61349c815 --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/3UTRBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/utrbert-4mer", + "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16", + 
"num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/utrbert-4mer", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7772 + }, + "SNMD":{ + "AUC":0.5002 + }, + "SNMR":{ + "F1":0.2401 + }, + "ArchiveII":{ + "F1":0.7898 + }, + "bpRNA":{ + "F1":0.5693 + }, + "RNAStralign":{ + "F1":0.9203 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/Agro-NT.json b/eval-results/yangheng/yangheng/MODEL/Agro-NT.json new file mode 100644 index 0000000000000000000000000000000000000000..846f30ec63c16f794989b0db1a154c59e1f3697c --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/Agro-NT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7830 + }, + "SNMD":{ + "AUC":0.4999 + }, + "SNMR":{ + "F1":0.2638 + }, + "ArchiveII":{ + "F1":0.7013 + }, + "bpRNA":{ + "F1":0.4871 + }, + "RNAStralign":{ + "F1":0.7521 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/CDSBERT.json b/eval-results/yangheng/yangheng/MODEL/CDSBERT.json new file mode 100644 index 
0000000000000000000000000000000000000000..3c4b2d0bacaf8c5ada6c773d2f2fbf5ed1b05fa4 --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/CDSBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"GleghornLab/cdsBERT", + "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"}, + "model_dtype":"bfloat16", + "model_name":"GleghornLab/cdsBERT", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7468 + }, + "SNMD":{ + "AUC":0.5503 + }, + "SNMR":{ + "F1":0.3616 + }, + "ArchiveII":{ + "F1":0.8934 + }, + "bpRNA":{ + "F1":0.7001 + }, + "RNAStralign":{ + "F1":0.9715 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/DNABERT-2-117M.json b/eval-results/yangheng/yangheng/MODEL/DNABERT-2-117M.json new file mode 100644 index 0000000000000000000000000000000000000000..d5420bb518b4cc9bbfaddbc533c30c0d9f49bb06 --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/DNABERT-2-117M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"zhihan1996/DNABERT-2-117M", + "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"zhihan1996/DNABERT-2-117M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8158 + }, + "SNMD":{ + "AUC":0.4994 + }, + "SNMR":{ + "F1":0.1586 + }, + "ArchiveII":{ + "F1":0.5982 + }, + "bpRNA":{ + "F1":0.4340 + }, + "RNAStralign":{ + 
"F1":0.6549 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/HyenaDNA.json b/eval-results/yangheng/yangheng/MODEL/HyenaDNA.json new file mode 100644 index 0000000000000000000000000000000000000000..64a6a4fc545031813e11311068ee7011518c6410 --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/HyenaDNA.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"}, + "model_dtype":"bfloat16", + "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.8056 + }, + "SNMD":{ + "AUC":0.5332 + }, + "SNMR":{ + "F1":0.3980 + }, + "ArchiveII":{ + "F1":0.8423 + }, + "bpRNA":{ + "F1":0.5662 + }, + "RNAStralign":{ + "F1":0.9542 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/NT-V2-100M.json b/eval-results/yangheng/yangheng/MODEL/NT-V2-100M.json new file mode 100644 index 0000000000000000000000000000000000000000..f22a639787974b5149a8cd30360c249cf9409922 --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/NT-V2-100M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"}, + 
"model_dtype":"bfloat16", + "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7826 + }, + "SNMD":{ + "AUC":0.5049 + }, + "SNMR":{ + "F1":0.2601 + }, + "ArchiveII":{ + "F1":0.7990 + }, + "bpRNA":{ + "F1":0.5660 + }, + "RNAStralign":{ + "F1":0.9084 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/OmniGenome186M.json b/eval-results/yangheng/yangheng/MODEL/OmniGenome186M.json new file mode 100644 index 0000000000000000000000000000000000000000..ec7663dc48867a7bc15c972ae4c11fba5fdb5e37 --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/OmniGenome186M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-186M", + "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-186M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":0.6381 + }, + "SNMR":{ + "F1":0.4980 + }, + "ArchiveII":{ + "F1":0.9520 + }, + "bpRNA":{ + "F1":0.8248 + }, + "RNAStralign":{ + "F1":0.9912 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/SpliceBERT.json b/eval-results/yangheng/yangheng/MODEL/SpliceBERT.json new file mode 100644 index 0000000000000000000000000000000000000000..8ce43c5282635c6aabfd4a5e3b31435f198451af --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/SpliceBERT.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"multimolecule/splicebert", + "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + 
"batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"}, + "model_dtype":"bfloat16", + "model_name":"multimolecule/splicebert", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7340 + }, + "SNMD":{ + "AUC":0.5811 + }, + "SNMR":{ + "F1":0.4644 + }, + "ArchiveII":{ + "F1":0.8905 + }, + "bpRNA":{ + "F1":0.6910 + }, + "RNAStralign":{ + "F1":0.9697 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/MODEL/results_OmniGenome-52M.json b/eval-results/yangheng/yangheng/MODEL/results_OmniGenome-52M.json new file mode 100644 index 0000000000000000000000000000000000000000..c786546cc3bf47cc0eb0b208341a403f97563f90 --- /dev/null +++ b/eval-results/yangheng/yangheng/MODEL/results_OmniGenome-52M.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"yangheng/omnigenome-52M", + "model_args":"pretrained=yangheng/omnigenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"yangheng/omnigenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + "F1":0.9901 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/OmniGenome/OmniGenome186MResults b/eval-results/yangheng/yangheng/OmniGenome/OmniGenome186MResults new file mode 100644 index 0000000000000000000000000000000000000000..2b994e7353ee07a544096f72681d9dfdc2dc8361 
--- /dev/null +++ b/eval-results/yangheng/yangheng/OmniGenome/OmniGenome186MResults @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"omnigenome-186M", + "model_args":"pretrained=yangheng/OmniGenome-186M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"OmniGenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7164 + }, + "SNMD":{ + "AUC":63.81 + }, + "SNMR":{ + "F1":49.80 + }, + "ArchiveII":{ + "F1":95.20 + }, + "bpRNA":{ + "F1":82.48 + }, + "RNAStralign":{ + "F1":99.12 + } + } +} \ No newline at end of file diff --git a/eval-results/yangheng/yangheng/OmniGenome/OmniGenomeResults.json b/eval-results/yangheng/yangheng/OmniGenome/OmniGenomeResults.json new file mode 100644 index 0000000000000000000000000000000000000000..fc9145f04a604469be222e27404786d6aae6e7c7 --- /dev/null +++ b/eval-results/yangheng/yangheng/OmniGenome/OmniGenomeResults.json @@ -0,0 +1,39 @@ +{ + "config":{ + "model":"omnigenome-52M", + "model_args":"pretrained=yangheng/OmniGenome-52M,revision=main,dtype=bfloat16", + "num_fewshot":0, + "batch_size":1, + "batch_sizes":[ + + ], + "device":"cpu", + "no_cache":true, + "limit":20, + "bootstrap_iters":100000, + "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"}, + "model_dtype":"bfloat16", + "model_name":"OmniGenome-52M", + "model_sha":"main" + }, + "results":{ + "mRNA":{ + "RMSE":0.7191 + }, + "SNMD":{ + "AUC":0.6244 + }, + "SNMR":{ + "F1":0.4891 + }, + "ArchiveII":{ + "F1":0.9498 + }, + "bpRNA":{ + "F1":0.8234 + }, + "RNAStralign":{ + "F1":0.9901 + } + } +} \ No newline at end of file diff --git 
a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..3b4737924b5a7d81c962a4e28b66ac6cdcc3b004 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,13 @@ +[tool.ruff] +# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. +select = ["E", "F"] +ignore = ["E501"] # line too long (black is taking care of this) +line-length = 119 +fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"] + +[tool.isort] +profile = "black" +line_length = 119 + +[tool.black] +line-length = 119 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1a42048914d3e12bd52ff6f6f605d8b75dc9fd2f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,19 @@ +APScheduler==3.10.1 +black==23.11.0 +click==8.1.3 +datasets==2.14.5 +gradio==4.4.0 +gradio_client==0.7.0 +huggingface-hub>=0.18.0 +matplotlib==3.7.1 +numpy==1.24.2 +pandas==2.0.0 +python-dateutil==2.8.2 +requests==2.28.2 +tqdm==4.65.0 +transformers==4.35.2 +tokenizers>=0.15.0 +git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval +accelerate==0.24.1 +sentencepiece +gradio_leaderboard \ No newline at end of file diff --git a/src/about.py b/src/about.py new file mode 100644 index 0000000000000000000000000000000000000000..ac3f0503e17d506b729aee323e479d173b9c2c8e --- /dev/null +++ b/src/about.py @@ -0,0 +1,120 @@ +from dataclasses import dataclass +from enum import Enum + + +@dataclass +class Task: + benchmark: str + metric: str + col_name: str + + +# Select your tasks here +# --------------------------------------------------- +class TasksRGB(Enum): + # task_key in the json file, metric_key in the json file, name to display in the leaderboard + task0 = 
Task("mRNA", "RMSE", "mRNA (RMSE)") + task1 = Task("SNMD", "AUC", "SNMD (AUC)") + task2 = Task("SNMR", "F1", "SNMR (F1)") + task3 = Task("ArchiveII", "F1", "ArchiveII (F1)") + task4 = Task("bpRNA", "F1", "bpRNA (F1)") + task5 = Task("RNAStralign", "F1", "RNAStralign (F1)") + +class TasksPGB(Enum): + # task_key in the json file, metric_key in the json file, name to display in the leaderboard + task0 = Task("PolyA", "F1", "PolyA (F1)") + task1 = Task("LncRNA", "F1", "LncRNA (F1)") + task2 = Task("Chrom Acc", "F1", "Chrom Acc (F1)") + task3 = Task("Prom Str", "RMSE", "Prom Str (RMSE)") + task4 = Task("Term Str", "RMSE", "Term Str (RMSE)") + task5 = Task("Splice", "F1", "Splice (F1)") + task6 = Task("Gene Exp", "RMSE", "Gene Exp (RMSE)") + task7 = Task("Enhancer", "F1", "Enhancer (F1)") + +class TasksGUE(Enum): + # task_key in the json file, metric_key in the json file, name to display in the leaderboard + task0 = Task("Yeast EMP", "F1", "Yeast EMP (F1)") + task1 = Task("Mouse TF-M", "F1", "Mouse TF-M (F1)") + task2 = Task("Virus CVC", "F1", "Virus CVC (F1)") + task3 = Task("Human TF-H", "F1", "Human TF-H (F1)") + task4 = Task("Human PD", "F1", "Human PD (F1)") + task5 = Task("Human CPD", "F1", "Human CPD (F1)") + task6 = Task("Human SSP", "F1", "Human SSP (F1)") + +class TasksGB(Enum): + # task_key in the json file, metric_key in the json file, name to display in the leaderboard + task0 = Task("DEM", "F1", "DEM (F1)") + task1 = Task("DOW", "F1", "DOW (F1)") + task2 = Task("DRE", "F1", "DRE (F1)") + task3 = Task("DME", "F1", "DME (F1)") + task4 = Task("HCE", "F1", "HCE (F1)") + task5 = Task("HEE", "F1", "HEE (F1)") + task6 = Task("HRE", "F1", "HRE (F1)") + task7 = Task("HNP", "F1", "HNP (F1)") + task8 = Task("HOR", "F1", "HOR (F1)") + + +NUM_FEWSHOT = 0 # Change with your few shot +# --------------------------------------------------- + +# Your leaderboard name +TITLE = """

Genomic Modelling Leaderboard

""" + +# What does your leaderboard evaluate? +INTRODUCTION_TEXT = """ +""" + +# Which evaluations are you running? how can people reproduce what you have? +LLM_BENCHMARKS_TEXT = f""" +## Why do we need this benchmark? +Large-scale foundation models for molecular biology constitute a vital and rapidly developing change in the computational biology and AI4Science landscape. +As key parts of biology, such as DNA, RNA sequences, secondary structures, have a large effect on each other, the usage of this information within large-scale models allows for foundation models to be adapted and suited to multiple key tasks. +However, with this trend comes significant issues, the primary one being the difficulty to comprehensively evaluate these models and compare them fairly. +Here, we refer to the specific lack of real-world data to reflect the true performance of the models, rather than in-silico experiments only. +This issue forces repeated benchmark testing and models being trained and adapted for a specific task that may not have any real-world benefit. +Given the importance of this, we propose this genomic leaderboard on meticulously curated real-world datasets, to allow for a fair and comprehensive benchmark on the most important genomic downstream tasks. +## Evaluation Datasets +TODO HERE +## Reported Scores and Ranking +TODO HERE +## How it works +Do we need this? 
+## Reproducibility +To reproduce our results, here are the commands you can run: +""" + +EVALUATION_QUEUE_TEXT = """ +## Some good practices before submitting a model +### 1) Make sure you can load your model and tokenizer using AutoClasses: +```python +from transformers import AutoConfig, AutoModel, AutoTokenizer +config = AutoConfig.from_pretrained("your model name", revision=revision) +model = AutoModel.from_pretrained("your model name", revision=revision) +tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision) +``` +If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded. +Note: make sure your model is public! +Note: if your model needs `trust_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted! +### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index) +It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`! +### 3) Make sure your model has an open license! +This is a leaderboard for open genomic foundation models, and we'd love for as many people as possible to know they can use your model 🤗 +### 4) Fill in your model card +When we add extra information about models to the leaderboard, it will be automatically taken from the model card +## In case of model failure +If your model is displayed in the `FAILED` category, its execution stopped. +Make sure you have followed the above steps first. +If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task). 
+""" + +CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" +CITATION_BUTTON_TEXT = r""" +@article{Yang2024, + author = {Yang, Heng and Li, Ke}, + title = {OmniGenome: Aligning RNA Sequences with Secondary Structures in Genomic Foundation Models}, + journal = {arXiv}, + year = {2024}, + note = {arXiv preprint arXiv:2407.11242}, + url = {https://arxiv.org/abs/2407.11242} +} +""" diff --git a/src/display/css_html_js.py b/src/display/css_html_js.py new file mode 100644 index 0000000000000000000000000000000000000000..16508dafade465ad22d094bca64d8d1b58ebf40b --- /dev/null +++ b/src/display/css_html_js.py @@ -0,0 +1,105 @@ +custom_css = """ + +.markdown-text { + font-size: 16px !important; +} + +#models-to-add-text { + font-size: 18px !important; +} + +#citation-button span { + font-size: 16px !important; +} + +#citation-button textarea { + font-size: 16px !important; +} + +#citation-button > label > button { + margin: 6px; + transform: scale(1.3); +} + +#leaderboard-table { + margin-top: 15px +} + +#leaderboard-table-lite { + margin-top: 15px +} + +#search-bar-table-box > div:first-child { + background: none; + border: none; +} + +#search-bar { + padding: 0px; +} + +/* Limit the width of the first AutoEvalColumn so that names don't expand too much */ +table td:first-child, +table th:first-child { + max-width: 400px; + overflow: auto; + white-space: nowrap; +} + +.tab-buttons button { + font-size: 20px; +} + +#scale-logo { + border-style: none !important; + box-shadow: none; + display: block; + margin-left: auto; + margin-right: auto; + max-width: 600px; +} + +#scale-logo .download { + display: none; +} +#filter_type{ + border: 0; + padding-left: 0; + padding-top: 0; +} +#filter_type label { + display: flex; +} +#filter_type label > span{ + margin-top: var(--spacing-lg); + margin-right: 0.5em; +} +#filter_type label > .wrap{ + width: 103px; +} +#filter_type label > .wrap .wrap-inner{ + padding: 2px; +} +#filter_type label > .wrap .wrap-inner input{ 
+ width: 1px +} +#filter-columns-type{ + border:0; + padding:0.5; +} +#filter-columns-size{ + border:0; + padding:0.5; +} +#box-filter > .form{ + border: 0 +} +""" + +get_window_url_params = """ + function(url_params) { + const params = new URLSearchParams(window.location.search); + url_params = Object.fromEntries(params); + return url_params; + } + """ diff --git a/src/display/formatting.py b/src/display/formatting.py new file mode 100644 index 0000000000000000000000000000000000000000..b46d29c9dba71be80866bfe46c5a77acd0dc50ce --- /dev/null +++ b/src/display/formatting.py @@ -0,0 +1,27 @@ +def model_hyperlink(link, model_name): + return f'{model_name}' + + +def make_clickable_model(model_name): + link = f"https://huggingface.co/{model_name}" + return model_hyperlink(link, model_name) + + +def styled_error(error): + return f"

{error}

" + + +def styled_warning(warn): + return f"

{warn}

" + + +def styled_message(message): + return f"

{message}

" + + +def has_no_nan_values(df, columns): + return df[columns].notna().all(axis=1) + + +def has_nan_values(df, columns): + return df[columns].isna().any(axis=1) diff --git a/src/display/utils.py b/src/display/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fee0ca9bb1eacbfc4617ae840e7f05abd26f07ec --- /dev/null +++ b/src/display/utils.py @@ -0,0 +1,128 @@ +from dataclasses import dataclass, make_dataclass +from enum import Enum + + +from src.about import TasksRGB, TasksPGB, TasksGUE, TasksGB + + +def fields(raw_class): + return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"] + + +# These classes are for user facing column names, +# to avoid having to change them all around the code +# when a modif is needed +@dataclass +class ColumnContent: + name: str + type: str + displayed_by_default: bool + hidden: bool = False + never_hidden: bool = False + + +## Leaderboard columns +auto_eval_columns = [] +for eval_col in [TasksRGB, TasksPGB, TasksGUE, TasksGB]: + + auto_eval_column_dict = [] + # Init + auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)]) + auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)]) + # Scores + auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Rank", "number", True)]) + for task in eval_col: + auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)]) + # Model information + auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)]) + auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)]) + auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)]) + auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)]) + 
## All the model information that we might need
@dataclass
class ModelDetails:
    """Name, optional display name and optional symbol attached to an enum member."""

    name: str
    display_name: str = ""
    symbol: str = ""  # emoji


class ModelType(Enum):
    """Training regime of a submitted model."""

    PT = ModelDetails(name="pretrained", symbol="🟢")
    FT = ModelDetails(name="fine-tuned", symbol="🔶")
    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
    RL = ModelDetails(name="RL-tuned", symbol="🟦")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" "):
        """Return ``"<symbol><separator><name>"`` for this member."""
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        """Map a free-form label (name and/or emoji) to a member.

        Matching is by substring, checked in the order FT, PT, RL, IFT.
        Empty or ``None`` input, or a label matching nothing, yields
        ``ModelType.Unknown``.
        """
        # ``None`` would make the ``in`` checks below raise TypeError.
        if not type:
            return ModelType.Unknown
        if "fine-tuned" in type or "🔶" in type:
            return ModelType.FT
        if "pretrained" in type or "🟢" in type:
            return ModelType.PT
        if "RL-tuned" in type or "🟦" in type:
            return ModelType.RL
        if "instruction-tuned" in type or "⭕" in type:
            return ModelType.IFT
        return ModelType.Unknown


class WeightType(Enum):
    """Kind of weights a submission provides."""

    Adapter = ModelDetails("Adapter")
    Original = ModelDetails("Original")
    Delta = ModelDetails("Delta")


class Precision(Enum):
    """Numeric precision a model was evaluated in."""

    float16 = ModelDetails("float16")
    bfloat16 = ModelDetails("bfloat16")
    Unknown = ModelDetails("?")

    # Declared static (like ``ModelType.from_str``) so that it also works when
    # reached through a member, e.g. ``Precision.float16.from_str(...)``.
    @staticmethod
    def from_str(precision):
        """Map a dtype string (with or without the ``torch.`` prefix) to a member.

        Anything unrecognised, including ``None``, yields ``Precision.Unknown``.
        """
        if precision in ["torch.float16", "float16"]:
            return Precision.float16
        if precision in ["torch.bfloat16", "bfloat16"]:
            return Precision.bfloat16
        return Precision.Unknown
@dataclass
class EvalResult:
    """Represents one full evaluation. Built from a combination of the result and request file for a given run."""

    eval_name: str  # org_model_precision (uid)
    full_model: str  # org/model (path on hub)
    org: str  # None when the model name has no "org/" prefix
    model: str
    revision: str  # commit hash, "" if main
    results: dict  # benchmark name -> mean score
    precision: Precision = Precision.Unknown
    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
    weight_type: WeightType = WeightType.Original  # Original or Adapter
    architecture: str = "Unknown"
    license: str = "?"
    likes: int = 0
    num_params: int = 0
    date: str = ""  # submission date of request file
    still_on_hub: bool = False

    @classmethod
    def init_from_json_file(cls, json_filepath, Tasks):
        """Build an ``EvalResult`` from one model result file.

        Args:
            json_filepath: path of a JSON file holding a ``config`` and a ``results`` mapping.
            Tasks: enum whose member values expose ``benchmark`` and ``metric``.

        Raises:
            ValueError: if the file's ``config`` names no model
                (neither ``model_name`` nor ``model_args``).
        """
        with open(json_filepath) as fp:
            data = json.load(fp)

        # A missing/null config is treated as empty so the failure below is explicit.
        config = data.get("config") or {}

        # Precision
        precision = Precision.from_str(config.get("model_dtype"))

        # Get model and org
        org_and_model = config.get("model_name", config.get("model_args", None))
        if not org_and_model:
            raise ValueError(f"{json_filepath}: `config` has neither `model_name` nor `model_args`")
        org_and_model = org_and_model.split("/", 1)

        if len(org_and_model) == 1:
            org = None
            model = org_and_model[0]
            result_key = f"{model}_{precision.value.name}"
        else:
            org = org_and_model[0]
            model = org_and_model[1]
            result_key = f"{org}_{model}_{precision.value.name}"
        full_model = "/".join(org_and_model)

        still_on_hub, hub_error, model_config = is_model_on_hub(
            full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
        )
        print("Is model on hub? \n", hub_error)
        architecture = "?"
        if model_config is not None:
            architectures = getattr(model_config, "architectures", None)
            if architectures:
                architecture = ";".join(architectures)

        # Extract results available in this file (some results are split in several files)
        results = {}
        for task in Tasks:
            spec = task.value
            # We average all scores of a given metric (not all metrics are present in all files)
            accs = np.array([v.get(spec.metric, None) for k, v in data["results"].items() if spec.benchmark == k])
            if accs.size == 0 or any(acc is None for acc in accs):
                continue
            if spec.metric == "RMSE":
                # Keep RMSE at original value
                mean_acc = np.mean(accs)
            else:
                # Every other metric is rescaled by 100.
                mean_acc = np.mean(accs) * 100.0
            results[spec.benchmark] = mean_acc

        return cls(
            eval_name=result_key,
            full_model=full_model,
            org=org,
            model=model,
            results=results,
            precision=precision,
            revision=config.get("model_sha", ""),
            still_on_hub=still_on_hub,
            architecture=architecture,
        )

    def update_with_request_file(self, requests_path):
        """Find the request file for this model and copy its metadata onto ``self``.

        Best effort: any failure (no matching file, unreadable JSON, unknown
        weight type, ...) is reported on stdout and leaves the fields not yet
        assigned at their current values.
        """
        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
        try:
            with open(request_file, "r") as f:
                request = json.load(f)
            self.model_type = ModelType.from_str(request.get("model_type", ""))
            self.weight_type = WeightType[request.get("weight_type", "Original")]
            self.license = request.get("license", "?")
            self.likes = request.get("likes", 0)
            self.num_params = request.get("params", 0)
            self.date = request.get("submitted_time", "")
        except Exception:
            print(
                f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}"
            )

    def to_dict(self, rank, AutoEvalColumn, Tasks):
        """Convert the result to a dict compatible with the dataframe display.

        Args:
            rank: value stored under the ``average`` column of ``AutoEvalColumn``.
            AutoEvalColumn: column-definition class providing the display names.
            Tasks: enum whose member values expose ``col_name`` and ``benchmark``.

        Raises:
            KeyError: if ``self.results`` lacks the benchmark of any task.
        """
        data_dict = {
            "eval_name": self.eval_name,  # not a column, just a save name,
            AutoEvalColumn.precision.name: self.precision.value.name,
            AutoEvalColumn.model_type.name: self.model_type.value.name,
            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
            AutoEvalColumn.architecture.name: self.architecture,
            AutoEvalColumn.model.name: make_clickable_model(self.full_model),
            AutoEvalColumn.revision.name: self.revision,
            AutoEvalColumn.average.name: rank,
            AutoEvalColumn.license.name: self.license,
            AutoEvalColumn.likes.name: self.likes,
            AutoEvalColumn.params.name: self.num_params,
            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
        }

        # Deliberately unguarded: a missing benchmark must surface as KeyError.
        for task in Tasks:
            data_dict[task.value.col_name] = self.results[task.value.benchmark]

        return data_dict
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
    """Collect every complete evaluation found under ``results_path``.

    Args:
        results_path: root folder of the result files. The benchmark family is
            chosen by substring ("RGB", "PGB", "GUE"); anything else uses GB.
        requests_path: folder searched for the matching request files.

    Returns:
        The results sorted by mean score, best first. Results for which
        ``to_dict`` raises ``KeyError`` (a benchmark is missing) are left out.
    """
    # Resolve the benchmark family once instead of re-testing the path per file.
    if "RGB" in results_path:
        column_cls, tasks = AutoEvalColumnRGB, TasksRGB
    elif "PGB" in results_path:
        column_cls, tasks = AutoEvalColumnPGB, TasksPGB
    elif "GUE" in results_path:
        column_cls, tasks = AutoEvalColumnGUE, TasksGUE
    else:
        column_cls, tasks = AutoEvalColumnGB, TasksGB

    model_result_filepaths = []
    for root, _, files in os.walk(results_path):
        # We should only have json files in model results
        print(f"Files {files}")
        if len(files) == 0 or any(not f.endswith(".json") for f in files):
            continue

        # Sort by the file-name stem minus its last 7 characters
        # (presumably a timestamp's fractional part - confirm against the writer).
        # The key is plain string slicing and cannot fail, so no guard is needed.
        files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
        model_result_filepaths.extend(os.path.join(root, file) for file in files)

    eval_results = {}
    print(f"Filepaths: {model_result_filepaths}")
    for model_result_filepath in model_result_filepaths:
        # Creation of result
        eval_result = EvalResult.init_from_json_file(model_result_filepath, tasks)
        eval_result.update_with_request_file(requests_path)

        # Store results of same eval together
        eval_name = eval_result.eval_name
        if eval_name in eval_results:
            eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
        else:
            eval_results[eval_name] = eval_result

    for result in eval_results.values():
        scores = list(result.results.values())
        # The mean of nothing is NaN, and NaN sort keys make the ordering of
        # the other entries unreliable; rank score-less results last instead.
        result.average = np.mean(scores) if scores else float("-inf")
    sorted_results = sorted(eval_results.values(), key=lambda r: r.average, reverse=True)
    print(f"SORTED RESULTS HERE: \n{sorted_results}")

    results = []
    for i, v in enumerate(sorted_results):
        try:
            # we test if the dict version is complete
            v.to_dict(i, column_cls, tasks)
        except KeyError:  # not all eval values present
            continue
        results.append(v)

    return results
def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Create the dataframes for the evaluation-queue requests.

    Every non-hidden entry directly inside ``save_path`` whose name contains
    ".json" is loaded as one request. Sub-folders and any other files are
    skipped.

    Args:
        save_path: folder holding the request files.
        cols: columns to keep, in order, in each returned dataframe.

    Returns:
        ``(finished, running, pending)`` dataframes, split on the request's
        ``status``: FINISHED*/PENDING_NEW_EVAL, RUNNING, and PENDING/RERUN.
    """
    all_evals = []
    for entry in os.listdir(save_path):
        if entry.startswith("."):
            continue
        # Anything that is not a request file is ignored. Nested folders are
        # not descended into. TODO(review): confirm whether per-user
        # sub-folders should be listed as well.
        if ".json" not in entry:
            continue

        file_path = os.path.join(save_path, entry)
        with open(file_path) as fp:
            data = json.load(fp)

        data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
        data[EvalQueueColumn.revision.name] = data.get("revision", "main")
        all_evals.append(data)

    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols]
def is_model_on_hub(
    model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False
) -> "tuple[bool, str | None, object | None]":
    """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses.

    Returns:
        ``(ok, error, config)``. On success ``error`` is ``None`` and ``config``
        is the loaded config. On failure ``config`` is ``None`` and ``error`` is
        a sentence fragment meant to follow the model name in a message.
    """
    try:
        config = AutoConfig.from_pretrained(
            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
        )
        if test_tokenizer:
            try:
                # Loaded only to prove it can be; the tokenizer itself is not used.
                AutoTokenizer.from_pretrained(
                    model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
                )
            except ValueError as e:
                return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
            except Exception:
                return (
                    False,
                    "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
                    None,
                )
        return True, None, config

    except ValueError:
        return (
            False,
            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
            None,
        )

    except Exception:
        return False, "was not found on hub!", None


def get_model_size(model_info: "ModelInfo", precision: str):
    """Return the model size in billions of parameters, or 0 when unknown.

    The parameter count is read from ``model_info.safetensors``, accepting both
    a mapping with a ``"total"`` key and an object with a ``total`` attribute
    (NOTE(review): newer huggingface_hub releases appear to return the latter -
    confirm against the pinned version). The size is multiplied by 8 when
    ``precision`` is "GPTQ" or the model id contains "gptq".
    """
    safetensors = getattr(model_info, "safetensors", None)
    total = getattr(safetensors, "total", None)
    if total is None:
        try:
            total = safetensors["total"]
        except (KeyError, TypeError):
            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
    try:
        model_size = round(total / 1e9, 3)
    except TypeError:
        return 0

    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
    model_size = size_factor * model_size
    return model_size


def get_model_arch(model_info: "ModelInfo"):
    """Return the ``architectures`` entry of the model config, or "Unknown" when absent."""
    # The config itself can be missing/None; treat that like a missing key.
    config = getattr(model_info, "config", None) or {}
    return config.get("architectures", "Unknown")


def already_submitted_models(requested_models_dir: str) -> "tuple[set[str], dict[str, list[str]]]":
    """Gather the already submitted models to avoid duplicates.

    Only ``.json`` files located exactly one folder below
    ``requested_models_dir`` are read.

    Returns:
        ``(keys, users_to_submission_dates)`` where ``keys`` is the set of
        ``"<model>_<revision>_<precision>"`` strings and the mapping lists, per
        organisation, the ``submitted_time`` of its requests.
    """
    depth = 1
    file_names = []
    users_to_submission_dates = defaultdict(list)

    for root, _, files in os.walk(requested_models_dir):
        # Depth relative to the start folder; unlike counting separators in the
        # raw strings this is not thrown off by a trailing "/" on the argument.
        rel_root = os.path.relpath(root, requested_models_dir)
        current_depth = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
        if current_depth != depth:
            continue

        for file in files:
            if not file.endswith(".json"):
                continue
            with open(os.path.join(root, file), "r") as f:
                info = json.load(f)
            file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")

            # Select organisation
            if info["model"].count("/") == 0 or "submitted_time" not in info:
                continue
            # Keep only the first path component; ids with several "/" must not crash.
            organisation = info["model"].split("/", 1)[0]
            users_to_submission_dates[organisation].append(info["submitted_time"])

    return set(file_names), users_to_submission_dates
def add_new_eval(
    model: str,
    base_model: str,
    revision: str,
    precision: str,
    weight_type: str,
    model_type: str,
):
    """Validate a submission and push its request file to the evaluation queue.

    Args:
        model: hub id of the model, optionally prefixed with ``"<user>/"``.
        base_model: hub id of the base model, checked for Delta/Adapter weights.
        revision: revision to evaluate; an empty string means "main".
        precision: precision label; only the part before the first space is kept.
        weight_type: "Original", "Delta" or "Adapter".
        model_type: model type label; must be non-empty.

    Returns:
        The styled error, warning or success message to display to the user.
    """
    global REQUESTED_MODELS
    global USERS_TO_SUBMISSION_DATES
    if not REQUESTED_MODELS:
        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)

    user_name = ""
    model_path = model
    if "/" in model:
        user_name = model.split("/")[0]
        model_path = model.split("/")[1]

    precision = precision.split(" ")[0]
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    if model_type is None or model_type == "":
        return styled_error("Please select a model type.")

    # Does the model actually exist?
    if revision == "":
        revision = "main"

    # Is the model on the hub?
    if weight_type in ["Delta", "Adapter"]:
        base_model_on_hub, error, _ = is_model_on_hub(
            model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
        )
        if not base_model_on_hub:
            return styled_error(f'Base model "{base_model}" {error}')

    if not weight_type == "Adapter":
        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
        if not model_on_hub:
            return styled_error(f'Model "{model}" {error}')

    # Is the model info correctly filled?
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
    except Exception:
        return styled_error("Could not get your model information. Please fill it up properly.")

    model_size = get_model_size(model_info=model_info, precision=precision)

    # Were the model card and license filled?
    try:
        model_license = model_info.cardData["license"]
    except Exception:
        return styled_error("Please select a license for your model")

    modelcard_OK, error_msg = check_model_card(model)
    if not modelcard_OK:
        return styled_error(error_msg)

    # Seems good, creating the eval
    print("Adding new eval")

    eval_entry = {
        "model": model,
        "base_model": base_model,
        "revision": revision,
        "precision": precision,
        "weight_type": weight_type,
        "status": "PENDING",
        "submitted_time": current_time,
        "model_type": model_type,
        "likes": model_info.likes,
        "params": model_size,
        "license": model_license,
        "private": False,
    }

    # Check for duplicate submission
    submission_key = f"{model}_{revision}_{precision}"
    if submission_key in REQUESTED_MODELS:
        return styled_warning("This model has been already submitted.")

    print("Creating eval file")
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    file_name = f"{model_path}_eval_request_False_{precision}_{weight_type}.json"
    out_path = f"{OUT_DIR}/{file_name}"

    with open(out_path, "w") as f:
        f.write(json.dumps(eval_entry))

    print("Uploading eval file")
    try:
        API.upload_file(
            path_or_fileobj=out_path,
            # Built from its parts rather than by splitting the local path on
            # "eval-queue/", which depends on how the cache folder is named.
            path_in_repo=f"{user_name}/{file_name}" if user_name else file_name,
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model} to eval queue",
        )
    finally:
        # Remove the local file even when the upload fails, so a request that
        # never reached the queue is not later mistaken for a submitted one.
        os.remove(out_path)

    # Remember the submission so a repeat in this process is flagged as a duplicate.
    REQUESTED_MODELS.add(submission_key)
    if user_name:
        USERS_TO_SUBMISSION_DATES.setdefault(user_name, []).append(current_time)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )