protobench / app.py
vtrv.vls
API fix
2375d69
raw
history blame
6.67 kB
import gradio
import argparse
import os
import boto3
from datetime import datetime
import pandas as pd
from utils import generate, send_to_s3
from models import get_tinyllama, response_tinyllama
from constants import css, js_code, js_light
# Module-level state shared by the UI callbacks below.

# Not referenced anywhere else in the visible part of this file.
MERA_table = None
# TinyLlama handle: assigned from get_tinyllama() in the __main__ block and
# passed to response_tinyllama() by tiny_gen.
TINY_LLAMA = None
# S3 client: assigned in the __main__ block (stays None if client creation
# fails there) and passed to send_to_s3() by giga_gen and tiny_gen.
S3_SESSION = None
def giga_gen(content, chat_history):
    """Answer ``content`` through ``generate`` and record the turn.

    The user message is appended to ``chat_history`` as a new one-element
    turn, the whole history is passed to ``generate`` together with the
    ``'auth_token.json'`` path, and the result is appended to that turn.
    The result is also handed to ``send_to_s3`` under a timestamped
    ``protobench/giga_*.json`` key, using the module-level ``S3_SESSION``.

    Args:
        content: The message submitted by the user.
        chat_history: Mutable list of turns; it is modified in place.

    Returns:
        A ``('', chat_history)`` tuple. ``tab_arena`` routes the first item
        to the textbox and the second to the chatbot.
    """
    chat_history.append([content])
    reply = generate(chat_history, 'auth_token.json')
    chat_history[-1].append(reply)

    # Spaces in the timestamp are replaced so the object key has none.
    timestamp = str(datetime.now()).replace(" ", "_")
    s3_key = f'protobench/giga_{timestamp}.json'
    send_to_s3(reply, s3_key, S3_SESSION)

    return '', chat_history
def tiny_gen(content, chat_history):
    """Answer ``content`` through ``response_tinyllama`` and record the turn.

    The user message is appended to ``chat_history`` as a new one-element
    turn and the model reply is appended to that turn. Only ``content`` is
    passed to the model (with the module-level ``TINY_LLAMA``); the earlier
    history is not. The reply is also handed to ``send_to_s3`` under a
    timestamped ``protobench/tiny_*.json`` key, using ``S3_SESSION``.

    Args:
        content: The message submitted by the user.
        chat_history: Mutable list of turns; it is modified in place.

    Returns:
        A ``('', chat_history)`` tuple. ``tab_arena`` routes the first item
        to the textbox and the second to the chatbot.
    """
    chat_history.append([content])
    reply = response_tinyllama(TINY_LLAMA, content)
    chat_history[-1].append(reply)

    # Spaces in the timestamp are replaced so the object key has none.
    timestamp = str(datetime.now()).replace(" ", "_")
    s3_key = f'protobench/tiny_{timestamp}.json'
    send_to_s3(reply, s3_key, S3_SESSION)

    return '', chat_history
def tab_arena():
    """Lay out the model arena: two chatbots, vote buttons and one input box.

    Submitting the textbox triggers ``giga_gen`` for the left chatbot and
    ``tiny_gen`` for the right one; each handler receives the textbox value
    and its chatbot's history and writes back to the same two components.

    NOTE: the four vote buttons are only created here; no click handler is
    attached to them in this function.
    """
    with gradio.Row():
        with gradio.Column():
            chatbot_left = gradio.Chatbot()
        with gradio.Column():
            chatbot_right = gradio.Chatbot()

    with gradio.Row():
        for vote_label in ('Both Good', 'Left Better', 'Right Better', 'Both Bad'):
            gradio.Button(vote_label)

    msg = gradio.Textbox()
    gradio.ClearButton([msg, chatbot_left, chatbot_right])

    # Same message goes to both models; registration order is left, then right.
    arena_sides = (
        (giga_gen, chatbot_left),
        (tiny_gen, chatbot_right),
    )
    for handler, chatbot in arena_sides:
        msg.submit(handler, [msg, chatbot], [msg, chatbot])
def tab_leaderboard():
    """Render four leaderboard sub-tabs, each showing the same sample table.

    The table is a hard-coded three-row DataFrame with the columns
    ``Model``, ``Test 1`` and ``Test 2``. ``build_demo`` calls this function
    from inside its leaderboard tab.
    """
    sample_table = pd.DataFrame({
        "Model": ['A', 'B', 'C'],
        "Test 1": [0, 1, 0],
        "Test 2": [1, 0, 1],
    })

    def highlight_cols(x):
        """Return a copy of ``x`` with its 'Model' column set to a green-text CSS rule."""
        styled = x.copy()
        styled[['Model']] = 'color: green'
        return styled

    # highlight_cols is currently not applied to the table; the frame is
    # displayed unstyled.
    subtab_titles = (
        "Autogen Metrics",
        "Autometrics",
        "SBS metrics",
        "Arena ELO rating",
    )
    for subtab_id, title in enumerate(subtab_titles):
        with gradio.TabItem(title, elem_id="od-benchmark-tab-table-ablation", id=subtab_id, elem_classes="subtab"):
            with gradio.Blocks():
                gradio.DataFrame(sample_table)
# Markdown text rendered in several tabs by build_demo; read once at import.
# The encoding is stated explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("test.md", "r", encoding="utf-8") as f:
    TEST_MD = f.read()

# Not referenced elsewhere in the visible part of this file.
available_models = ["GigaChat", ""]  # list(model_info.keys())
def build_demo():
    """Assemble the top-level Gradio app and return it without launching.

    The app has four tabs: the MERA leaderboard (markdown plus
    ``tab_leaderboard``), the SBS page (markdown only), the model arena
    (``tab_arena``) and an about page (markdown only). All markdown comes
    from the module-level ``TEST_MD``.

    Returns:
        The ``gradio.Blocks`` instance holding the whole interface.
    """
    # NOTE(review): all four tabs share elem_id "od-benchmark-tab-table";
    # presumably the CSS in `constants` targets it - confirm before changing.
    with gradio.Blocks(theme=gradio.themes.Base(), css=css, js=js_light) as demo:
        with gradio.Tabs(elem_classes="tab-buttons"):
            with gradio.TabItem("🐼 MERA leaderboard", elem_id="od-benchmark-tab-table", id=0):
                gradio.Markdown(TEST_MD, elem_classes="markdown-text-details")
                tab_leaderboard()

            # The emoji in the next three labels were stored as mojibake
            # (UTF-8 bytes decoded with a single-byte code page); restored.
            with gradio.TabItem("🆚 SBS by categories and criteria", elem_id="od-benchmark-tab-table", id=1):
                gradio.Markdown(TEST_MD, elem_classes="markdown-text-details")

            with gradio.TabItem("🥊 Model arena", elem_id="od-benchmark-tab-table", id=2):
                tab_arena()

            with gradio.TabItem("💪 About MERA", elem_id="od-benchmark-tab-table", id=3):
                gradio.Markdown(TEST_MD, elem_classes="markdown-text")

    return demo
if __name__ == "__main__":
    # Script entry point: parse CLI flags, load the model, try to open the
    # S3 client, then build and launch the UI.
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    args = parser.parse_args()

    # Rebinds the module-level TINY_LLAMA that tiny_gen reads.
    TINY_LLAMA = get_tinyllama()

    # Best effort: if the client cannot be created, S3_SESSION keeps its
    # initial None value and the app is still launched.
    try:
        session = boto3.session.Session()
        S3_SESSION = session.client(
            service_name='s3',
            endpoint_url=os.getenv('S3_ENDPOINT'),
            aws_access_key_id=os.getenv('S3_ACCESS_KEY'),
            aws_secret_access_key=os.getenv('S3_SECRET_KEY'),
        )
    except Exception as exc:
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit still propagate; the cause is reported, not hidden.
        print(f'Failed to start s3 session: {exc!r}')

    demo = build_demo()
    demo.launch(share=args.share, height=3000, width="110%")