import gradio
import argparse
import os
import boto3
import pandas as pd
from copy import copy

import queue

from constants import css, js_code, js_light
from utils import model_response, clear_chat
from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama

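# Module-level state shared across the Gradio callbacks:
#   INIT_MODELS     - model name -> loaded model object (None once evicted)
#   S3_SESSION      - boto3 S3 client passed through to utils.model_response
#   TEST_MD         - markdown text shown on the leaderboard / about tabs
#   CURRENT_MODELS  - queue of currently loaded model names, used for eviction
#   MODEL_LIB       - model name -> loader function
#   GEN_LIB         - model name -> generation function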
INIT_MODELS = dict()
S3_SESSION = None
TEST_MD = None
CURRENT_MODELS = queue.Queue()  # FIFO: the oldest loaded model is evicted first
MODEL_LIB = {'TINYLLAMA': get_tinyllama, "QWEN2INS1B": get_qwen2ins1b, "RUBASE": GigaChat.get_giga}
GEN_LIB = {'TINYLLAMA': response_tinyllama, "QWEN2INS1B": response_qwen2ins1b, "RUBASE": response_gigachat}

def model_gen(
        content,
        chat_history,
        model_name: str,
        top_p,
        temp,
        max_tokens,
        no_context=False
        ):
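    """Generate a reply from `model_name` for the prompt `content`.

    The model is loaded on demand via model_manager; generation itself is
    delegated to utils.model_response. The return value feeds the prompt
    textbox and the chatbot component.
    """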
    
    global INIT_MODELS, S3_SESSION, GEN_LIB, MODEL_LIB
    model_manager(model_name, MODEL_LIB, 3)
    if not content:
        return '', []

    # Keep only the latest exchange when the user disables context.
    chat_history = [chat_history[-1]] if no_context and len(chat_history) >= 1 else chat_history
    
    return model_response(
        content, 
        chat_history,
        S3_SESSION, 
        INIT_MODELS,
        GEN_LIB,
        model_name, 
        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
        )

def model_regen(
        content,
        chat_history,
        model_name: str,
        top_p,
        temp,
        max_tokens,
        no_context=False
        ):
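    """Regenerate the latest answer in `chat_history` using `model_name`.

    The most recent user prompt is re-submitted and the previous answer is
    replaced by a newly generated one.
    """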
    
    global INIT_MODELS, S3_SESSION, GEN_LIB, MODEL_LIB
    model_manager(model_name, MODEL_LIB, 3)
    if not chat_history:
        return '', []

    # Keep only the latest exchange when the user disables context.
    chat_history = [chat_history[-1]] if no_context else chat_history
    # The prompt to re-ask is the user side of the last exchange.
    content = copy(chat_history[-1][0])
    
    return model_response(
        content, 
        chat_history[:-1],
        S3_SESSION, 
        INIT_MODELS,
        GEN_LIB,
        model_name, 
        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
        )

def model_manager(
        add_model,
        model_lib,
        max_models=3
        ):
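    """Ensure `add_model` is loaded while keeping at most `max_models` in memory.

    Older models are evicted from INIT_MODELS before the requested one is
    instantiated through `model_lib`.
    """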
    global INIT_MODELS, CURRENT_MODELS
    # Already loaded: avoid re-instantiating the model on every call.
    if INIT_MODELS.get(add_model) is not None:
        return
    # Evict models until there is room for the new one.
    while CURRENT_MODELS.qsize() >= max_models:
        model_del = CURRENT_MODELS.get()
        INIT_MODELS[model_del] = None
    CURRENT_MODELS.put(add_model)
    INIT_MODELS[add_model] = model_lib[add_model]()

def tab_online_arena():
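    """Build the online arena tab: two chatbots side by side, voting buttons,
    generation parameters and regenerate / clear controls."""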
    global S3_SESSION, GEN_LIB, MODEL_LIB, INIT_MODELS, CURRENT_MODELS
    with gradio.Row():
        with gradio.Column():
            model_left = gradio.Dropdown(["TINYLLAMA", "QWEN2INS1B", "RUBASE"], value="QWEN2INS1B", interactive=True, multiselect=False, label="Left model")
            chatbot_left = gradio.Chatbot()
        with gradio.Column():
            model_right = gradio.Dropdown(["TINYLLAMA", "QWEN2INS1B", "RUBASE"], value= "RUBASE", interactive=True, multiselect=False, label="Right model")
            chatbot_right = gradio.Chatbot()

    with gradio.Row():
        msg = gradio.Textbox(label='Prompt', placeholder='Put your prompt here')

    with gradio.Row():
        gradio.Button('Both Good')
        gradio.Button('Left Better')
        gradio.Button('Right Better')
        gradio.Button('Both Bad')

    with gradio.Row():
        with gradio.Accordion("Parameters", open=False):
            no_context = gradio.Checkbox(label="No context", value=False)
            top_p = gradio.Slider(label='Top P', minimum=0, maximum=1, value=1, step=0.05, interactive=True)
            temp = gradio.Slider(label='Temperature', minimum=0, maximum=1, value=0.7, step=0.05, interactive=True)
            max_tokens = gradio.Slider(label='Max output tokens', minimum=1, maximum=2048, value=256, step=1, interactive=True)

    with gradio.Row():
        clear = gradio.ClearButton([msg, chatbot_left, chatbot_right], value='Clear history')
        regen_left = gradio.Button(value='Regenerate left answer')
        regen_right = gradio.Button(value='Regenerate right answer')
        regen_left.click(
            model_regen, 
            [msg, chatbot_left, model_left, top_p, temp, max_tokens, no_context], 
             [msg, chatbot_left]
             )
        regen_right.click(
            model_regen,
            [msg, chatbot_right, model_right, top_p, temp, max_tokens, no_context], 
             [msg, chatbot_right]
             )
    
    with gradio.Blocks():
        model_left.change(clear_chat, [], [msg, chatbot_left])
        model_right.change(clear_chat, [], [msg, chatbot_right])
        msg.submit(
            model_gen, 
            [msg, chatbot_left, model_left, top_p, temp, max_tokens, no_context], 
             [msg, chatbot_left]
             )
        msg.submit(
            model_gen,
            [msg, chatbot_right, model_right, top_p, temp, max_tokens, no_context], 
             [msg, chatbot_right]
             )

        # with gradio.Column():
        #     gradio.ChatInterface(
        #         fn=giga_gen,
        #         examples=[{"text": "hello"}, {"text": "hola"}, {"text": "merhaba"}],
        #         title="Giga",
        #         multimodal=True,
        #     )
        # with gradio.Column():
        #     gradio.ChatInterface(
        #         fn=tiny_gen,
        #         examples=[{"text": "hello"}, {"text": "hola"}, {"text": "merhaba"}],
        #         title="Tiny",
        #         multimodal=True,
        #     )
        # with gradio.Column():
        #     gradio.Interface(fn=giga_gen, inputs="text", outputs="text", allow_flagging=False, title='Giga') # arena = 
        # with gradio.Column():
        #     gradio.Interface(fn=tiny_gen, inputs="text", outputs="text", allow_flagging=False, title='TinyLlama') # arena = 
        
    # arena.launch()

def tab_leaderboard():
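    """Build the leaderboard tab with placeholder metric tables in sub-tabs."""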
    df = pd.DataFrame({
            "Model" : ['A', 'B', 'C',], 
            "Test 1" : [0, 1, 0], 
            "Test 2" : [1, 0, 1,],
        })

    # Function to apply text color
    def highlight_cols(x): 
        df = x.copy() 
        # df.loc[:, :] = 'color: purple'
        df[['Model']] = 'color: green'
        return df 

    # Applying the style function
    # s = df.style.apply(highlight_cols, axis = None)

    # Displaying the styled dataframe in Gradio
    with gradio.TabItem("Autogen Metrics", elem_id="od-benchmark-tab-table-ablation", id=0, elem_classes="subtab"): 
        with gradio.Blocks() as demo:
            gradio.DataFrame(df)
    
    with gradio.TabItem("Autometrics", elem_id="od-benchmark-tab-table-ablation", id=1, elem_classes="subtab"): 
        with gradio.Blocks() as demo:
            gradio.DataFrame(df)
    
    with gradio.TabItem("SBS metrics", elem_id="od-benchmark-tab-table-ablation", id=2, elem_classes="subtab"): 
        with gradio.Blocks() as demo:
            gradio.DataFrame(df)
    
    with gradio.TabItem("Arena ELO rating", elem_id="od-benchmark-tab-table-ablation", id=3, elem_classes="subtab"): 
        with gradio.Blocks() as demo:
            gradio.DataFrame(df)

def tab_offline_arena(): 
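    """Build the offline SBS tab: checkbox groups for models, task types and
    criteria to sample from, each with its own clear button."""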
    # with gradio.Row(): 
    #     btn_show_history = gradio.Button("🎲  Click here to sample an example + a pair of LLM outputs! ", elem_classes="sample_button")
    with gradio.Row():
        with gradio.Column(scale=2):
            with gradio.Accordion("Choose models to sample from", open=False, elem_classes="accordion-label"):
                model_options = list(MODEL_LIB.keys())  
                selected_models = gradio.CheckboxGroup(model_options, info="", value=model_options, show_label=False, elem_id="select-models") 
                clear_button = gradio.Button("Clear", elem_classes="btn_boderline_gray", scale=1)
                # clear the selected_models
                clear_button.click(lambda: {selected_models: {"value": [], "__type__": "update"}}, inputs=[], outputs=[selected_models])
        with gradio.Column(scale=1):
            with gradio.Accordion("Choose task types to sample from", open=False, elem_classes="accordion-label"):
                select_tasks = gradio.CheckboxGroup(['Task 1', "Task 2", "Task 3"], info="", value=['Task 1', "Task 2", "Task 3"], show_label=False, elem_id="select-tasks") 
                clear_task_button = gradio.Button("Clear", elem_classes="btn_boderline_gray", scale=1)
                # clear the select_tasks
                clear_task_button.click(lambda: {select_tasks: {"value": [], "__type__": "update"}}, inputs=[], outputs=[select_tasks])
            with gradio.Accordion("Choose criteria  to sample from", open=False, elem_classes="accordion-label"):
                select_tasks = gradio.CheckboxGroup(['Criterion 1', "Criterion 2", "Criterion 3"], info="", value=['Criterion 1', "Criterion 2", "Criterion 3"], show_label=False, elem_id="select-criteria") 
                clear_task_button = gradio.Button("Clear", elem_classes="btn_boderline_gray", scale=1)
                # clear the select_tasks
                clear_task_button.click(lambda: {select_tasks: {"value": [], "__type__": "update"}}, inputs=[], outputs=[select_tasks])


def build_demo():
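    """Assemble the complete Gradio app: leaderboard, SBS, arena and about tabs."""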
    # global original_dfs, available_models, gpt4t_dfs, haiku_dfs, llama_dfs

    with gradio.Blocks(theme=gradio.themes.Base(), css=css, js=js_light) as demo:
        # gradio.HTML(BANNER, elem_id="banner")
        # gradio.Markdown(HEADER_MD.replace("{model_num}", str(len(original_dfs["-1"]))), elem_classes="markdown-text")
        
        with gradio.Tabs(elem_classes="tab-buttons") as tabs:
            with gradio.TabItem("🐼 MERA leaderboard", elem_id="od-benchmark-tab-table", id=0):
                gradio.Markdown(TEST_MD, elem_classes="markdown-text-details")
                tab_leaderboard()

            with gradio.TabItem("🆚 SBS by categories and criteria", elem_id="od-benchmark-tab-table", id=1):
                # gradio.Markdown(TEST_MD, elem_classes="markdown-text-details")
                tab_offline_arena()

            with gradio.TabItem("🥊 Model arena", elem_id="od-benchmark-tab-table", id=2):
                tab_online_arena()
                # _tab_explore()

            with gradio.TabItem("💪 About MERA", elem_id="od-benchmark-tab-table", id=3):
                gradio.Markdown(TEST_MD, elem_classes="markdown-text")
        # gr.Markdown(f"Last updated on **{LAST_UPDATED}** | [Link to V1-legacy](https://huggingface.co/spaces/allenai/WildBench-V1-legacy)", elem_classes="markdown-text-small")
        
        # with gr.Row():
        #     with gr.Accordion("📙 Citation", open=False, elem_classes="accordion-label"):
        #         gr.Textbox(
        #             value=CITATION_TEXT, 
        #             lines=7,
        #             label="Copy the BibTeX snippet to cite this source",
        #             elem_id="citation-button",
        #             show_copy_button=True)
                # ).style(show_copy_button=True)

    return demo

if __name__ == "__main__":
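    # Parse CLI flags, load static markdown, set up the optional S3 client and launch the app.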
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    # parser.add_argument("--bench_table", help="Path to MERA table", default="data_dir/MERA_jun2024.jsonl")
    args = parser.parse_args()
    # data_load(args.result_file)    
    # TYPES = ["number", "markdown", "number"]

    with open("test.md", "r") as f:
        TEST_MD = f.read()

    try:
        session = boto3.session.Session()
        S3_SESSION = session.client(
            service_name='s3',
            endpoint_url=os.getenv('S3_ENDPOINT'),
            aws_access_key_id=os.getenv('S3_ACCESS_KEY'),
            aws_secret_access_key=os.getenv('S3_SECRET_KEY'),
        )
    except Exception as e:
        print(f'Failed to start s3 session: {e}')

    app = build_demo()
    app.launch(share=args.share, height=3000, width="110%") # share=args.share

    # demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
    # demo.launch()