# protobench / app.py  (commit 4d88e85, "Functionality rework")
import gradio
import argparse
import os
import boto3
import pandas as pd
from copy import copy
import queue
from constants import css, js_code, js_light
from utils import model_response, clear_chat
from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama
# Cache of instantiated models keyed by model name; a value of None marks a
# model that was evicted by model_manager() but whose key remains.
INIT_MODELS = dict()
# boto3 S3 client used by model_response for logging/storage; stays None if
# the session could not be created at startup.
S3_SESSION = None
# Markdown text (loaded from test.md in __main__) rendered on several tabs.
TEST_MD = None
# Names of the currently-loaded models; model_manager() evicts from here.
# NOTE(review): LifoQueue means the *most recently* added model is evicted
# first — confirm this is intended rather than FIFO/LRU.
CURRENT_MODELS = queue.LifoQueue()
# Zero-argument factories that load each model, keyed by model name.
MODEL_LIB = {'TINYLLAMA': get_tinyllama, "QWEN2INS1B": get_qwen2ins1b, "RUBASE": GigaChat.get_giga}
# Generation callables matching MODEL_LIB's keys one-to-one.
GEN_LIB = {'TINYLLAMA': response_tinyllama, "QWEN2INS1B": response_qwen2ins1b, "RUBASE": response_gigachat}
def model_gen(
    content,
    chat_history,
    model_name: str,
    top_p,
    temp,
    max_tokens,
    no_context=False
):
    """Generate a reply for *content* with *model_name* and return the
    updated ``(textbox_value, chat_history)`` pair for Gradio.

    Args:
        content: The user prompt from the textbox; may be None or empty.
        chat_history: List of ``[user, bot]`` message pairs.
        model_name: Key into MODEL_LIB / GEN_LIB / INIT_MODELS.
        top_p / temp / max_tokens: Sampling parameters forwarded verbatim.
        no_context: If True, only the most recent exchange is kept as context.

    Returns:
        Whatever model_response returns; ``('', [])`` for empty input.
    """
    global INIT_MODELS, S3_SESSION, GEN_LIB, MODEL_LIB
    # FIX: bail out on empty input BEFORE loading a model — the original
    # called model_manager first, paying a (potentially heavy) model load
    # even when there was nothing to generate.
    if not content:
        return '', []
    # Make sure the requested model is loaded (may evict another one).
    model_manager(model_name, MODEL_LIB, 3)
    # "No context" mode keeps only the latest exchange as conversational context.
    if no_context and len(chat_history) >= 1:
        chat_history = [chat_history[-1]]
    return model_response(
        content,
        chat_history,
        S3_SESSION,
        INIT_MODELS,
        GEN_LIB,
        model_name,
        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
    )
def model_regen(
    content,
    chat_history,
    model_name: str,
    top_p,
    temp,
    max_tokens,
    no_context=False
):
    """Regenerate the last bot answer in *chat_history* using *model_name*.

    The last user prompt is re-submitted and the stale ``[user, bot]`` pair
    is dropped from the history passed to the model.

    Args:
        content: Ignored; the prompt is taken from the history itself.
        chat_history: List of ``[user, bot]`` message pairs.
        model_name: Key into MODEL_LIB / GEN_LIB / INIT_MODELS.
        top_p / temp / max_tokens: Sampling parameters forwarded verbatim.
        no_context: If True, only the most recent exchange is kept.

    Returns:
        Whatever model_response returns; ``('', [])`` if there is no history.
    """
    global INIT_MODELS, S3_SESSION, GEN_LIB, MODEL_LIB
    # FIX: also guard the empty-list case — the original only checked for
    # None and would raise IndexError on an empty history.
    if not chat_history:
        return '', []
    model_manager(model_name, MODEL_LIB, 3)
    # BUG FIX: the original did `chat_history = chat_history[-1]`, collapsing
    # the history into a single [user, bot] pair, so `chat_history[-1][0]`
    # picked the first *character* of the bot reply instead of the last user
    # prompt. Wrap in a list, consistent with model_gen.
    if no_context:
        chat_history = [chat_history[-1]]
    # Re-ask the most recent user prompt.
    content = copy(chat_history[-1][0])
    return model_response(
        content,
        chat_history[:-1],
        S3_SESSION,
        INIT_MODELS,
        GEN_LIB,
        model_name,
        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
    )
def model_manager(
    add_model,
    model_lib,
    max_models=3
):
    """Ensure *add_model* is initialized, keeping at most *max_models* loaded.

    Args:
        add_model: Model name; key into *model_lib* and INIT_MODELS.
        model_lib: Mapping of model name -> zero-argument factory.
        max_models: Maximum number of simultaneously loaded models.
    """
    global INIT_MODELS, CURRENT_MODELS
    # BUG FIX: the original unconditionally re-ran the factory and pushed a
    # duplicate queue entry on every call, so repeated use of one model both
    # paid the full load cost each time and corrupted the eviction count.
    if INIT_MODELS.get(add_model) is not None:
        return
    while CURRENT_MODELS.qsize() >= max_models:
        model_del = CURRENT_MODELS.get()
        # Drop the reference so the evicted model can be garbage-collected.
        INIT_MODELS[model_del] = None
    CURRENT_MODELS.put(add_model)
    INIT_MODELS[add_model] = model_lib[add_model]()
def tab_online_arena():
    """Build the live side-by-side arena tab: two model chatbots answering
    the same prompt, with regenerate / clear controls and sampling sliders.

    The voting buttons are placeholders — they are not wired to handlers yet.
    """
    global S3_SESSION, GEN_LIB, MODEL_LIB, INIT_MODELS, CURRENT_MODELS
    with gradio.Row():
        with gradio.Column():
            model_left = gradio.Dropdown(["TINYLLAMA", "QWEN2INS1B", "RUBASE"], value="QWEN2INS1B", interactive=True, multiselect=False, label="Left model")
            chatbot_left = gradio.Chatbot()
        with gradio.Column():
            model_right = gradio.Dropdown(["TINYLLAMA", "QWEN2INS1B", "RUBASE"], value="RUBASE", interactive=True, multiselect=False, label="Right model")
            chatbot_right = gradio.Chatbot()
    with gradio.Row():
        msg = gradio.Textbox(label='Prompt', placeholder='Put your prompt here')
    # SBS voting buttons — placeholders, no click handlers attached yet.
    with gradio.Row():
        gradio.Button('Both Good')
        gradio.Button('Left Better')
        gradio.Button('Right Better')
        gradio.Button('Both Bad')
    with gradio.Row():
        with gradio.Accordion("Parameters", open=False):
            no_context = gradio.Checkbox(label="No context", value=False)
            top_p = gradio.Slider(label='Top P', minimum=0, maximum=1, value=1, step=0.05, interactive=True)
            temp = gradio.Slider(label='Temperature', minimum=0, maximum=1, value=0.7, step=0.05, interactive=True)
            # FIX: label typo "Max ouput tokens" -> "Max output tokens".
            max_tokens = gradio.Slider(label='Max output tokens', minimum=1, maximum=2048, value=256, step=1, interactive=True)
    with gradio.Row():
        clear = gradio.ClearButton([msg, chatbot_left, chatbot_right], value='Clear history')
        regen_left = gradio.Button(value='Regenerate left answer')
        regen_right = gradio.Button(value='Regenerate right answer')
    # Regenerate the last answer on the corresponding side only.
    regen_left.click(
        model_regen,
        [msg, chatbot_left, model_left, top_p, temp, max_tokens, no_context],
        [msg, chatbot_left]
    )
    regen_right.click(
        model_regen,
        [msg, chatbot_right, model_right, top_p, temp, max_tokens, no_context],
        [msg, chatbot_right]
    )
    with gradio.Blocks():
        # Switching a model wipes that side's chat to avoid mixed histories.
        model_left.change(clear_chat, [], [msg, chatbot_left])
        model_right.change(clear_chat, [], [msg, chatbot_right])
        # One prompt submission fans out to both models.
        msg.submit(
            model_gen,
            [msg, chatbot_left, model_left, top_p, temp, max_tokens, no_context],
            [msg, chatbot_left]
        )
        msg.submit(
            model_gen,
            [msg, chatbot_right, model_right, top_p, temp, max_tokens, no_context],
            [msg, chatbot_right]
        )
def tab_leaderboard():
    """Render the leaderboard sub-tabs.

    All four metric families currently display the same placeholder frame;
    real benchmark results are to be wired in later.
    """
    # Placeholder scores until real data is loaded.
    df = pd.DataFrame({
        "Model": ['A', 'B', 'C'],
        "Test 1": [0, 1, 0],
        "Test 2": [1, 0, 1],
    })
    # FIX: removed the dead `highlight_cols` helper (referenced only from
    # commented-out code) and collapsed four copy-pasted tab bodies into
    # a loop — same components, same order, same ids.
    tab_titles = ["Autogen Metrics", "Autometrics", "SBS metrics", "Arena ELO rating"]
    for tab_id, title in enumerate(tab_titles):
        with gradio.TabItem(title, elem_id="od-benchmark-tab-table-ablation", id=tab_id, elem_classes="subtab"):
            with gradio.Blocks():
                gradio.DataFrame(df)
def tab_offline_arena():
    """Build the offline SBS tab: checkbox groups for choosing the models,
    task types, and criteria to sample precomputed comparisons from."""
    with gradio.Row():
        with gradio.Column(scale=2):
            with gradio.Accordion("Choose models to sample from", open=False, elem_classes="accordion-label"):
                model_options = list(MODEL_LIB.keys())
                selected_models = gradio.CheckboxGroup(model_options, info="", value=model_options, show_label=False, elem_id="select-models")
                clear_button = gradio.Button("Clear", elem_classes="btn_boderline_gray", scale=1)
                # Reset the model selection to empty.
                clear_button.click(lambda: {selected_models: {"value": [], "__type__": "update"}}, inputs=[], outputs=[selected_models])
        with gradio.Column(scale=1):
            with gradio.Accordion("Choose task types to sample from", open=False, elem_classes="accordion-label"):
                select_tasks = gradio.CheckboxGroup(['Task 1', "Task 2", "Task 3"], info="", value=['Task 1', "Task 2", "Task 3"], show_label=False, elem_id="select-tasks")
                clear_task_button = gradio.Button("Clear", elem_classes="btn_boderline_gray", scale=1)
                # BUG FIX: the original rebound `select_tasks` to the criteria
                # group below, and Python's late-binding closures made THIS
                # lambda key its update dict on the criteria group while its
                # `outputs` pointed at the tasks group. Bind the component as
                # a default argument so it is frozen at definition time.
                clear_task_button.click(lambda grp=select_tasks: {grp: {"value": [], "__type__": "update"}}, inputs=[], outputs=[select_tasks])
            with gradio.Accordion("Choose criteria to sample from", open=False, elem_classes="accordion-label"):
                # FIX: distinct names instead of shadowing select_tasks /
                # clear_task_button from the accordion above.
                select_criteria = gradio.CheckboxGroup(['Criterion 1', "Criterion 2", "Criterion 3"], info="", value=['Criterion 1', "Criterion 2", "Criterion 3"], show_label=False, elem_id="select-criteria")
                clear_criteria_button = gradio.Button("Clear", elem_classes="btn_boderline_gray", scale=1)
                # Reset the criteria selection to empty.
                clear_criteria_button.click(lambda: {select_criteria: {"value": [], "__type__": "update"}}, inputs=[], outputs=[select_criteria])
def build_demo():
    """Assemble the full Gradio application.

    Four top-level tabs: leaderboard, offline SBS comparisons, the live
    model arena, and an about page. Returns the gradio.Blocks app.
    """
    with gradio.Blocks(theme=gradio.themes.Base(), css=css, js=js_light) as demo:
        with gradio.Tabs(elem_classes="tab-buttons") as tabs:
            with gradio.TabItem("🐼 MERA leaderboard", elem_id="od-benchmark-tab-table", id=0):
                gradio.Markdown(TEST_MD, elem_classes="markdown-text-details")
                tab_leaderboard()
            # FIX: the three labels below were mojibake (UTF-8 emoji bytes
            # mis-decoded as ISO-8859-7, e.g. "πŸ†š"); restored 🆚 🥊 💪.
            with gradio.TabItem("🆚 SBS by categories and criteria", elem_id="od-benchmark-tab-table", id=1):
                tab_offline_arena()
            with gradio.TabItem("🥊 Model arena", elem_id="od-benchmark-tab-table", id=2):
                tab_online_arena()
            with gradio.TabItem("💪 About MERA", elem_id="od-benchmark-tab-table", id=3):
                gradio.Markdown(TEST_MD, elem_classes="markdown-text")
    return demo
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    args = parser.parse_args()

    # Static markdown rendered on the leaderboard / about tabs.
    # FIX: explicit encoding — the default is platform-dependent.
    with open("test.md", "r", encoding="utf-8") as f:
        TEST_MD = f.read()

    # S3 is optional: the app still starts (without S3-backed logging)
    # when credentials are missing or the endpoint is unreachable.
    try:
        session = boto3.session.Session()
        S3_SESSION = session.client(
            service_name='s3',
            endpoint_url=os.getenv('S3_ENDPOINT'),
            aws_access_key_id=os.getenv('S3_ACCESS_KEY'),
            aws_secret_access_key=os.getenv('S3_SECRET_KEY'),
        )
    except Exception as e:
        # FIX: bare `except:` also caught SystemExit/KeyboardInterrupt and
        # hid the actual error; report it instead.
        print(f'Failed to start s3 session: {e}')

    app = build_demo()
    app.launch(share=args.share, height=3000, width="110%")