Spaces:

yourbench
/

visualize-expert-level-filter

Running

sumuks HF Staff

Update app.py

0043673 verified 21 days ago

1.44 kB

	import gradio as gr
	from datasets import load_dataset

	# Hugging Face dataset repository and split displayed by this app.
	DATASET_NAME = "sumuks/fineweb-10BT-annotated"
	SPLIT = "train"

	# Row field names. NOTE(review): ID_COLUMN is not referenced anywhere in the
	# visible code; it is kept so the module's public constants stay unchanged.
	SCORE_COLUMN = "score"
	TEXT_COLUMN = "text"
	ID_COLUMN = "id"

	# Load the dataset once when the app starts. The catch is deliberately broad:
	# whatever goes wrong here is recorded in `load_error` so the UI can report it
	# instead of the app dying during start-up.
	try:
	    dataset = load_dataset(DATASET_NAME, split=SPLIT)
	except Exception as exc:
	    dataset = None
	    load_error = str(exc)
	else:
	    load_error = None


	def get_examples_by_score(score: int, n_examples: int = 5):
	    """Return up to ``n_examples`` texts from rows whose score equals ``score``.

	    Args:
	        score: Value a row's ``SCORE_COLUMN`` field must equal to be selected.
	        n_examples: Maximum number of texts to return.

	    Returns:
	        A list of strings. Normally these are the ``TEXT_COLUMN`` values of
	        the first matching rows (``""`` for a row without that key). When the
	        dataset could not be loaded, or when nothing was selected, the list
	        holds a single human-readable message instead.
	    """
	    if dataset is None:
	        return [f"Dataset could not be loaded: {load_error}"]

	    matching = dataset.filter(lambda row: row.get(SCORE_COLUMN) == score)
	    # Never ask select() for more rows than the filtered subset holds.
	    limit = min(len(matching), n_examples)
	    texts = [row.get(TEXT_COLUMN, "") for row in matching.select(range(limit))]
	    return texts or ["No examples found for this score"]


	def build_tabs():
	    """Create the "About" tab followed by one tab for each score 0 through 5.

	    The "About" tab shows the dataset name and, if loading failed, the load
	    error. Each score tab renders up to two example texts obtained from
	    ``get_examples_by_score``.

	    Returns:
	        The ``gr.Tab`` components that were created, in creation order.
	        (Previously the list was returned without ever being filled.)
	    """
	    tabs = []

	    with gr.Tab("About") as about_tab:
	        gr.Markdown(
	            f"# Dataset Inspector\nUsing dataset `{DATASET_NAME}`\n"
	        )
	        if load_error:
	            gr.Markdown(f"Dataset failed to load: {load_error}")
	    tabs.append(about_tab)

	    for score in range(6):
	        with gr.Tab(f"Score {score}") as score_tab:
	            examples = get_examples_by_score(score, 2)
	            # Headings are numbered from 1 for display.
	            for number, example in enumerate(examples, start=1):
	                gr.Markdown(f"### Example {number}\n{example}")
	        tabs.append(score_tab)

	    return tabs


	# Assemble the UI: every component created by build_tabs() is registered on
	# the enclosing Blocks layout because it runs inside this `with` context.
	with gr.Blocks(theme="default") as demo:
	    build_tabs()

	# Start the Gradio server as soon as this module is executed.
	demo.launch()