# Page metadata captured alongside this file (not part of the program):
#   author: sumuks (HF Staff)
#   commit: "Update app.py" (0043673, verified)
#   size:   1.44 kB
import gradio as gr
from datasets import load_dataset
# Which dataset/split to inspect, and the row fields the app reads.
DATASET_NAME = "sumuks/fineweb-10BT-annotated"
SPLIT = "train"
SCORE_COLUMN = "score"
TEXT_COLUMN = "text"
ID_COLUMN = "id"  # not referenced in the code visible here

# Load the dataset a single time at import. Exactly one of `dataset` /
# `load_error` ends up non-None: the loaded split on success, or the
# stringified exception on failure so the UI can report it instead of crashing.
dataset = None
load_error = None
try:
    dataset = load_dataset(DATASET_NAME, split=SPLIT)
except Exception as exc:
    load_error = str(exc)
def get_examples_by_score(score: int, n_examples: int = 5):
    """Return the text of the first rows whose score equals *score*.

    Rows are scanned in dataset order and the scan stops as soon as
    ``n_examples`` matches have been collected, so the whole split is not
    processed just to show a handful of samples.

    Args:
        score: Value compared (with ``==``) against each row's
            ``SCORE_COLUMN`` field.
        n_examples: Maximum number of texts to return. Zero or a negative
            value collects nothing.

    Returns:
        A list of strings. Normally these are the ``TEXT_COLUMN`` values of
        the matching rows (``""`` when a row has no such key). If the
        dataset failed to load, a single-element list with the load error
        message; if nothing matched, a single-element list saying so.
    """
    if dataset is None:
        return [f"Dataset could not be loaded: {load_error}"]

    examples = []
    if n_examples > 0:
        for item in dataset:
            if item.get(SCORE_COLUMN) != score:
                continue
            examples.append(item.get(TEXT_COLUMN, ""))
            # Stop early: no need to look at the rest of the split.
            if len(examples) >= n_examples:
                break

    if not examples:
        examples.append("No examples found for this score")
    return examples
def build_tabs(scores=range(6), examples_per_score: int = 2):
    """Create the "About" tab plus one tab of sample texts per score.

    Must be called inside an active ``gr.Blocks`` context, since the
    components are created for their side effect of attaching to it.

    Args:
        scores: Iterable of score values; one tab is created for each.
            Defaults to 0 through 5.
        examples_per_score: How many examples to request from
            ``get_examples_by_score`` for each score tab.

    Returns:
        The list of ``gr.Tab`` objects created, "About" first and then the
        score tabs in iteration order.
    """
    tabs = []

    with gr.Tab("About") as about_tab:
        gr.Markdown(
            f"# Dataset Inspector\nUsing dataset `{DATASET_NAME}`\n"
        )
        # Surface a load failure up front rather than only inside score tabs.
        if load_error:
            gr.Markdown(f"**Dataset failed to load:** {load_error}")
    tabs.append(about_tab)

    for score in scores:
        with gr.Tab(f"Score {score}") as score_tab:
            examples = get_examples_by_score(score, examples_per_score)
            # Example headings are numbered from 1 for display.
            for position, example in enumerate(examples, start=1):
                gr.Markdown(f"### Example {position}\n{example}")
        tabs.append(score_tab)

    return tabs
# Create the Blocks app, populate it with the tabs while its context is
# active, then start serving it.
demo = gr.Blocks(theme="default")
with demo:
    build_tabs()

demo.launch()