# Hugging Face Space: yourbench/visualize-expert-level-filter
# Status: Running
# File size: 7,181 bytes

import gradio as gr
from datasets import load_dataset
import random

# Datasets offered in the UI: dropdown label -> Hugging Face Hub repository id.
# Insertion order is the order in which the choices are listed.
DATASETS = {
    "Main Dataset": "sumuks/fineweb-10BT-annotated",
    "Ablation Dataset": "sumuks/fineweb-10BT-annotated-ablation-1",
}

# Split requested from every dataset.
SPLIT: str = "train"

# Column names (from build.py)
SCORE_COLUMN: str = "score"
TEXT_COLUMN: str = "text"
ID_COLUMN: str = "id"
SUMMARY_COLUMN: str = "summary"
JUSTIFICATION_COLUMN: str = "justification"
THINKING_COLUMN: str = "thinking"
MODEL_COLUMN: str = "annotation_model"
DATE_COLUMN: str = "annotation_date"

# Global state shared by the handlers below
current_dataset = None  # the loaded dataset, or None until a load succeeds
dataset_name = None  # repository id of the most recently requested dataset
seen_ids = set()  # document IDs already returned by the random sampler

def load_selected_dataset(selected_dataset):
    """Load the dataset picked in the UI and make it the active dataset.

    The display name is resolved through ``DATASETS`` and the ``SPLIT`` split
    is loaded with ``load_dataset``. On a recognised selection the module-level
    ``dataset_name`` is updated and ``seen_ids`` is reset; ``current_dataset``
    is set to the loaded dataset, or to ``None`` if loading fails.

    Args:
        selected_dataset: Display name of the dataset (a key of ``DATASETS``).

    Returns:
        A human-readable status string describing success or failure.
    """
    global current_dataset, dataset_name, seen_ids

    repo_id = DATASETS.get(selected_dataset)
    if repo_id is None:
        # Unknown or empty selection: report it as a status message instead of
        # raising KeyError, and leave the currently loaded dataset untouched.
        return f"❌ Unknown dataset selection: {selected_dataset!r}"

    dataset_name = repo_id
    seen_ids = set()  # Reset seen examples when switching datasets

    try:
        current_dataset = load_dataset(dataset_name, split=SPLIT)
    except Exception as e:
        # Any load failure is surfaced in the status line rather than crashing
        # the UI handler.
        current_dataset = None
        return f"❌ Failed to load {dataset_name}: {e}"
    return f"✅ Loaded {len(current_dataset)} examples from {dataset_name}"

def get_examples_by_score(score: int, n_examples: int = 5, show_details: bool = False):
    """Return Markdown for a random sample of examples with the given score.

    Rows of ``current_dataset`` whose ``SCORE_COLUMN`` equals ``score`` are
    selected, and up to ``n_examples`` of them are sampled without replacement.

    Args:
        score: Score value to match exactly.
        n_examples: Maximum number of examples to show. Non-integer values are
            truncated with ``int()``; the count is clamped to the range
            ``[0, number of matching rows]``.
        show_details: When true, include the summary, justification, thinking
            and model/date fields (each only if non-empty).

    Returns:
        A Markdown string with one section per example, or a short message if
        no dataset is loaded or no row matches.
    """
    if current_dataset is None:
        return "Please select and load a dataset first."

    subset = current_dataset.filter(lambda x: x.get(SCORE_COLUMN) == score)
    total_available = len(subset)
    if total_available == 0:
        return "No examples found for this score."

    # The count comes from a UI control and may arrive as a float or be out of
    # range; random.sample requires an int between 0 and the population size.
    n = max(0, min(total_available, int(n_examples)))

    examples_text = []
    # Randomly sample indices instead of taking the first n
    for idx in random.sample(range(total_available), n):
        item = subset[idx]
        example_id = item.get(ID_COLUMN, "Unknown")
        text = item.get(TEXT_COLUMN, "")

        parts = [f"**Document ID:** {example_id}\n\n"]
        if show_details:
            labelled_fields = (
                ("Summary", item.get(SUMMARY_COLUMN, "")),
                ("Justification", item.get(JUSTIFICATION_COLUMN, "")),
                ("Thinking Process", item.get(THINKING_COLUMN, "")),
            )
            # Empty or missing annotation fields are skipped entirely.
            parts.extend(
                f"**{label}:** {value}\n\n" for label, value in labelled_fields if value
            )
            model = item.get(MODEL_COLUMN, "")
            if model:
                date = item.get(DATE_COLUMN, "")
                parts.append(f"**Model:** {model} | **Date:** {date}\n\n")

        parts.append(f"**Text:**\n{text}\n\n---\n")
        examples_text.append("".join(parts))

    return "\n".join(examples_text)

def get_random_unseen_example(show_details: bool = False):
    """Return Markdown for one random example whose ID has not been shown yet.

    IDs that were already returned are tracked in the module-level
    ``seen_ids`` set. Once every ID has been shown the set is cleared and
    sampling starts over.

    Args:
        show_details: When true, include the summary, justification, thinking
            and model/date fields (each only if non-empty).

    Returns:
        A Markdown string for the chosen example, or a short message if no
        dataset is loaded or the dataset is empty.
    """
    if current_dataset is None:
        return "Please select and load a dataset first."

    # Read the ID column once and work with row indices, so no second column
    # read or linear ``.index`` search is needed to locate the chosen row.
    all_ids = current_dataset[ID_COLUMN]
    total = len(all_ids)
    if total == 0:
        return "No examples available in dataset."

    unseen_rows = [row for row, doc_id in enumerate(all_ids) if doc_id not in seen_ids]
    if not unseen_rows:
        # Reset if we've seen everything
        seen_ids.clear()
        unseen_rows = range(total)

    # Pick a random row whose ID is unseen and remember that ID
    item_idx = random.choice(unseen_rows)
    random_id = all_ids[item_idx]
    seen_ids.add(random_id)
    item = current_dataset[item_idx]

    # Extract data
    text = item.get(TEXT_COLUMN, "")
    score = item.get(SCORE_COLUMN, "N/A")

    # Build display
    parts = [f"**Document ID:** {random_id} | **Score:** {score}\n\n"]
    if show_details:
        labelled_fields = (
            ("Summary", item.get(SUMMARY_COLUMN, "")),
            ("Justification", item.get(JUSTIFICATION_COLUMN, "")),
            ("Thinking Process", item.get(THINKING_COLUMN, "")),
        )
        # Empty or missing annotation fields are skipped entirely.
        parts.extend(
            f"**{label}:** {value}\n\n" for label, value in labelled_fields if value
        )
        model = item.get(MODEL_COLUMN, "")
        if model:
            date = item.get(DATE_COLUMN, "")
            parts.append(f"**Model:** {model} | **Date:** {date}\n\n")

    parts.append(f"**Text:**\n{text}")
    return "".join(parts)

def build_interface():
    """Build the Gradio Blocks UI for inspecting the annotated datasets.

    The page contains a dataset dropdown with a load button, a status line,
    a global "show annotation details" checkbox, one random-sampling tab and
    one browsing tab per score in ``range(6)`` (scores 0 through 5).

    Returns:
        The assembled ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    with gr.Blocks(theme="default", title="Dataset Inspector") as demo:
        gr.Markdown("# 📊 Expert Content Classification Dataset Inspector")

        # Dataset selection row: dropdown (wider) next to the load button
        with gr.Row():
            with gr.Column(scale=2):
                dataset_dropdown = gr.Dropdown(
                    choices=list(DATASETS.keys()),
                    label="Select Dataset",
                    value="Main Dataset"
                )
            with gr.Column(scale=1):
                load_btn = gr.Button("Load Dataset", variant="primary")

        # Receives the status string returned by load_selected_dataset
        status_display = gr.Markdown("")

        # Single checkbox shared by the random tab and every score tab
        with gr.Row():
            show_details_global = gr.Checkbox(
                label="Show annotation details (summary, justification, thinking)",
                value=False
            )

        with gr.Tabs():
            # Random sampling tab
            with gr.Tab("🎲 Random Sampling"):
                gr.Markdown("Sample random examples you haven't seen before")
                with gr.Row():
                    sample_btn = gr.Button("Get Random Example", variant="secondary", size="lg")
                random_output = gr.Markdown("")

            # Score-based browsing tabs
            for score in range(6):
                with gr.Tab(f"⭐ Score {score}"):
                    gr.Markdown(f"Browse examples with quality score {score}")
                    with gr.Row():
                        n_examples = gr.Slider(
                            minimum=1,
                            maximum=20,
                            value=3,
                            step=1,
                            label="Number of examples"
                        )
                        show_btn = gr.Button(f"Show Score {score} Examples", variant="secondary")

                    score_output = gr.Markdown("")

                    # Set up the click handler for this score
                    # The `s=score` default captures the current loop value, so
                    # each tab's handler keeps its own score instead of the
                    # last value of the loop variable.
                    show_btn.click(
                        fn=lambda n, details, s=score: get_examples_by_score(s, n, details),
                        inputs=[n_examples, show_details_global],
                        outputs=score_output
                    )

        # Event handlers
        load_btn.click(
            fn=load_selected_dataset,
            inputs=dataset_dropdown,
            outputs=status_display
        )

        sample_btn.click(
            fn=get_random_unseen_example,
            inputs=show_details_global,
            outputs=random_output
        )

        # Load default dataset on startup
        # NOTE(review): presumably the Blocks `load` event fires each time the
        # page is opened, which would re-run the load and reset `seen_ids` —
        # confirm against the Gradio documentation.
        demo.load(
            fn=lambda: load_selected_dataset("Main Dataset"),
            outputs=status_display
        )

    return demo

# Script entry point: build the UI and start the Gradio server only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    demo = build_interface()
    demo.launch()