File size: 2,294 Bytes
940863d
01cde28
 
940863d
01cde28
4c84b65
 
 
940863d
 
 
4c84b65
940863d
4c84b65
 
 
 
 
 
 
 
940863d
4c84b65
 
 
 
 
940863d
 
4c84b65
 
 
940863d
4c84b65
940863d
4c84b65
940863d
4c84b65
 
940863d
 
4c84b65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940863d
4c84b65
 
 
 
940863d
4c84b65
 
 
3241f6d
4c84b65
940863d
4c84b65
940863d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt

# ---------- CONFIGURATION ---------- #
DATASET_NAME = "ag_news"
# Class ids are contiguous and start at 0, so the id -> name table is built by
# enumerating the names in id order.
LABEL_MAPPING = dict(
    enumerate(("World", "Sports", "Business", "Science/Technology"))
)
SPLIT = "train[:1000]"  # For demo purposes, only use a subset

# ---------- DATA LOADING ---------- #
def load_data():
    """Load the configured dataset split into a DataFrame with readable labels.

    Reads ``DATASET_NAME`` / ``SPLIT`` via ``load_dataset`` and adds a
    ``label_name`` column obtained by mapping the ``label`` column through
    ``LABEL_MAPPING``.

    Returns:
        The resulting pandas DataFrame.
    """
    frame = pd.DataFrame(load_dataset(DATASET_NAME, split=SPLIT))
    label_names = frame["label"].map(LABEL_MAPPING)
    frame["label_name"] = label_names
    return frame

# Load the dataset once at import time; the handler functions below read this
# module-level frame rather than reloading it on each call.
df = load_data()

# ---------- APP FUNCTIONALITY ---------- #
def get_data_preview(n_rows: int = 5) -> pd.DataFrame:
    """Return the top rows of the module-level dataset frame.

    Args:
        n_rows: Number of leading rows to return. UI widgets such as sliders
            may deliver this as a float (e.g. ``5.0``), so the value is
            coerced to ``int`` before use; fractional parts are truncated.

    Returns:
        The result of ``df.head(int(n_rows))``.
    """
    # DataFrame.head() slices positionally and rejects non-integer counts,
    # so normalise whatever numeric type the caller handed us.
    return df.head(int(n_rows))

def get_label_distribution_plot():
    """Build a bar chart of the number of samples per label name.

    Returns:
        The matplotlib ``Figure`` holding the chart. The figure is removed
        from pyplot's global registry before it is returned, so repeated
        calls do not accumulate open figures.
    """
    # Sort by label name so the bar order is the same on every call.
    counts = df["label_name"].value_counts().sort_index()
    fig, ax = plt.subplots()
    counts.plot(kind="bar", ax=ax, color="#4C72B0")
    ax.set_title("Label Distribution")
    ax.set_ylabel("Number of Samples")
    ax.set_xlabel("Category")
    ax.grid(axis="y", linestyle="--", alpha=0.7)
    # Lay out this specific figure instead of pyplot's global "current
    # figure", which is not guaranteed to be `fig`.
    fig.tight_layout()
    # pyplot keeps every figure it creates alive until it is closed; the
    # Figure object itself remains usable by the caller after closing.
    plt.close(fig)
    return fig

# ---------- UI LAYOUT ---------- #
with gr.Blocks(title="AG News Dataset Explorer") as demo:
    # Page header with a short description of what the app offers.
    gr.Markdown("""
    # 🧠 AG News Dataset Explorer  
    Explore a cleaned and labeled version of the AG News dataset using Hugging Face `datasets`.

    - View sample records  
    - Understand label distribution  
    - Great for data engineers & NLP practitioners  
    """)

    # Tab 1: show the first N rows of the dataset on demand.
    with gr.Tab("📄 Data Preview"):
        with gr.Row():
            # step=1 keeps the slider on whole numbers, since the value is
            # used as a row count.
            n_slider = gr.Slider(1, 20, value=5, step=1, label="Number of rows to preview")
            preview_btn = gr.Button("Show Preview")
        preview_table = gr.Dataframe(label="Sample Rows")
        preview_btn.click(get_data_preview, inputs=n_slider, outputs=preview_table)

    # Tab 2: render the per-label sample counts as a bar chart on demand.
    with gr.Tab("📊 Label Distribution"):
        dist_btn = gr.Button("Generate Distribution Plot")
        dist_plot = gr.Plot(label="Label Distribution")
        dist_btn.click(get_label_distribution_plot, outputs=dist_plot)

    # Footer with a link to the dataset page.
    gr.Markdown("""
    ---
    🔗 **Dataset**: [AG News on Hugging Face](https://huggingface.co/datasets/ag_news)  
    ✨
    """)

# ---------- LAUNCH ---------- #
# Start the Gradio server for the `demo` Blocks app using launch()'s defaults.
demo.launch()