# Hugging Face Space (status at capture: Running)
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from datasets import load_dataset
# ---------- CONFIGURATION ---------- #
# Dataset identifier handed to `load_dataset`.
DATASET_NAME = "ag_news"

# Display name for each integer class id found in the "label" column.
LABEL_MAPPING = {
    0: "World",
    1: "Sports",
    2: "Business",
    3: "Science/Technology",
}

SPLIT = "train[:1000]"  # For demo purposes, only use a subset
# ---------- DATA LOADING ---------- #
def load_data() -> pd.DataFrame:
    """Load the configured dataset split and attach readable label names.

    Returns:
        A DataFrame with the rows of ``DATASET_NAME`` / ``SPLIT`` plus a
        ``label_name`` column obtained by mapping ``label`` through
        ``LABEL_MAPPING``. Label ids absent from the mapping become NaN
        (standard ``Series.map`` behaviour).
    """
    dataset = load_dataset(DATASET_NAME, split=SPLIT)
    # Use the dataset's own pandas export rather than feeding the Dataset
    # object to the DataFrame constructor.
    frame = dataset.to_pandas()
    frame["label_name"] = frame["label"].map(LABEL_MAPPING)
    return frame
# Loaded once at import time; the callbacks below read this shared frame.
df = load_data()
# ---------- APP FUNCTIONALITY ---------- #
def get_data_preview(n_rows: int = 5) -> pd.DataFrame:
    """Return the top n rows of the dataset.

    Args:
        n_rows: Number of leading rows to return. The value is supplied by a
            ``gr.Slider``, which may deliver a float, so it is truncated to
            an integer before slicing.

    Returns:
        The first ``n_rows`` rows of the module-level ``df``.
    """
    # DataFrame.head slices positionally and rejects non-integer counts.
    return df.head(int(n_rows))
def get_label_distribution_plot():
    """Return a bar chart of label distribution.

    Returns:
        A Matplotlib figure with one bar per ``label_name`` value in the
        module-level ``df``, ordered by category name.
    """
    counts = df["label_name"].value_counts().sort_index()
    fig, ax = plt.subplots()
    counts.plot(kind="bar", ax=ax, color="#4C72B0")
    ax.set_title("Label Distribution")
    ax.set_ylabel("Number of Samples")
    ax.set_xlabel("Category")
    ax.grid(axis="y", linestyle="--", alpha=0.7)
    # Lay out this specific figure rather than pyplot's global "current"
    # figure, which another call may have replaced in the meantime.
    fig.tight_layout()
    # pyplot keeps every figure it creates registered until it is closed;
    # unregister this one so repeated clicks do not accumulate figures.
    # The Figure object itself remains valid for the caller to render.
    plt.close(fig)
    return fig
# ---------- UI LAYOUT ---------- #
# NOTE(review): emoji were restored from mis-decoded UTF-8. The heading (🧠)
# and footer (✨) are exact; the two tab icons and the dataset-link icon lost
# their distinguishing bytes and are best guesses — confirm against the
# original app.
with gr.Blocks(title="AG News Dataset Explorer") as demo:
    gr.Markdown("""
    # 🧠 AG News Dataset Explorer
    Explore a cleaned and labeled version of the AG News dataset using Hugging Face `datasets`.
    - View sample records
    - Understand label distribution
    - Great for data engineers & NLP practitioners
    """)

    with gr.Tab("🔍 Data Preview"):
        with gr.Row():
            # step=1 keeps the slider on whole numbers; without it the
            # step is derived from the range and can be fractional, which
            # is not a valid row count.
            n_slider = gr.Slider(
                1, 20, value=5, step=1, label="Number of rows to preview"
            )
            preview_btn = gr.Button("Show Preview")
        preview_table = gr.Dataframe(label="Sample Rows")
        preview_btn.click(get_data_preview, inputs=n_slider, outputs=preview_table)

    with gr.Tab("📊 Label Distribution"):
        dist_btn = gr.Button("Generate Distribution Plot")
        dist_plot = gr.Plot(label="Label Distribution")
        dist_btn.click(get_label_distribution_plot, outputs=dist_plot)

    gr.Markdown("""
    ---
    🔗 **Dataset**: [AG News on Hugging Face](https://huggingface.co/datasets/ag_news)
    ✨
    """)
# ---------- LAUNCH ---------- #
# Start the server only when executed as a script, so importing this module
# (for tests or tooling) does not block inside launch().
if __name__ == "__main__":
    demo.launch()