Spaces:
Running
Running
File size: 2,294 Bytes
940863d 01cde28 940863d 01cde28 4c84b65 940863d 4c84b65 940863d 4c84b65 940863d 4c84b65 940863d 4c84b65 940863d 4c84b65 940863d 4c84b65 940863d 4c84b65 940863d 4c84b65 940863d 4c84b65 940863d 4c84b65 3241f6d 4c84b65 940863d 4c84b65 940863d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import gradio as gr
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
# ---------- CONFIGURATION ---------- #
# Dataset identifier handed to `load_dataset`.
DATASET_NAME = "ag_news"
# Slice of the train split to load; kept small because this is only a demo.
SPLIT = "train[:1000]"
# Display names for the integer values of the `label` column, keyed by
# their position (0-3).
LABEL_MAPPING = dict(
    enumerate(("World", "Sports", "Business", "Science/Technology"))
)
# ---------- DATA LOADING ---------- #
def load_data():
    """Load the configured dataset slice as a DataFrame with readable labels.

    Fetches split ``SPLIT`` of ``DATASET_NAME`` through ``load_dataset``,
    converts the result to a pandas DataFrame, and adds a ``label_name``
    column by mapping the ``label`` column through ``LABEL_MAPPING``.
    """
    frame = pd.DataFrame(load_dataset(DATASET_NAME, split=SPLIT))
    readable_labels = frame["label"].map(LABEL_MAPPING)
    frame["label_name"] = readable_labels
    return frame
# Load the dataset once at import time; the preview and plot handlers
# below read this shared module-level frame.
df = load_data()
# ---------- APP FUNCTIONALITY ---------- #
def get_data_preview(n_rows: int = 5) -> pd.DataFrame:
    """Return the top ``n_rows`` rows of the module-level dataset frame.

    Args:
        n_rows: Number of rows to return. A UI slider may deliver this as a
            float (e.g. ``5.0`` or ``5.3``), so it is truncated to an int
            before slicing.

    Returns:
        The leading rows of ``df``.
    """
    # DataFrame.head() slices positionally and raises TypeError on a float
    # count, so coerce before calling it.
    return df.head(int(n_rows))
def get_label_distribution_plot():
    """Return a bar chart of label distribution.

    Counts the rows of ``df`` per ``label_name`` value, orders the counts by
    label name, and draws them as a bar chart.

    Returns:
        The matplotlib Figure holding the chart. The figure is detached from
        pyplot's registry before being returned, so repeated calls do not
        accumulate open figures.
    """
    counts = df["label_name"].value_counts().sort_index()
    fig, ax = plt.subplots()
    counts.plot(kind="bar", ax=ax, color="#4C72B0")
    ax.set_title("Label Distribution")
    ax.set_ylabel("Number of Samples")
    ax.set_xlabel("Category")
    ax.grid(axis="y", linestyle="--", alpha=0.7)
    # Lay out this specific figure rather than pyplot's implicit "current" one.
    fig.tight_layout()
    # pyplot keeps every figure it creates alive until it is closed; release
    # it here so each button click does not leak a figure. The Figure object
    # itself remains usable for rendering by the caller.
    plt.close(fig)
    return fig
# ---------- UI LAYOUT ---------- #
with gr.Blocks(title="AG News Dataset Explorer") as demo:
    # Page header: title and a short list of what the app offers.
    gr.Markdown(
        "# 🧠 AG News Dataset Explorer\n"
        "Explore a cleaned and labeled version of the AG News dataset using Hugging Face `datasets`.\n"
        "- View sample records\n"
        "- Understand label distribution\n"
        "- Great for data engineers & NLP practitioners\n"
    )

    # Tab 1: show the first N rows of the dataset on demand.
    with gr.Tab("Data Preview"):
        with gr.Row():
            # step=1 restricts the slider to whole numbers; a fractional row
            # count cannot be used to slice the DataFrame.
            n_slider = gr.Slider(
                1, 20, value=5, step=1, label="Number of rows to preview"
            )
            preview_btn = gr.Button("Show Preview")
        preview_table = gr.Dataframe(label="Sample Rows")
        preview_btn.click(get_data_preview, inputs=n_slider, outputs=preview_table)

    # Tab 2: render the per-label sample counts as a bar chart on demand.
    with gr.Tab("Label Distribution"):
        dist_btn = gr.Button("Generate Distribution Plot")
        dist_plot = gr.Plot(label="Label Distribution")
        dist_btn.click(get_label_distribution_plot, outputs=dist_plot)

    # Footer with a link to the dataset page.
    gr.Markdown(
        "---\n"
        "**Dataset**: [AG News on Hugging Face](https://huggingface.co/datasets/ag_news)\n"
        "✨\n"
    )
# ---------- LAUNCH ---------- #
# Start serving the `demo` Blocks app built above.
demo.launch()
|