TKM03 committed on
Commit
4c84b65
·
verified ·
1 Parent(s): 940863d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -29
app.py CHANGED
@@ -3,48 +3,70 @@ from datasets import load_dataset
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
5
 
6
- # Load a dataset (you can change this to any HF dataset)
7
- dataset = load_dataset("ag_news", split="train[:1000]")
8
-
9
- # Convert to DataFrame
10
- df = pd.DataFrame(dataset)
11
-
12
- # Label map for better readability
13
- label_map = {
14
  0: "World",
15
  1: "Sports",
16
  2: "Business",
17
- 3: "Sci/Tech"
18
  }
19
- df["label_name"] = df["label"].map(label_map)
 
 
 
 
 
 
 
20
 
21
- def preview_data(n_rows):
 
 
 
 
22
  return df.head(n_rows)
23
 
24
- def plot_distribution():
25
- counts = df["label_name"].value_counts()
 
26
  fig, ax = plt.subplots()
27
- counts.plot(kind="bar", ax=ax, color="skyblue")
28
  ax.set_title("Label Distribution")
29
- ax.set_ylabel("Count")
30
  ax.set_xlabel("Category")
 
 
31
  return fig
32
 
33
- with gr.Blocks() as demo:
34
- gr.Markdown("# 🧠 AG News Dataset Explorer")
35
- gr.Markdown("Explore the AG News dataset from Hugging Face. Useful for data engineers and NLP practitioners.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- with gr.Row():
38
- num_slider = gr.Slider(1, 20, value=5, label="Number of Rows")
39
- data_output = gr.Dataframe()
40
-
41
- show_data_btn = gr.Button("Show Data")
42
- show_data_btn.click(preview_data, inputs=[num_slider], outputs=[data_output])
43
 
44
- gr.Markdown("## 📊 Class Distribution")
45
- dist_btn = gr.Button("Show Distribution Chart")
46
- chart_output = gr.Plot()
47
- dist_btn.click(plot_distribution, outputs=[chart_output])
 
48
 
49
- # Launch app
50
  demo.launch()
 
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
5
 
6
+ # ---------- CONFIGURATION ---------- #
7
+ DATASET_NAME = "ag_news"
8
+ LABEL_MAPPING = {
 
 
 
 
 
9
  0: "World",
10
  1: "Sports",
11
  2: "Business",
12
+ 3: "Science/Technology"
13
  }
14
+ SPLIT = "train[:1000]" # For demo purposes, only use a subset
15
+
16
+ # ---------- DATA LOADING ---------- #
17
+ def load_data():
18
+ dataset = load_dataset(DATASET_NAME, split=SPLIT)
19
+ df = pd.DataFrame(dataset)
20
+ df["label_name"] = df["label"].map(LABEL_MAPPING)
21
+ return df
22
 
23
+ df = load_data()
24
+
25
+ # ---------- APP FUNCTIONALITY ---------- #
26
+ def get_data_preview(n_rows: int = 5) -> pd.DataFrame:
27
+ """Return the top n rows of the dataset."""
28
  return df.head(n_rows)
29
 
30
+ def get_label_distribution_plot():
31
+ """Return a bar chart of label distribution."""
32
+ counts = df["label_name"].value_counts().sort_index()
33
  fig, ax = plt.subplots()
34
+ counts.plot(kind="bar", ax=ax, color="#4C72B0")
35
  ax.set_title("Label Distribution")
36
+ ax.set_ylabel("Number of Samples")
37
  ax.set_xlabel("Category")
38
+ ax.grid(axis="y", linestyle="--", alpha=0.7)
39
+ plt.tight_layout()
40
  return fig
41
 
42
+ # ---------- UI LAYOUT ---------- #
43
+ with gr.Blocks(title="AG News Dataset Explorer") as demo:
44
+ gr.Markdown("""
45
+ # 🧠 AG News Dataset Explorer
46
+ Explore a cleaned and labeled version of the AG News dataset using Hugging Face `datasets`.
47
+
48
+ - View sample records
49
+ - Understand label distribution
50
+ - Great for data engineers & NLP practitioners
51
+ """)
52
+
53
+ with gr.Tab("📄 Data Preview"):
54
+ with gr.Row():
55
+ n_slider = gr.Slider(1, 20, value=5, label="Number of rows to preview")
56
+ preview_btn = gr.Button("Show Preview")
57
+ preview_table = gr.Dataframe(label="Sample Rows")
58
+ preview_btn.click(get_data_preview, inputs=n_slider, outputs=preview_table)
59
 
60
+ with gr.Tab("📊 Label Distribution"):
61
+ dist_btn = gr.Button("Generate Distribution Plot")
62
+ dist_plot = gr.Plot(label="Label Distribution")
63
+ dist_btn.click(get_label_distribution_plot, outputs=dist_plot)
 
 
64
 
65
+ gr.Markdown("""
66
+ ---
67
+ 🔗 **Dataset**: [AG News on Hugging Face](https://huggingface.co/datasets/ag_news)
68
+ ✨ Built by a data engineering enthusiast using Python, Gradio, and Hugging Face.
69
+ """)
70
 
71
+ # ---------- LAUNCH ---------- #
72
  demo.launch()