sumuks HF Staff committed on
Commit
641f594
·
verified ·
1 Parent(s): 8e7fb97

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+
4
# Hub repository and split that the inspector reads from.
DATASET_NAME = "sumuks/fineweb-10BT-annotated"
SPLIT = "train"

# Dataset column names.
SCORE_COLUMN = "score"
TEXT_COLUMN = "text"
ID_COLUMN = "id"

# Load the dataset once when the app starts. A failure is recorded in
# `load_error` rather than raised, so the UI can still come up and report
# what went wrong; `dataset` stays None in that case.
dataset = None
load_error = None
try:
    dataset = load_dataset(DATASET_NAME, split=SPLIT)
except Exception as exc:
    load_error = str(exc)
19
+
20
+
21
def get_examples_by_score(score: int, n_examples: int = 5):
    """Return up to ``n_examples`` texts whose score column equals ``score``.

    Rows are taken in dataset order. The result is always a non-empty list
    meant for display: if the dataset failed to load, or no row matches, the
    list holds a single explanatory message instead of example texts.

    Args:
        score: Value compared (with ``==``) against each row's
            ``SCORE_COLUMN`` entry.
        n_examples: Maximum number of texts to return. Values <= 0 select
            nothing, so the "no examples" message is returned.

    Returns:
        A list with the matching rows' ``TEXT_COLUMN`` values, or a
        one-element list containing a message.
    """
    if dataset is None:
        return [f"Dataset could not be loaded: {load_error}"]

    limit = max(n_examples, 0)
    examples = []
    if limit:
        # Scan rows in order and stop as soon as `limit` matches are found.
        # This yields the same rows as filtering the whole dataset and
        # keeping the first `limit`, without a full pass for every call.
        for row in dataset:
            if row.get(SCORE_COLUMN) != score:
                continue
            examples.append(row.get(TEXT_COLUMN, ""))
            if len(examples) >= limit:
                break

    if not examples:
        examples.append("No examples found for this score")
    return examples
33
+
34
+
35
def build_tabs(scores=range(6), examples_per_score=2):
    """Create the "About" tab plus one tab of examples per score.

    Intended to be called inside a ``with gr.Blocks()`` context, as the
    module-level code does, so the created components attach to that app.

    Args:
        scores: Iterable of score values; one "Score <value>" tab is created
            for each. Defaults to 0 through 5.
        examples_per_score: Number of examples requested from
            ``get_examples_by_score`` for each score tab.

    Returns:
        The list of created ``gr.Tab`` objects, "About" first, then the score
        tabs in iteration order.
    """
    tabs = []

    with gr.Tab("About") as about_tab:
        gr.Markdown(
            f"# Dataset Inspector\nUsing dataset `{DATASET_NAME}`\n"
        )
        # Surface a startup load failure on the landing tab.
        if load_error:
            gr.Markdown(f"**Dataset failed to load:** {load_error}")
    tabs.append(about_tab)

    for score in scores:
        with gr.Tab(f"Score {score}") as score_tab:
            # NOTE: each entry is rendered as Markdown, so any Markdown
            # syntax inside the dataset text is interpreted, not shown raw.
            shown = get_examples_by_score(score, examples_per_score)
            for position, example in enumerate(shown, start=1):
                gr.Markdown(f"### Example {position}\n{example}")
        tabs.append(score_tab)

    return tabs
49
+
50
+
51
# Assemble the interface inside a Blocks context, then start serving it.
demo = gr.Blocks(theme="default")
with demo:
    build_tabs()
demo.launch()