Ben Burtenshaw committed on
Commit
7c4fb72
·
1 Parent(s): 1fdaf11

feat: remove column casting type inference

Browse files
Files changed (2) hide show
  1. app.py +102 -61
  2. src/argilla_utils.py +14 -31
app.py CHANGED
@@ -5,7 +5,6 @@ from src import dataset
5
  from src import spaces
6
 
7
 
8
-
9
  def refresh_dataset_settings_view(
10
  columns,
11
  question_columns,
@@ -133,73 +132,115 @@ with gr.Blocks() as app:
133
  )
134
 
135
  # Field columns
136
- field_columns_view = gr.Dropdown(
137
- label="Field Columns",
138
- info="Columns to be used as fields in the Argilla dataset",
139
- choices=dataset.load_columns(),
140
- multiselect=True,
141
- value=dataset.get_field_columns(),
142
- allow_custom_value=True,
143
- )
144
- field_columns_view.change(
145
- fn=lambda value: gr.update(choices=dataset.load_columns()),
146
- inputs=[field_columns_view],
147
- outputs=[field_columns_view],
148
- )
 
149
 
150
  # Question columns
151
- question_columns_view = gr.Dropdown(
152
- label="Question Columns",
153
- info="Columns to be used as question suggestions in the Argilla dataset",
154
- choices=dataset.load_columns(),
155
- multiselect=True,
156
- value=dataset.get_field_columns(),
157
- allow_custom_value=True,
158
- )
159
 
160
- question_columns_view.change(
161
- fn=lambda value: gr.update(choices=dataset.load_columns()),
162
- inputs=[question_columns_view],
163
- outputs=[question_columns_view],
164
- )
165
-
166
- with gr.Accordion(label="Define New Questions", open=False):
167
- with gr.Group():
168
- with gr.Column():
169
- question_type = gr.Dropdown(
170
- label="Question Type",
171
- info="The type of question to be added to the Argilla dataset",
172
- choices=["Text", "Label", "Rating"],
173
- )
174
- with gr.Column():
175
- question_name = gr.Textbox(
176
- label="Question Name",
177
- info="The name of the question to be added to the Argilla dataset",
178
- )
179
- with gr.Column():
180
- gr.Button(value="Add Question").click(
181
- fn=lambda type, name, questions: questions
182
- + [(type, name)],
183
- inputs=[
184
- question_type,
185
- question_name,
186
- question_columns_view,
187
- ],
188
- outputs=[question_columns_view],
189
- )
190
-
191
- with gr.Accordion(label="Define Metadata and Vectors", open=False):
192
- metadata_columns_view = gr.Dropdown(
193
- label="Metadata Columns",
194
- info="Columns to be used as metadata in the Argilla dataset",
195
  choices=dataset.load_columns(),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  multiselect=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  )
198
- vector_columns_view = gr.Dropdown(
199
- label="Vector Columns",
200
- info="Columns to be used as vectors in the Argilla dataset",
 
 
 
 
 
 
 
 
 
201
  choices=dataset.load_columns(),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  multiselect=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  )
204
 
205
  n_records = gr.Slider(1, 10000, 100, label="Number of Records")
@@ -258,7 +299,7 @@ with gr.Blocks() as app:
258
  field_columns_view,
259
  question_columns_view,
260
  metadata_columns_view,
261
- vector_columns_view,
262
  ],
263
  outputs=[records_view, mapping],
264
  )
 
5
  from src import spaces
6
 
7
 
 
8
  def refresh_dataset_settings_view(
9
  columns,
10
  question_columns,
 
132
  )
133
 
134
  # Field columns
135
+ with gr.Accordion(label="Fields", open=True):
136
+ field_columns_view = gr.Dropdown(
137
+ label="Column",
138
+ info="Columns to be used as fields in the Argilla dataset",
139
+ choices=dataset.load_columns(),
140
+ multiselect=True,
141
+ value=dataset.get_field_columns(),
142
+ allow_custom_value=True,
143
+ )
144
+ field_columns_view.change(
145
+ fn=lambda value: gr.update(value=[]),
146
+ inputs=[field_columns_view],
147
+ outputs=[field_columns_view],
148
+ )
149
 
150
  # Question columns
 
 
 
 
 
 
 
 
151
 
152
+ with gr.Accordion(label="Questions", open=True):
153
+ question_type = gr.Dropdown(
154
+ label="Type",
155
+ info="The type of question to be added to the Argilla dataset",
156
+ choices=["Text", "Label", "Rating"],
157
+ )
158
+ question_column = gr.Dropdown(
159
+ label="Column",
160
+ info="Column in the hub dataset to be used as question suggestions in the Argilla dataset",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  choices=dataset.load_columns(),
162
+ allow_custom_value=True,
163
+ )
164
+
165
+ question_name = gr.Textbox(
166
+ label="Name",
167
+ info="The name of the question to be added to the Argilla dataset",
168
+ )
169
+ question_column.select(
170
+ fn=lambda value: value,
171
+ inputs=[question_column],
172
+ outputs=[question_name],
173
+ )
174
+ add_question_btn = gr.Button(value="Add Question")
175
+ question_columns_view = gr.Dropdown(
176
+ label="Question Columns",
177
+ info="Columns to be used as question suggestions in the Argilla dataset",
178
  multiselect=True,
179
+ allow_custom_value=True,
180
+ value=[],
181
+ )
182
+
183
+ # question_columns_view.change(
184
+ # fn=lambda value: gr.update(value=[]),
185
+ # inputs=[question_columns_view],
186
+ # outputs=[question_columns_view],
187
+ # )
188
+
189
+ add_question_btn.click(
190
+ fn=lambda type, name, column, questions: questions
191
+ + [(type, name, column)],
192
+ inputs=[
193
+ question_type,
194
+ question_name,
195
+ question_column,
196
+ question_columns_view,
197
+ ],
198
+ outputs=[question_columns_view],
199
  )
200
+
201
+ # Metadata columns
202
+
203
+ with gr.Accordion(label="Metadata", open=True):
204
+ metadata_type = gr.Dropdown(
205
+ label="Type",
206
+ info="The type of metadata to be added to the Argilla dataset",
207
+ choices=["Integer", "Float", "Term"],
208
+ )
209
+ metadata_column = gr.Dropdown(
210
+ label="Column",
211
+ info="Column in the hub dataset to be used as metadata suggestions in the Argilla dataset",
212
  choices=dataset.load_columns(),
213
+ allow_custom_value=True,
214
+ )
215
+
216
+ metadata_name = gr.Textbox(
217
+ label="Name",
218
+ info="The name of the metadata to be added to the Argilla dataset",
219
+ )
220
+ metadata_column.select(
221
+ fn=lambda value: value,
222
+ inputs=[metadata_column],
223
+ outputs=[question_name],
224
+ )
225
+ add_metadata_btn = gr.Button(value="Add Metadata")
226
+ metadata_columns_view = gr.Dropdown(
227
+ label="Metadata Columns",
228
+ info="Columns to be used as metadata suggestions in the Argilla dataset",
229
  multiselect=True,
230
+ allow_custom_value=True,
231
+ value=[],
232
+ )
233
+
234
+ add_metadata_btn.click(
235
+ fn=lambda type, name, column, metadata: metadata
236
+ + [(type, name, column)],
237
+ inputs=[
238
+ metadata_type,
239
+ metadata_name,
240
+ metadata_column,
241
+ metadata_columns_view,
242
+ ],
243
+ outputs=[metadata_columns_view],
244
  )
245
 
246
  n_records = gr.Slider(1, 10000, 100, label="Number of Records")
 
299
  field_columns_view,
300
  question_columns_view,
301
  metadata_columns_view,
302
+ # vector_columns_view,
303
  ],
304
  outputs=[records_view, mapping],
305
  )
src/argilla_utils.py CHANGED
@@ -11,13 +11,14 @@ from src.dataset import (
11
  is_float,
12
  get_feature_values,
13
  get_feature_labels,
 
14
  )
15
 
16
  client = rg.Argilla(api_url="http://localhost:6900", api_key="owner.apikey")
17
 
18
 
19
  def define_dataset_setting(
20
- dataset_name, field_columns, question_columns, metadata_columns, vector_columns
21
  ):
22
  split = load_split()
23
 
@@ -31,17 +32,7 @@ def define_dataset_setting(
31
  mapping[column_name] = field_column_name
32
 
33
  # Add question columns
34
- for column_name in question_columns:
35
- if isinstance(column_name, (list, tuple)):
36
- question_type, column_name = column_name
37
- elif is_label(split, column_name):
38
- question_type = "Label"
39
- elif is_rating(split, column_name):
40
- question_type = "Rating"
41
- else:
42
- question_type = "Text"
43
-
44
- question_column_name = f"{column_name}_question"
45
  if question_type == "Label":
46
  values = get_feature_values(split, column_name)
47
  titles = get_feature_labels(split, column_name)
@@ -63,29 +54,21 @@ def define_dataset_setting(
63
  if not metadata_columns:
64
  metadata_columns = []
65
 
66
- for column_name in metadata_columns:
67
- metadata_column_name = f"{column_name}_metadata"
68
- if is_int(split, column_name):
69
- metadata.append(rg.IntegerMetadataProperty(name=metadata_column_name))
70
- elif is_float(split, column_name):
71
- metadata.append(rg.FloatMetadataProperty(name=metadata_column_name))
72
- elif is_label:
73
  values = list(map(str, get_feature_values(split, column_name)))
74
  metadata.append(
75
- rg.TermsMetadataProperty(name=metadata_column_name, options=values)
76
  )
77
- mapping[column_name] = metadata_column_name
78
-
79
- # Add vector columns
80
- if not vector_columns:
81
- vector_columns = []
82
-
83
- for column_name in vector_columns:
84
- vectors.append(rg.VectorField(name=column_name))
85
 
86
- settings = rg.Settings(
87
- fields=fields, questions=questions, metadata=metadata, vectors=vectors
88
- )
89
 
90
  dataset = rg.Dataset(name=dataset_name, settings=settings, client=client)
91
 
 
11
  is_float,
12
  get_feature_values,
13
  get_feature_labels,
14
+ load_repo_id,
15
  )
16
 
17
  client = rg.Argilla(api_url="http://localhost:6900", api_key="owner.apikey")
18
 
19
 
20
  def define_dataset_setting(
21
+ dataset_name, field_columns, question_columns, metadata_columns
22
  ):
23
  split = load_split()
24
 
 
32
  mapping[column_name] = field_column_name
33
 
34
  # Add question columns
35
+ for question_type, question_column_name, column_name in question_columns:
 
 
 
 
 
 
 
 
 
 
36
  if question_type == "Label":
37
  values = get_feature_values(split, column_name)
38
  titles = get_feature_labels(split, column_name)
 
54
  if not metadata_columns:
55
  metadata_columns = []
56
 
57
+ for metadata_type, metadata_name, column_name in metadata_columns:
58
+ if metadata_type == "Integer":
59
+ metadata.append(rg.IntegerMetadataProperty(name=metadata_name))
60
+ elif metadata_type == "Float":
61
+ metadata.append(rg.FloatMetadataProperty(name=metadata_name))
62
+ elif metadata_type == "Term":
 
63
  values = list(map(str, get_feature_values(split, column_name)))
64
  metadata.append(
65
+ rg.TermsMetadataProperty(name=metadata_name, options=values)
66
  )
67
+ if column_name in mapping:
68
+ column_name = f"{column_name}__"
69
+ mapping[column_name] = metadata_name
 
 
 
 
 
70
 
71
+ settings = rg.Settings(fields=fields, questions=questions, metadata=metadata)
 
 
72
 
73
  dataset = rg.Dataset(name=dataset_name, settings=settings, client=client)
74