Spaces:
Runtime error
Runtime error
Commit
·
7314f90
1
Parent(s):
7835b60
feat: add login css
Browse files
src/distilabel_dataset_generator/apps/sft.py
CHANGED
|
@@ -33,7 +33,10 @@ from src.distilabel_dataset_generator.pipelines.sft import (
|
|
| 33 |
get_response_generator,
|
| 34 |
)
|
| 35 |
from src.distilabel_dataset_generator.utils import (
|
|
|
|
|
|
|
| 36 |
get_org_dropdown,
|
|
|
|
| 37 |
)
|
| 38 |
|
| 39 |
|
|
@@ -341,77 +344,78 @@ def push_dataset_to_argilla(
|
|
| 341 |
return ""
|
| 342 |
|
| 343 |
|
| 344 |
-
with gr.Blocks() as app:
|
| 345 |
-
gr.
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
with gr.
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
examples=
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
|
|
|
| 367 |
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
|
| 416 |
gr.on(
|
| 417 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
|
@@ -457,4 +461,5 @@ with gr.Blocks() as app:
|
|
| 457 |
inputs=[org_name, repo_name],
|
| 458 |
outputs=[success_message],
|
| 459 |
)
|
|
|
|
| 460 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
|
|
|
| 33 |
get_response_generator,
|
| 34 |
)
|
| 35 |
from src.distilabel_dataset_generator.utils import (
|
| 36 |
+
_LOGGED_OUT_CSS,
|
| 37 |
+
get_argilla_client,
|
| 38 |
get_org_dropdown,
|
| 39 |
+
swap_visibilty,
|
| 40 |
)
|
| 41 |
|
| 42 |
|
|
|
|
| 344 |
return ""
|
| 345 |
|
| 346 |
|
| 347 |
+
with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
| 348 |
+
with gr.Column() as main_ui:
|
| 349 |
+
gr.Markdown("## Describe the dataset you want")
|
| 350 |
+
gr.HTML("<hr>")
|
| 351 |
+
with gr.Row():
|
| 352 |
+
with gr.Column(scale=1):
|
| 353 |
+
dataset_description = gr.Textbox(
|
| 354 |
+
label="Dataset description",
|
| 355 |
+
placeholder="Give a precise description of your desired dataset.",
|
| 356 |
+
)
|
| 357 |
+
examples = gr.Examples(
|
| 358 |
+
examples=DEFAULT_DATASET_DESCRIPTIONS,
|
| 359 |
+
inputs=[dataset_description],
|
| 360 |
+
cache_examples=False,
|
| 361 |
+
label="Example descriptions",
|
| 362 |
+
)
|
| 363 |
+
system_prompt = gr.Textbox(
|
| 364 |
+
label="System prompt",
|
| 365 |
+
placeholder="You are a helpful assistant.",
|
| 366 |
+
visible=False,
|
| 367 |
+
)
|
| 368 |
+
load_btn = gr.Button("Load Dataset")
|
| 369 |
+
with gr.Column(scale=3):
|
| 370 |
+
pass
|
| 371 |
|
| 372 |
+
gr.Markdown("## Configure your task")
|
| 373 |
+
gr.HTML("<hr>")
|
| 374 |
+
with gr.Row():
|
| 375 |
+
with gr.Column(scale=1):
|
| 376 |
+
num_turns = gr.Number(
|
| 377 |
+
value=1,
|
| 378 |
+
label="Number of turns in the conversation",
|
| 379 |
+
minimum=1,
|
| 380 |
+
maximum=4,
|
| 381 |
+
step=1,
|
| 382 |
+
interactive=True,
|
| 383 |
+
info="Choose between 1 (single turn with 'instruction-response' columns) and 2-4 (multi-turn conversation with a 'messages' column).",
|
| 384 |
+
)
|
| 385 |
+
btn_apply_to_sample_dataset = gr.Button("Refresh dataset")
|
| 386 |
+
with gr.Column(scale=3):
|
| 387 |
+
dataframe = gr.Dataframe()
|
| 388 |
|
| 389 |
+
gr.Markdown("## Generate your dataset")
|
| 390 |
+
gr.HTML("<hr>")
|
| 391 |
+
with gr.Row():
|
| 392 |
+
with gr.Column(scale=1):
|
| 393 |
+
org_name = get_org_dropdown()
|
| 394 |
+
repo_name = gr.Textbox(
|
| 395 |
+
label="Repo name",
|
| 396 |
+
placeholder="dataset_name",
|
| 397 |
+
value=f"my-distiset-{str(uuid.uuid4())[:8]}",
|
| 398 |
+
interactive=True,
|
| 399 |
+
)
|
| 400 |
+
n_rows = gr.Number(
|
| 401 |
+
label="Number of rows",
|
| 402 |
+
value=10,
|
| 403 |
+
interactive=True,
|
| 404 |
+
scale=1,
|
| 405 |
+
)
|
| 406 |
+
private = gr.Checkbox(
|
| 407 |
+
label="Private dataset",
|
| 408 |
+
value=False,
|
| 409 |
+
interactive=True,
|
| 410 |
+
scale=1,
|
| 411 |
+
)
|
| 412 |
+
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
| 413 |
+
with gr.Column(scale=3):
|
| 414 |
+
success_message = gr.Markdown()
|
| 415 |
|
| 416 |
+
pipeline_code = get_pipeline_code_ui(
|
| 417 |
+
generate_pipeline_code(system_prompt.value, num_turns.value, n_rows.value)
|
| 418 |
+
)
|
| 419 |
|
| 420 |
gr.on(
|
| 421 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
|
|
|
| 461 |
inputs=[org_name, repo_name],
|
| 462 |
outputs=[success_message],
|
| 463 |
)
|
| 464 |
+
app.load(fn=swap_visibilty, outputs=main_ui)
|
| 465 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
src/distilabel_dataset_generator/apps/textcat.py
CHANGED
|
@@ -33,8 +33,11 @@ from src.distilabel_dataset_generator.pipelines.textcat import (
|
|
| 33 |
get_textcat_generator,
|
| 34 |
)
|
| 35 |
from src.distilabel_dataset_generator.utils import (
|
|
|
|
|
|
|
| 36 |
get_org_dropdown,
|
| 37 |
get_preprocess_labels,
|
|
|
|
| 38 |
)
|
| 39 |
|
| 40 |
|
|
@@ -350,118 +353,119 @@ def update_max_num_labels(labels):
|
|
| 350 |
return gr.update(maximum=len(labels) if labels else 1)
|
| 351 |
|
| 352 |
|
| 353 |
-
with gr.Blocks() as app:
|
| 354 |
-
gr.
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
with gr.
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
examples=
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
with gr.
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
with gr.
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
)
|
| 451 |
-
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
| 452 |
-
with gr.Column(scale=3):
|
| 453 |
-
success_message = gr.Markdown(visible=True)
|
| 454 |
-
|
| 455 |
-
pipeline_code = get_pipeline_code_ui(
|
| 456 |
-
generate_pipeline_code(
|
| 457 |
-
system_prompt.value,
|
| 458 |
-
difficulty=difficulty.value,
|
| 459 |
-
clarity=clarity.value,
|
| 460 |
-
labels=labels.value,
|
| 461 |
-
num_labels=num_labels.value,
|
| 462 |
-
num_rows=n_rows.value,
|
| 463 |
)
|
| 464 |
-
)
|
| 465 |
|
| 466 |
gr.on(
|
| 467 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
|
@@ -518,5 +522,5 @@ with gr.Blocks() as app:
|
|
| 518 |
inputs=[org_name, repo_name],
|
| 519 |
outputs=[success_message],
|
| 520 |
)
|
| 521 |
-
|
| 522 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
|
|
|
| 33 |
get_textcat_generator,
|
| 34 |
)
|
| 35 |
from src.distilabel_dataset_generator.utils import (
|
| 36 |
+
_LOGGED_OUT_CSS,
|
| 37 |
+
get_argilla_client,
|
| 38 |
get_org_dropdown,
|
| 39 |
get_preprocess_labels,
|
| 40 |
+
swap_visibilty,
|
| 41 |
)
|
| 42 |
|
| 43 |
|
|
|
|
| 353 |
return gr.update(maximum=len(labels) if labels else 1)
|
| 354 |
|
| 355 |
|
| 356 |
+
with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
| 357 |
+
with gr.Column() as main_ui:
|
| 358 |
+
gr.Markdown("## Describe the dataset you want")
|
| 359 |
+
gr.HTML("<hr>")
|
| 360 |
+
with gr.Row():
|
| 361 |
+
with gr.Column(scale=1):
|
| 362 |
+
dataset_description = gr.Textbox(
|
| 363 |
+
label="Dataset description",
|
| 364 |
+
placeholder="Give a precise description of your desired dataset.",
|
| 365 |
+
)
|
| 366 |
+
examples = gr.Examples(
|
| 367 |
+
examples=DEFAULT_DATASET_DESCRIPTIONS,
|
| 368 |
+
inputs=[dataset_description],
|
| 369 |
+
cache_examples=False,
|
| 370 |
+
label="Example descriptions",
|
| 371 |
+
)
|
| 372 |
+
system_prompt = gr.Textbox(
|
| 373 |
+
label="System prompt",
|
| 374 |
+
placeholder="You are a helpful assistant.",
|
| 375 |
+
visible=False,
|
| 376 |
+
)
|
| 377 |
+
load_btn = gr.Button("Load Dataset")
|
| 378 |
+
with gr.Column(scale=3):
|
| 379 |
+
pass
|
| 380 |
+
|
| 381 |
+
gr.Markdown("## Configure your task")
|
| 382 |
+
gr.HTML("<hr>")
|
| 383 |
+
with gr.Row():
|
| 384 |
+
with gr.Column(scale=1):
|
| 385 |
+
difficulty = gr.Dropdown(
|
| 386 |
+
choices=[
|
| 387 |
+
("High School", "high school"),
|
| 388 |
+
("College", "college"),
|
| 389 |
+
("PhD", "PhD"),
|
| 390 |
+
("Mixed", "mixed"),
|
| 391 |
+
],
|
| 392 |
+
value="mixed",
|
| 393 |
+
label="Difficulty",
|
| 394 |
+
info="Select the comprehension level for the text. Ensure it matches the task context.",
|
| 395 |
+
interactive=True,
|
| 396 |
+
)
|
| 397 |
+
clarity = gr.Dropdown(
|
| 398 |
+
choices=[
|
| 399 |
+
("Clear", "clear"),
|
| 400 |
+
(
|
| 401 |
+
"Understandable",
|
| 402 |
+
"understandable with some effort",
|
| 403 |
+
),
|
| 404 |
+
("Ambiguous", "ambiguous"),
|
| 405 |
+
("Mixed", "mixed"),
|
| 406 |
+
],
|
| 407 |
+
value="mixed",
|
| 408 |
+
label="Clarity",
|
| 409 |
+
info="Set how easily the correct label or labels can be identified.",
|
| 410 |
+
interactive=True,
|
| 411 |
+
)
|
| 412 |
+
labels = gr.Dropdown(
|
| 413 |
+
choices=[],
|
| 414 |
+
allow_custom_value=True,
|
| 415 |
+
interactive=True,
|
| 416 |
+
label="Labels",
|
| 417 |
+
multiselect=True,
|
| 418 |
+
info="Add the labels to classify the text.",
|
| 419 |
+
)
|
| 420 |
+
num_labels = gr.Number(
|
| 421 |
+
label="Number of labels per text",
|
| 422 |
+
value=1,
|
| 423 |
+
minimum=1,
|
| 424 |
+
maximum=10,
|
| 425 |
+
info="Select 1 for single-label and >1 for multi-label.",
|
| 426 |
+
interactive=True,
|
| 427 |
+
)
|
| 428 |
+
btn_apply_to_sample_dataset = gr.Button("Refresh dataset")
|
| 429 |
+
with gr.Column(scale=3):
|
| 430 |
+
dataframe = gr.Dataframe()
|
| 431 |
+
|
| 432 |
+
gr.Markdown("## Generate your dataset")
|
| 433 |
+
gr.HTML("<hr>")
|
| 434 |
+
with gr.Row():
|
| 435 |
+
with gr.Column(scale=1):
|
| 436 |
+
org_name = get_org_dropdown()
|
| 437 |
+
repo_name = gr.Textbox(
|
| 438 |
+
label="Repo name",
|
| 439 |
+
placeholder="dataset_name",
|
| 440 |
+
value=f"my-distiset-{str(uuid.uuid4())[:8]}",
|
| 441 |
+
interactive=True,
|
| 442 |
+
)
|
| 443 |
+
n_rows = gr.Number(
|
| 444 |
+
label="Number of rows",
|
| 445 |
+
value=10,
|
| 446 |
+
interactive=True,
|
| 447 |
+
scale=1,
|
| 448 |
+
)
|
| 449 |
+
private = gr.Checkbox(
|
| 450 |
+
label="Private dataset",
|
| 451 |
+
value=False,
|
| 452 |
+
interactive=True,
|
| 453 |
+
scale=1,
|
| 454 |
+
)
|
| 455 |
+
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
| 456 |
+
with gr.Column(scale=3):
|
| 457 |
+
success_message = gr.Markdown(visible=True)
|
| 458 |
+
|
| 459 |
+
pipeline_code = get_pipeline_code_ui(
|
| 460 |
+
generate_pipeline_code(
|
| 461 |
+
system_prompt.value,
|
| 462 |
+
difficulty=difficulty.value,
|
| 463 |
+
clarity=clarity.value,
|
| 464 |
+
labels=labels.value,
|
| 465 |
+
num_labels=num_labels.value,
|
| 466 |
+
num_rows=n_rows.value,
|
| 467 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
)
|
|
|
|
| 469 |
|
| 470 |
gr.on(
|
| 471 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
|
|
|
| 522 |
inputs=[org_name, repo_name],
|
| 523 |
outputs=[success_message],
|
| 524 |
)
|
| 525 |
+
app.load(fn=swap_visibilty, outputs=main_ui)
|
| 526 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|