Spaces:
Runtime error
Runtime error
Commit
·
d982700
1
Parent(s):
dd0124d
update failed token message
Browse files
pdm.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
CHANGED
|
@@ -19,11 +19,11 @@ license = {text = "Apache 2"}
|
|
| 19 |
|
| 20 |
dependencies = [
|
| 21 |
"distilabel[hf-inference-endpoints,argilla,outlines,instructor]>=1.4.1",
|
| 22 |
-
"gradio[oauth]
|
| 23 |
"transformers>=4.44.2",
|
| 24 |
"sentence-transformers>=3.2.0",
|
| 25 |
"model2vec>=0.2.4",
|
| 26 |
-
"gradio-huggingfacehub-search>=0.0.
|
| 27 |
"argilla>=2.4.0",
|
| 28 |
]
|
| 29 |
|
|
|
|
| 19 |
|
| 20 |
dependencies = [
|
| 21 |
"distilabel[hf-inference-endpoints,argilla,outlines,instructor]>=1.4.1",
|
| 22 |
+
"gradio[oauth]>=5.4.0",
|
| 23 |
"transformers>=4.44.2",
|
| 24 |
"sentence-transformers>=3.2.0",
|
| 25 |
"model2vec>=0.2.4",
|
| 26 |
+
"gradio-huggingfacehub-search>=0.0.12",
|
| 27 |
"argilla>=2.4.0",
|
| 28 |
]
|
| 29 |
|
src/synthetic_dataset_generator/apps/eval.py
CHANGED
|
@@ -739,7 +739,6 @@ with gr.Blocks() as app:
|
|
| 739 |
dataframe = gr.Dataframe(
|
| 740 |
headers=["prompt", "completion", "evaluation"],
|
| 741 |
wrap=True,
|
| 742 |
-
height=500,
|
| 743 |
interactive=False,
|
| 744 |
elem_classes="table-view",
|
| 745 |
)
|
|
|
|
| 739 |
dataframe = gr.Dataframe(
|
| 740 |
headers=["prompt", "completion", "evaluation"],
|
| 741 |
wrap=True,
|
|
|
|
| 742 |
interactive=False,
|
| 743 |
elem_classes="table-view",
|
| 744 |
)
|
src/synthetic_dataset_generator/apps/sft.py
CHANGED
|
@@ -15,7 +15,11 @@ from synthetic_dataset_generator.apps.base import (
|
|
| 15 |
validate_argilla_user_workspace_dataset,
|
| 16 |
validate_push_to_hub,
|
| 17 |
)
|
| 18 |
-
from synthetic_dataset_generator.constants import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
from synthetic_dataset_generator.pipelines.embeddings import (
|
| 20 |
get_embeddings,
|
| 21 |
get_sentence_embedding_dimensions,
|
|
@@ -82,7 +86,6 @@ def _get_dataframe():
|
|
| 82 |
return gr.Dataframe(
|
| 83 |
headers=["prompt", "completion"],
|
| 84 |
wrap=True,
|
| 85 |
-
height=500,
|
| 86 |
interactive=False,
|
| 87 |
elem_classes="table-view",
|
| 88 |
)
|
|
@@ -97,8 +100,12 @@ def generate_dataset(
|
|
| 97 |
progress=gr.Progress(),
|
| 98 |
) -> pd.DataFrame:
|
| 99 |
progress(0.0, desc="(1/2) Generating instructions")
|
| 100 |
-
magpie_generator = get_magpie_generator(
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
total_steps: int = num_rows * 2
|
| 103 |
batch_size = DEFAULT_BATCH_SIZE
|
| 104 |
|
|
@@ -520,7 +527,7 @@ with gr.Blocks() as app:
|
|
| 520 |
num_turns,
|
| 521 |
num_rows,
|
| 522 |
private,
|
| 523 |
-
temperature
|
| 524 |
],
|
| 525 |
outputs=[success_message],
|
| 526 |
show_progress=True,
|
|
|
|
| 15 |
validate_argilla_user_workspace_dataset,
|
| 16 |
validate_push_to_hub,
|
| 17 |
)
|
| 18 |
+
from synthetic_dataset_generator.constants import (
|
| 19 |
+
DEFAULT_BATCH_SIZE,
|
| 20 |
+
MODEL,
|
| 21 |
+
SFT_AVAILABLE,
|
| 22 |
+
)
|
| 23 |
from synthetic_dataset_generator.pipelines.embeddings import (
|
| 24 |
get_embeddings,
|
| 25 |
get_sentence_embedding_dimensions,
|
|
|
|
| 86 |
return gr.Dataframe(
|
| 87 |
headers=["prompt", "completion"],
|
| 88 |
wrap=True,
|
|
|
|
| 89 |
interactive=False,
|
| 90 |
elem_classes="table-view",
|
| 91 |
)
|
|
|
|
| 100 |
progress=gr.Progress(),
|
| 101 |
) -> pd.DataFrame:
|
| 102 |
progress(0.0, desc="(1/2) Generating instructions")
|
| 103 |
+
magpie_generator = get_magpie_generator(
|
| 104 |
+
system_prompt, num_turns, temperature, is_sample
|
| 105 |
+
)
|
| 106 |
+
response_generator = get_response_generator(
|
| 107 |
+
system_prompt, num_turns, temperature, is_sample
|
| 108 |
+
)
|
| 109 |
total_steps: int = num_rows * 2
|
| 110 |
batch_size = DEFAULT_BATCH_SIZE
|
| 111 |
|
|
|
|
| 527 |
num_turns,
|
| 528 |
num_rows,
|
| 529 |
private,
|
| 530 |
+
temperature,
|
| 531 |
],
|
| 532 |
outputs=[success_message],
|
| 533 |
show_progress=True,
|
src/synthetic_dataset_generator/apps/textcat.py
CHANGED
|
@@ -39,7 +39,6 @@ def _get_dataframe():
|
|
| 39 |
return gr.Dataframe(
|
| 40 |
headers=["labels", "text"],
|
| 41 |
wrap=True,
|
| 42 |
-
height=500,
|
| 43 |
interactive=False,
|
| 44 |
elem_classes="table-view",
|
| 45 |
)
|
|
@@ -96,7 +95,10 @@ def generate_dataset(
|
|
| 96 |
progress(0.0, desc="(1/2) Generating text classification data")
|
| 97 |
labels = get_preprocess_labels(labels)
|
| 98 |
textcat_generator = get_textcat_generator(
|
| 99 |
-
difficulty=difficulty,
|
|
|
|
|
|
|
|
|
|
| 100 |
)
|
| 101 |
labeller_generator = get_labeller_generator(
|
| 102 |
system_prompt=f"{system_prompt} {', '.join(labels)}",
|
|
@@ -541,7 +543,7 @@ with gr.Blocks() as app:
|
|
| 541 |
num_rows,
|
| 542 |
labels,
|
| 543 |
private,
|
| 544 |
-
temperature
|
| 545 |
],
|
| 546 |
outputs=[success_message],
|
| 547 |
show_progress=True,
|
|
@@ -558,7 +560,7 @@ with gr.Blocks() as app:
|
|
| 558 |
labels,
|
| 559 |
num_labels,
|
| 560 |
num_rows,
|
| 561 |
-
temperature
|
| 562 |
],
|
| 563 |
outputs=[pipeline_code],
|
| 564 |
).success(
|
|
|
|
| 39 |
return gr.Dataframe(
|
| 40 |
headers=["labels", "text"],
|
| 41 |
wrap=True,
|
|
|
|
| 42 |
interactive=False,
|
| 43 |
elem_classes="table-view",
|
| 44 |
)
|
|
|
|
| 95 |
progress(0.0, desc="(1/2) Generating text classification data")
|
| 96 |
labels = get_preprocess_labels(labels)
|
| 97 |
textcat_generator = get_textcat_generator(
|
| 98 |
+
difficulty=difficulty,
|
| 99 |
+
clarity=clarity,
|
| 100 |
+
temperature=temperature,
|
| 101 |
+
is_sample=is_sample,
|
| 102 |
)
|
| 103 |
labeller_generator = get_labeller_generator(
|
| 104 |
system_prompt=f"{system_prompt} {', '.join(labels)}",
|
|
|
|
| 543 |
num_rows,
|
| 544 |
labels,
|
| 545 |
private,
|
| 546 |
+
temperature,
|
| 547 |
],
|
| 548 |
outputs=[success_message],
|
| 549 |
show_progress=True,
|
|
|
|
| 560 |
labels,
|
| 561 |
num_labels,
|
| 562 |
num_rows,
|
| 563 |
+
temperature,
|
| 564 |
],
|
| 565 |
outputs=[pipeline_code],
|
| 566 |
).success(
|
src/synthetic_dataset_generator/utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import json
|
|
|
|
| 2 |
from typing import List, Optional, Union
|
| 3 |
|
| 4 |
import argilla as rg
|
|
@@ -38,9 +39,15 @@ def list_orgs(oauth_token: Union[OAuthToken, None] = None):
|
|
| 38 |
organizations = [org for org in organizations if org != data["name"]]
|
| 39 |
organizations = [data["name"]] + organizations
|
| 40 |
except Exception as e:
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
)
|
|
|
|
|
|
|
| 44 |
return organizations
|
| 45 |
|
| 46 |
|
|
|
|
| 1 |
import json
|
| 2 |
+
import warnings
|
| 3 |
from typing import List, Optional, Union
|
| 4 |
|
| 5 |
import argilla as rg
|
|
|
|
| 39 |
organizations = [org for org in organizations if org != data["name"]]
|
| 40 |
organizations = [data["name"]] + organizations
|
| 41 |
except Exception as e:
|
| 42 |
+
data = whoami(oauth_token.token)
|
| 43 |
+
warnings.warn(str(e))
|
| 44 |
+
gr.Info(
|
| 45 |
+
"Your user token does not have the necessary permissions to push to organizations."
|
| 46 |
+
"Please check your OAuth permissions in https://huggingface.co/settings/connected-applications."
|
| 47 |
+
"Update your token permissions to include repo.write: https://huggingface.co/settings/tokens."
|
| 48 |
)
|
| 49 |
+
return [data["name"]]
|
| 50 |
+
|
| 51 |
return organizations
|
| 52 |
|
| 53 |
|