Spaces:
Running
Running
fix typos (#8)
Browse files- fix typos (82d42bf0a876b6f0f68931e28217558ffc035eac)
Co-authored-by: María Grandury <[email protected]>
app.py
CHANGED
@@ -27,19 +27,19 @@ URL = "https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub"
|
|
27 |
|
28 |
GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
|
29 |
"A Machine Learning Practitioner is looking for a dataset that matches '{search_query}'. "
|
30 |
-
f"Generate a list of {MAX_NB_ITEMS_PER_GENERATION_CALL} names of quality
|
31 |
"be helpful. Feel free to reuse words from the query '{search_query}' to name the datasets. "
|
32 |
-
"Every dataset should be about '{search_query}' and have descriptive tags/keywords including the ML task name associated
|
33 |
)
|
34 |
|
35 |
GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS = (
|
36 |
-
"
|
37 |
"Generate the first 5 rows of a plausible and quality CSV for the dataset '{dataset_name}'. "
|
38 |
"You can get inspiration from related keywords '{tags}' but most importantly the dataset should correspond to the query '{search_query}'. "
|
39 |
-
"Focus on quality text content and
|
40 |
"Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**."
|
41 |
)
|
42 |
-
GENERATE_MORE_ROWS = "Can you give me 10 additional samples in CSV format as well
|
43 |
GENERATE_VARIANTS_WITH_RARITY_AND_LABEL = "Focus on generating samples for the label '{label}' and ideally generate {rarity} samples."
|
44 |
GENERATE_VARIANTS_WITH_RARITY = "Focus on generating {rarity} samples."
|
45 |
|
@@ -54,7 +54,7 @@ LONG_RARITIES = [
|
|
54 |
"specific but not far-fetched",
|
55 |
"uncommon but still plausible",
|
56 |
"rare but still plausible",
|
57 |
-
"very
|
58 |
]
|
59 |
|
60 |
landing_page_datasets_generated_text = """
|
|
|
27 |
|
28 |
GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
|
29 |
"A Machine Learning Practitioner is looking for a dataset that matches '{search_query}'. "
|
30 |
+
f"Generate a list of {MAX_NB_ITEMS_PER_GENERATION_CALL} names of quality datasets that don't exist but sound plausible and would "
|
31 |
"be helpful. Feel free to reuse words from the query '{search_query}' to name the datasets. "
|
32 |
+
"Every dataset should be about '{search_query}' and have descriptive tags/keywords including the ML task name associated with the dataset (classification, regression, anomaly detection, etc.). Use the following format:\n1. DatasetName1 (tag1, tag2, tag3)\n1. DatasetName2 (tag1, tag2, tag3)"
|
33 |
)
|
34 |
|
35 |
GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS = (
|
36 |
+
"An ML practitioner is looking for a dataset CSV after the query '{search_query}'. "
|
37 |
"Generate the first 5 rows of a plausible and quality CSV for the dataset '{dataset_name}'. "
|
38 |
"You can get inspiration from related keywords '{tags}' but most importantly the dataset should correspond to the query '{search_query}'. "
|
39 |
+
"Focus on quality text content and use a 'label' or 'labels' column if it makes sense (invent labels, avoid reusing the keywords, be accurate while labelling texts). "
|
40 |
"Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**."
|
41 |
)
|
42 |
+
GENERATE_MORE_ROWS = "Can you give me 10 additional samples in CSV format as well? Use the same CSV header '{csv_header}'."
|
43 |
GENERATE_VARIANTS_WITH_RARITY_AND_LABEL = "Focus on generating samples for the label '{label}' and ideally generate {rarity} samples."
|
44 |
GENERATE_VARIANTS_WITH_RARITY = "Focus on generating {rarity} samples."
|
45 |
|
|
|
54 |
"specific but not far-fetched",
|
55 |
"uncommon but still plausible",
|
56 |
"rare but still plausible",
|
57 |
+
"very niche but still plausible",
|
58 |
]
|
59 |
|
60 |
landing_page_datasets_generated_text = """
|