Infini-d-set

Running

App Files Community

lhoestq HF Staff

mariagrandury committed on Oct 11, 2024

Commit

b326353

verified ·

1 Parent(s): d2b9da8

fix typos (#8)

Browse files

- fix typos (82d42bf0a876b6f0f68931e28217558ffc035eac)

Co-authored-by: María Grandury <[email protected]>

Files changed (1) hide show

app.py +6 -6

app.py CHANGED Viewed

@@ -27,19 +27,19 @@ URL = "https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub"
 GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
         "A Machine Learning Practioner is looking for a dataset that matches '{search_query}'. "
-        f"Generate a list of {MAX_NB_ITEMS_PER_GENERATION_CALL} names of quality dataset that don't exist but sound plausible and would "
         "be helpful. Feel free to reuse words from the query '{search_query}' to name the datasets. "
-        "Every dataset should be about '{search_query}' and have descriptive tags/keywords including the ML task name associated to the dataset (classification, regression, anomaly detection, etc.). Use the following format:\n1. DatasetName1 (tag1, tag2, tag3)\n1. DatasetName2 (tag1, tag2, tag3)"
 )
 GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS = (
-    "A ML practitioner is looking for a dataset CSV after the query '{search_query}'. "
     "Generate the first 5 rows of a plausible and quality CSV for the dataset '{dataset_name}'. "
     "You can get inspiration from related keywords '{tags}' but most importantly the dataset should correspond to the query '{search_query}'. "
-    "Focus on quality text content and and use a 'label' or 'labels' column if it makes sense (invent labels, avoid reusing the keywords, be accurate while labelling texts). "
     "Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**."
 )
-GENERATE_MORE_ROWS = "Can you give me 10 additional samples in CSV format as well ? Use the same CSV header '{csv_header}'."
 GENERATE_VARIANTS_WITH_RARITY_AND_LABEL = "Focus on generating samples for the label '{label}' and ideally generate {rarity} samples."
 GENERATE_VARIANTS_WITH_RARITY = "Focus on generating {rarity} samples."
@@ -54,7 +54,7 @@ LONG_RARITIES = [
     "specific but not far-fetched",
     "uncommon but still plausible",
     "rare but still plausible",
-    "very nice but still plausible",
 ]
 landing_page_datasets_generated_text = """

 GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
         "A Machine Learning Practioner is looking for a dataset that matches '{search_query}'. "
+        f"Generate a list of {MAX_NB_ITEMS_PER_GENERATION_CALL} names of quality datasets that don't exist but sound plausible and would "
         "be helpful. Feel free to reuse words from the query '{search_query}' to name the datasets. "
+        "Every dataset should be about '{search_query}' and have descriptive tags/keywords including the ML task name associated with the dataset (classification, regression, anomaly detection, etc.). Use the following format:\n1. DatasetName1 (tag1, tag2, tag3)\n1. DatasetName2 (tag1, tag2, tag3)"
 )
 GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS = (
+    "An ML practitioner is looking for a dataset CSV after the query '{search_query}'. "
     "Generate the first 5 rows of a plausible and quality CSV for the dataset '{dataset_name}'. "
     "You can get inspiration from related keywords '{tags}' but most importantly the dataset should correspond to the query '{search_query}'. "
+    "Focus on quality text content and use a 'label' or 'labels' column if it makes sense (invent labels, avoid reusing the keywords, be accurate while labelling texts). "
     "Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**."
 )
+GENERATE_MORE_ROWS = "Can you give me 10 additional samples in CSV format as well? Use the same CSV header '{csv_header}'."
 GENERATE_VARIANTS_WITH_RARITY_AND_LABEL = "Focus on generating samples for the label '{label}' and ideally generate {rarity} samples."
 GENERATE_VARIANTS_WITH_RARITY = "Focus on generating {rarity} samples."
     "specific but not far-fetched",
     "uncommon but still plausible",
     "rare but still plausible",
+    "very niche but still plausible",
 ]
 landing_page_datasets_generated_text = """