lhoestq HF Staff mariagrandury committed on
Commit
b326353
·
verified ·
1 Parent(s): d2b9da8

fix typos (#8)

Browse files

- fix typos (82d42bf0a876b6f0f68931e28217558ffc035eac)


Co-authored-by: María Grandury <[email protected]>

Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -27,19 +27,19 @@ URL = "https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub"
27
 
28
  GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
29
  "A Machine Learning Practioner is looking for a dataset that matches '{search_query}'. "
30
- f"Generate a list of {MAX_NB_ITEMS_PER_GENERATION_CALL} names of quality dataset that don't exist but sound plausible and would "
31
  "be helpful. Feel free to reuse words from the query '{search_query}' to name the datasets. "
32
- "Every dataset should be about '{search_query}' and have descriptive tags/keywords including the ML task name associated to the dataset (classification, regression, anomaly detection, etc.). Use the following format:\n1. DatasetName1 (tag1, tag2, tag3)\n1. DatasetName2 (tag1, tag2, tag3)"
33
  )
34
 
35
  GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS = (
36
- "A ML practitioner is looking for a dataset CSV after the query '{search_query}'. "
37
  "Generate the first 5 rows of a plausible and quality CSV for the dataset '{dataset_name}'. "
38
  "You can get inspiration from related keywords '{tags}' but most importantly the dataset should correspond to the query '{search_query}'. "
39
- "Focus on quality text content and and use a 'label' or 'labels' column if it makes sense (invent labels, avoid reusing the keywords, be accurate while labelling texts). "
40
  "Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**."
41
  )
42
- GENERATE_MORE_ROWS = "Can you give me 10 additional samples in CSV format as well ? Use the same CSV header '{csv_header}'."
43
  GENERATE_VARIANTS_WITH_RARITY_AND_LABEL = "Focus on generating samples for the label '{label}' and ideally generate {rarity} samples."
44
  GENERATE_VARIANTS_WITH_RARITY = "Focus on generating {rarity} samples."
45
 
@@ -54,7 +54,7 @@ LONG_RARITIES = [
54
  "specific but not far-fetched",
55
  "uncommon but still plausible",
56
  "rare but still plausible",
57
- "very nice but still plausible",
58
  ]
59
 
60
  landing_page_datasets_generated_text = """
 
27
 
28
  GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
29
  "A Machine Learning Practioner is looking for a dataset that matches '{search_query}'. "
30
+ f"Generate a list of {MAX_NB_ITEMS_PER_GENERATION_CALL} names of quality datasets that don't exist but sound plausible and would "
31
  "be helpful. Feel free to reuse words from the query '{search_query}' to name the datasets. "
32
+ "Every dataset should be about '{search_query}' and have descriptive tags/keywords including the ML task name associated with the dataset (classification, regression, anomaly detection, etc.). Use the following format:\n1. DatasetName1 (tag1, tag2, tag3)\n1. DatasetName2 (tag1, tag2, tag3)"
33
  )
34
 
35
  GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS = (
36
+ "An ML practitioner is looking for a dataset CSV after the query '{search_query}'. "
37
  "Generate the first 5 rows of a plausible and quality CSV for the dataset '{dataset_name}'. "
38
  "You can get inspiration from related keywords '{tags}' but most importantly the dataset should correspond to the query '{search_query}'. "
39
+ "Focus on quality text content and use a 'label' or 'labels' column if it makes sense (invent labels, avoid reusing the keywords, be accurate while labelling texts). "
40
  "Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**."
41
  )
42
+ GENERATE_MORE_ROWS = "Can you give me 10 additional samples in CSV format as well? Use the same CSV header '{csv_header}'."
43
  GENERATE_VARIANTS_WITH_RARITY_AND_LABEL = "Focus on generating samples for the label '{label}' and ideally generate {rarity} samples."
44
  GENERATE_VARIANTS_WITH_RARITY = "Focus on generating {rarity} samples."
45
 
 
54
  "specific but not far-fetched",
55
  "uncommon but still plausible",
56
  "rare but still plausible",
57
+ "very niche but still plausible",
58
  ]
59
 
60
  landing_page_datasets_generated_text = """