Commit
·
4d1c962
1
Parent(s):
c0c68e7
feat: added duration for run
Browse files
- .gitignore +1 -0
- src/distilabel_dataset_generator/sft.py +16 -3
.gitignore
CHANGED
|
@@ -160,3 +160,4 @@ cython_debug/
|
|
| 160 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 162 |
#.idea/
|
|
|
|
|
|
| 160 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 162 |
#.idea/
|
| 163 |
+
.DS_Store
|
src/distilabel_dataset_generator/sft.py
CHANGED
|
@@ -232,16 +232,29 @@ def generate_dataset(
|
|
| 232 |
)
|
| 233 |
num_rows = 5000
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
gr.Info(
|
| 236 |
-
"Started pipeline execution. This might take a while, depending on the number of rows and turns you have selected. Don't close this page."
|
|
|
|
| 237 |
)
|
| 238 |
result_queue = multiprocessing.Queue()
|
| 239 |
p = multiprocessing.Process(
|
| 240 |
target=_run_pipeline,
|
| 241 |
args=(result_queue, num_turns, num_rows, system_prompt),
|
| 242 |
)
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
| 245 |
distiset = result_queue.get()
|
| 246 |
|
| 247 |
if dataset_name is not None:
|
|
|
|
| 232 |
)
|
| 233 |
num_rows = 5000
|
| 234 |
|
| 235 |
+
if num_rows < 50:
|
| 236 |
+
duration = 60
|
| 237 |
+
elif num_rows < 250:
|
| 238 |
+
duration = 300
|
| 239 |
+
elif num_rows < 1000:
|
| 240 |
+
duration = 500
|
| 241 |
+
else:
|
| 242 |
+
duration = 1000
|
| 243 |
+
|
| 244 |
gr.Info(
|
| 245 |
+
"Started pipeline execution. This might take a while, depending on the number of rows and turns you have selected. Don't close this page.",
|
| 246 |
+
duration=duration,
|
| 247 |
)
|
| 248 |
result_queue = multiprocessing.Queue()
|
| 249 |
p = multiprocessing.Process(
|
| 250 |
target=_run_pipeline,
|
| 251 |
args=(result_queue, num_turns, num_rows, system_prompt),
|
| 252 |
)
|
| 253 |
+
try:
|
| 254 |
+
p.start()
|
| 255 |
+
p.join()
|
| 256 |
+
except Exception as e:
|
| 257 |
+
raise gr.Error(f"An error occurred during dataset generation: {str(e)}")
|
| 258 |
distiset = result_queue.get()
|
| 259 |
|
| 260 |
if dataset_name is not None:
|