Spaces:
Runtime error
Runtime error
| import os | |
| import traceback | |
| import re | |
| import gradio as gr | |
| import math | |
| from ...config import Config | |
| from ...utils.prompter import Prompter | |
| from .data_processing import get_data_from_input | |
def refresh_preview(
    template,
    load_dataset_from,
    dataset_from_data_dir,
    dataset_text,
    dataset_text_format,
    dataset_plain_text_input_variables_separator,
    dataset_plain_text_input_and_output_separator,
    dataset_plain_text_data_separator,
    max_preview_count,
):
    """Build the Gradio updates that populate the dataset preview.

    A ``Prompter`` is created from ``template``; the ``dataset_*`` arguments
    and ``load_dataset_from`` are forwarded unchanged to
    ``get_data_from_input``. The loaded data is turned into train data by the
    prompter, and at most ``max_preview_count`` items are shown.

    Args:
        template: Passed to ``Prompter`` to select the prompt template.
        load_dataset_from: Dataset source selector; the value ``"Data Dir"``
            switches the info message to its longer wording.
        dataset_from_data_dir: Forwarded to ``get_data_from_input``.
        dataset_text: Forwarded to ``get_data_from_input``.
        dataset_text_format: Forwarded to ``get_data_from_input``.
        dataset_plain_text_input_variables_separator: Forwarded to
            ``get_data_from_input``.
        dataset_plain_text_input_and_output_separator: Forwarded to
            ``get_data_from_input``.
        dataset_plain_text_data_separator: Forwarded to
            ``get_data_from_input``.
        max_preview_count: Upper bound on the number of previewed rows; used
            both as the limit handed to the prompter and as a slice bound.

    Returns:
        A 4-tuple of Gradio updates: the preview ``Dataframe`` update, the
        preview-info ``Markdown`` update, and the dataset info ``Markdown``
        update twice (the same object in both positions). On any exception
        the table is cleared and the info message carries the error text
        instead; the exception is not re-raised.
    """
    try:
        prompter = Prompter(template)
        variable_names = prompter.get_variable_names()

        data = get_data_from_input(
            load_dataset_from=load_dataset_from,
            dataset_text=dataset_text,
            dataset_text_format=dataset_text_format,
            dataset_plain_text_input_variables_separator=dataset_plain_text_input_variables_separator,
            dataset_plain_text_input_and_output_separator=dataset_plain_text_input_and_output_separator,
            dataset_plain_text_data_separator=dataset_plain_text_data_separator,
            dataset_from_data_dir=dataset_from_data_dir,
            prompter=prompter
        )

        train_data = prompter.get_train_data_from_dataset(
            data, max_preview_count)
        # Enforce the preview cap here as well, regardless of how many items
        # the prompter handed back.
        train_data = train_data[:max_preview_count]

        # The count is taken from the loaded data, not from the (truncated)
        # train data, which is why the messages below say "about".
        data_count = len(data)

        headers = ['Prompt', 'Completion']
        preview_data = [
            [item.get("prompt", ""), item.get("completion", "")]
            for item in train_data
        ]

        if not prompter.template_module:
            # Without a template module, add one extra column per template
            # variable, read from the "_var_<name>" keys of each item.
            headers += [
                f"Variable: {variable_name}"
                for variable_name in variable_names
            ]
            preview_data = [
                row + [item.get(f"_var_{name}", "") for name in variable_names]
                for row, item in zip(preview_data, train_data)
            ]

        preview_info_message = f"The dataset has about {data_count} item(s)."
        if data_count > max_preview_count:
            preview_info_message += f" Previewing the first {max_preview_count}."

        info_message = f"about {data_count} item(s)."
        if load_dataset_from == "Data Dir":
            info_message = "This dataset contains about " + info_message
        update_message = gr.Markdown.update(info_message, visible=True)

        return (
            gr.Dataframe.update(
                value={'data': preview_data, 'headers': headers}),
            gr.Markdown.update(preview_info_message),
            update_message,
            update_message
        )
    except Exception as e:
        # UI callback boundary: report the failure in the info message and
        # reset the preview rather than letting the exception propagate.
        update_message = gr.Markdown.update(
            f"<span class=\"finetune_dataset_error_message\">Error: {e}.</span>",
            visible=True)
        return (
            gr.Dataframe.update(value={'data': [], 'headers': []}),
            gr.Markdown.update(
                "Set the dataset in the \"Prepare\" tab, then preview it here."),
            update_message,
            update_message
        )
def refresh_dataset_items_count(
    template,
    load_dataset_from,
    dataset_from_data_dir,
    dataset_text,
    dataset_text_format,
    dataset_plain_text_input_variables_separator,
    dataset_plain_text_input_and_output_separator,
    dataset_plain_text_data_separator,
    max_preview_count,
):
    """Count the train items of the current dataset and build UI updates.

    A ``Prompter`` is created from ``template``, the ``dataset_*`` arguments
    and ``load_dataset_from`` are forwarded unchanged to
    ``get_data_from_input``, and the prompter converts the whole loaded
    dataset to train data. The number of resulting items drives the messages
    and the slider maximum.

    Returns:
        A 4-tuple of Gradio updates: the preview-info ``Markdown`` update,
        the item-count ``Markdown`` update twice (same object in both
        positions), and a ``Slider`` update whose maximum is half the item
        count, rounded down. On any exception the messages carry the error
        text and the slider maximum is set to 1; the exception is not
        re-raised.
    """
    try:
        prompter = Prompter(template)
        dataset = get_data_from_input(
            load_dataset_from=load_dataset_from,
            dataset_text=dataset_text,
            dataset_text_format=dataset_text_format,
            dataset_plain_text_input_variables_separator=dataset_plain_text_input_variables_separator,
            dataset_plain_text_input_and_output_separator=dataset_plain_text_input_and_output_separator,
            dataset_plain_text_data_separator=dataset_plain_text_data_separator,
            dataset_from_data_dir=dataset_from_data_dir,
            prompter=prompter
        )
        item_count = len(prompter.get_train_data_from_dataset(dataset))

        preview_note = f"The dataset contains {item_count} item(s)."
        if item_count > max_preview_count:
            preview_note += f" Previewing the first {max_preview_count}."

        summary = f"{item_count} item(s)."
        if load_dataset_from == "Data Dir":
            summary = "This dataset contains " + summary
        summary_update = gr.Markdown.update(summary, visible=True)

        return (
            gr.Markdown.update(preview_note),
            summary_update,
            summary_update,
            # Slider maximum is half of the item count, rounded down.
            gr.Slider.update(maximum=item_count // 2)
        )
    except Exception as e:
        error_html = f"<span class=\"finetune_dataset_error_message\">Error: {e}.</span>"

        # Split the formatted traceback into per-frame chunks and keep only
        # the frames whose text mentions the "templates" directory under
        # Config.data_dir; each kept frame is collapsed onto a single line.
        templates_dir = os.path.join(Config.data_dir, "templates")
        frames = (
            chunk.strip()
            for chunk in re.split("\n * File ", traceback.format_exc())
        )
        template_frames = [
            re.sub(" *\n *", ": ", frame)
            for frame in frames
            if templates_dir in frame
        ]

        if template_frames:
            joined_frames = ','.join(template_frames)
            error_html = f"<span class=\"finetune_dataset_error_message\">Error: {e} ({joined_frames}).</span>"

        error_update = gr.Markdown.update(error_html, visible=True)
        return (
            gr.Markdown.update(
                "Set the dataset in the \"Prepare\" tab, then preview it here."),
            error_update,
            error_update,
            gr.Slider.update(maximum=1)
        )