Spaces:
Running
Running
# Tokenization/pretraining/instruction_formatter.py
class InstructionFormatter:
    """Normalize instruction-tuning samples into a fixed three-field dict."""

    @staticmethod
    def format_sample(sample):
        """Return a cleaned copy of *sample* with exactly three text fields.

        This is a placeholder formatter; customize as needed for your data.

        Args:
            sample: Mapping that may contain the keys ``"instruction"``,
                ``"input"`` and ``"output"``. Any other keys are ignored.

        Returns:
            A new dict with the keys ``"instruction"``, ``"input"`` and
            ``"output"``, each mapped to a whitespace-stripped string.
            A missing key or a ``None`` value yields ``""``; a non-string
            value is converted with ``str()`` before stripping.
        """

        def _clean(value):
            # A key that is present with a null value (common for the
            # optional "input" field in JSON datasets) bypasses the .get()
            # default, so None has to be handled explicitly.
            if value is None:
                return ""
            # Numeric or other non-string payloads have no .strip().
            if not isinstance(value, str):
                value = str(value)
            return value.strip()

        return {
            key: _clean(sample.get(key))
            for key in ("instruction", "input", "output")
        }