Spaces:
Build error
Build error
YH
committed on
Fix Bug in Dataset Building Process (#18)
Browse files
* Fix dataset build bug
* Fix lang dataset attr
mmgpt/datasets/builder.py
CHANGED
@@ -23,7 +23,7 @@ def build_dataset(dataset_config, **kwargs):
|
|
23 |
return ConcatDataset(datasets)
|
24 |
dataset_type = dataset_config.pop("type")
|
25 |
sample = dataset_config.pop("sample", -1)
|
26 |
-
if
|
27 |
dataset = LlavaDataset(
|
28 |
**dataset_config,
|
29 |
**kwargs,
|
|
|
23 |
return ConcatDataset(datasets)
|
24 |
dataset_type = dataset_config.pop("type")
|
25 |
sample = dataset_config.pop("sample", -1)
|
26 |
+
if dataset_type == "llava":
|
27 |
dataset = LlavaDataset(
|
28 |
**dataset_config,
|
29 |
**kwargs,
|
mmgpt/train/instruction_finetune.py
CHANGED
@@ -172,7 +172,7 @@ def main():
|
|
172 |
raise ValueError("dataset_config must be specified")
|
173 |
|
174 |
dataset = build_dataset(
|
175 |
-
|
176 |
vis_processor=image_processor,
|
177 |
tokenizer=tokenizer,
|
178 |
)
|
@@ -185,9 +185,9 @@ def main():
|
|
185 |
)
|
186 |
|
187 |
# build language dataset and dataloader for multi-modality training
|
188 |
-
if dataset_config.language_datasets is not None and len(args.language_datasets) > 0:
|
189 |
lang_dataset = build_dataset(
|
190 |
-
|
191 |
tokenizer=tokenizer,
|
192 |
)
|
193 |
lang_dataloader = DataLoader(
|
|
|
172 |
raise ValueError("dataset_config must be specified")
|
173 |
|
174 |
dataset = build_dataset(
|
175 |
+
dataset_config=dataset_config.visual_datasets,
|
176 |
vis_processor=image_processor,
|
177 |
tokenizer=tokenizer,
|
178 |
)
|
|
|
185 |
)
|
186 |
|
187 |
# build language dataset and dataloader for multi-modality training
|
188 |
+
if dataset_config.get('language_datasets') is not None and len(args.language_datasets) > 0:
|
189 |
lang_dataset = build_dataset(
|
190 |
+
dataset_config=dataset_config.language_datasets,
|
191 |
tokenizer=tokenizer,
|
192 |
)
|
193 |
lang_dataloader = DataLoader(
|