YH committed on
Commit
49437cb
·
unverified ·
1 Parent(s): d84c342

Fix Bug in Dataset Building Process (#18)

Browse files

* Fix dataset build bug

* Fix lang dataset attr

mmgpt/datasets/builder.py CHANGED
@@ -23,7 +23,7 @@ def build_dataset(dataset_config, **kwargs):
23
  return ConcatDataset(datasets)
24
  dataset_type = dataset_config.pop("type")
25
  sample = dataset_config.pop("sample", -1)
26
- if dataset_config.type == "llava":
27
  dataset = LlavaDataset(
28
  **dataset_config,
29
  **kwargs,
 
23
  return ConcatDataset(datasets)
24
  dataset_type = dataset_config.pop("type")
25
  sample = dataset_config.pop("sample", -1)
26
+ if dataset_type == "llava":
27
  dataset = LlavaDataset(
28
  **dataset_config,
29
  **kwargs,
mmgpt/train/instruction_finetune.py CHANGED
@@ -172,7 +172,7 @@ def main():
172
  raise ValueError("dataset_config must be specified")
173
 
174
  dataset = build_dataset(
175
- config=dataset_config.visual_datasets,
176
  vis_processor=image_processor,
177
  tokenizer=tokenizer,
178
  )
@@ -185,9 +185,9 @@ def main():
185
  )
186
 
187
  # build language dataset and dataloader for multi-modality training
188
- if dataset_config.language_datasets is not None and len(args.language_datasets) > 0:
189
  lang_dataset = build_dataset(
190
- config=dataset_config.language_datasets,
191
  tokenizer=tokenizer,
192
  )
193
  lang_dataloader = DataLoader(
 
172
  raise ValueError("dataset_config must be specified")
173
 
174
  dataset = build_dataset(
175
+ dataset_config=dataset_config.visual_datasets,
176
  vis_processor=image_processor,
177
  tokenizer=tokenizer,
178
  )
 
185
  )
186
 
187
  # build language dataset and dataloader for multi-modality training
188
+ if dataset_config.get('language_datasets') is not None and len(args.language_datasets) > 0:
189
  lang_dataset = build_dataset(
190
+ dataset_config=dataset_config.language_datasets,
191
  tokenizer=tokenizer,
192
  )
193
  lang_dataloader = DataLoader(