import os import random from PIL import Image from .vqa_dataset import VQADataset class OCRVQADataset(VQADataset): def process_image(self, ann): image_path = os.path.join(self.vis_root, ann["filename"]) image = Image.open(image_path).convert("RGB") image = self.vis_processor(image) return image def process_text(self, ann): index = random.choice(list(range(len(ann["questions"])))) question = ann["questions"][index] answer = ann["answers"][index] instruction = self.prompter(question) return dict(instruction=instruction, answer=answer)