Spaces:

g0th
/

Studymaker

Sleeping

App Files Community

g0th committed on May 27

Commit

34a7f38

verified ·

1 Parent(s): 3225b73

Update ppt_parser.py

Browse files

Files changed (1) hide show

ppt_parser.py +29 -1

ppt_parser.py CHANGED Viewed

@@ -44,11 +44,37 @@ def transfer_to_structure(pptx_file, images_dir_path):
     for i, slide in enumerate(prs.slides):
         slide_item = {}
         for j, shape in enumerate(slide.shapes):
             shape_item = {}
-            if isinstance(shape, GroupShape):
                 shape_item['type'] = "group"
                 shape_item['group_content'] = transfer_textbox_content_in_group(shape)
             elif isinstance(shape, Picture):
                 shape_item['type'] = "picture"
                 image_path = os.path.join(images_dir_path, f"picture_{j}.png")
@@ -61,7 +87,9 @@ def transfer_to_structure(pptx_file, images_dir_path):
                 image_stream = io.BytesIO(shape.image.blob)
                 shape_image = Image.open(image_stream)
                 shape_image.save(image_path)
             slide_item[f"shape_{j}"] = shape_item
         item[f"slide_{i}"] = slide_item
     return print_json(item), image_path_list

     for i, slide in enumerate(prs.slides):
         slide_item = {}
         for j, shape in enumerate(slide.shapes):
             shape_item = {}
+            # Case 1: Normal text box
+            if shape.has_text_frame:
+                shape_item['type'] = "text"
+                text_frame = shape.text_frame
+                for r, paragraph in enumerate(text_frame.paragraphs):
+                    if paragraph.runs:
+                        original_run = paragraph.runs[0]
+                        paragraph_item = {
+                            'text': paragraph.text,
+                            'align': paragraph.alignment,
+                            'font': {
+                                'name': original_run.font.name,
+                                'bold': original_run.font.bold,
+                                'italic': original_run.font.italic,
+                                'underline': original_run.font.underline,
+                                'color': str(original_run.font.color.rgb),
+                                'language_id': original_run.font.language_id,
+                            }
+                        }
+                        shape_item[f'paragraph_{r}'] = paragraph_item
+            # Case 2: Grouped shapes
+            elif isinstance(shape, GroupShape):
                 shape_item['type'] = "group"
                 shape_item['group_content'] = transfer_textbox_content_in_group(shape)
+            # Case 3: Picture
             elif isinstance(shape, Picture):
                 shape_item['type'] = "picture"
                 image_path = os.path.join(images_dir_path, f"picture_{j}.png")
                 image_stream = io.BytesIO(shape.image.blob)
                 shape_image = Image.open(image_stream)
                 shape_image.save(image_path)
             slide_item[f"shape_{j}"] = shape_item
         item[f"slide_{i}"] = slide_item
     return print_json(item), image_path_list