Spaces:

g0th
/

Studymaker

Sleeping

App Files Files Community

g0th committed on May 27

Commit

b681c52

verified ·

1 Parent(s): f40afe0

Create ppt_parser.py

Browse files

Files changed (1) hide show

ppt_parser.py +67 -0

ppt_parser.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import json
+import os
+from pptx import Presentation
+from pptx.util import Inches
+from pptx.shapes.group import GroupShape
+from pptx.shapes.picture import Picture
+from PIL import Image
+import io
+def print_json(item):
+    return json.dumps(item, ensure_ascii=False, indent=4)
+def transfer_textbox_content_in_group(group_shape):
+    group_shape_item = {}
+    for l, shape in enumerate(group_shape.shapes):
+        shape_item = {}
+        if shape.has_text_frame:
+            shape_item['type'] = "text"
+            shape_item['location'] = (shape.left, shape.top)
+            text_frame = shape.text_frame
+            for r, paragraph in enumerate(text_frame.paragraphs):
+                if paragraph.runs:
+                    original_run = paragraph.runs[0]
+                    paragraph_item = {
+                        'text': paragraph.text,
+                        'align': paragraph.alignment,
+                        'font': {
+                            'name': original_run.font.name,
+                            'bold': original_run.font.bold,
+                            'italic': original_run.font.italic,
+                            'underline': original_run.font.underline,
+                            'color': str(original_run.font.color.rgb),
+                            'language_id': original_run.font.language_id,
+                        }
+                    }
+                    shape_item[f'paragraph_{r}'] = paragraph_item
+        group_shape_item[f"shape_{l}"] = shape_item
+    return group_shape_item
+def transfer_to_structure(pptx_file, images_dir_path):
+    item = {}
+    prs = Presentation(pptx_file)
+    image_path_list = []
+    for i, slide in enumerate(prs.slides):
+        slide_item = {}
+        for j, shape in enumerate(slide.shapes):
+            shape_item = {}
+            if isinstance(shape, GroupShape):
+                shape_item['type'] = "group"
+                shape_item['group_content'] = transfer_textbox_content_in_group(shape)
+            elif isinstance(shape, Picture):
+                shape_item['type'] = "picture"
+                image_path = os.path.join(images_dir_path, f"picture_{j}.png")
+                image_path_list.append(image_path)
+                shape_item['image_path'] = image_path
+                shape_item['size'] = shape.image.size
+                shape_item['dpi'] = shape.image.dpi
+                shape_item['location'] = (shape.left, shape.top)
+                shape_item['location_inches'] = (Inches(shape.left).inches, Inches(shape.top).inches)
+                image_stream = io.BytesIO(shape.image.blob)
+                shape_image = Image.open(image_stream)
+                shape_image.save(image_path)
+            slide_item[f"shape_{j}"] = shape_item
+        item[f"slide_{i}"] = slide_item
+    return print_json(item), image_path_list