g0th committed on
Commit
3b3c05a
·
verified ·
1 Parent(s): 0711926

Update ppt_parser.py

Browse files
Files changed (1) hide show
  1. ppt_parser.py +6 -1
ppt_parser.py CHANGED
@@ -59,12 +59,15 @@ def transfer_to_structure(pptx_file, images_dir_path):
59
  prs = Presentation(pptx_file)
60
  image_path_list = []
61
 
 
 
62
  for i, slide in enumerate(prs.slides):
63
  slide_item = {}
64
 
65
  for j, shape in enumerate(slide.shapes):
66
  shape_item = {}
67
 
 
68
  if shape.has_text_frame:
69
  shape_item['type'] = "text"
70
  text_frame = shape.text_frame
@@ -73,10 +76,12 @@ def transfer_to_structure(pptx_file, images_dir_path):
73
  if data:
74
  shape_item[f'paragraph_{r}'] = data
75
 
 
76
  elif isinstance(shape, GroupShape):
77
  shape_item['type'] = "group"
78
  shape_item['group_content'] = transfer_textbox_content_in_group(shape)
79
 
 
80
  elif isinstance(shape, Picture):
81
  shape_item['type'] = "picture"
82
  image_path = os.path.join(images_dir_path, f"picture_{j}.png")
@@ -91,7 +96,7 @@ def transfer_to_structure(pptx_file, images_dir_path):
91
  shape_image = Image.open(image_stream)
92
  shape_image.save(image_path)
93
  except Exception:
94
- pass
95
 
96
  slide_item[f"shape_{j}"] = shape_item
97
 
 
59
  prs = Presentation(pptx_file)
60
  image_path_list = []
61
 
62
+ os.makedirs(images_dir_path, exist_ok=True)
63
+
64
  for i, slide in enumerate(prs.slides):
65
  slide_item = {}
66
 
67
  for j, shape in enumerate(slide.shapes):
68
  shape_item = {}
69
 
70
+ # Case 1: Normal text box
71
  if shape.has_text_frame:
72
  shape_item['type'] = "text"
73
  text_frame = shape.text_frame
 
76
  if data:
77
  shape_item[f'paragraph_{r}'] = data
78
 
79
+ # Case 2: Grouped shapes
80
  elif isinstance(shape, GroupShape):
81
  shape_item['type'] = "group"
82
  shape_item['group_content'] = transfer_textbox_content_in_group(shape)
83
 
84
+ # Case 3: Picture
85
  elif isinstance(shape, Picture):
86
  shape_item['type'] = "picture"
87
  image_path = os.path.join(images_dir_path, f"picture_{j}.png")
 
96
  shape_image = Image.open(image_stream)
97
  shape_image.save(image_path)
98
  except Exception:
99
+ pass # Could not parse image
100
 
101
  slide_item[f"shape_{j}"] = shape_item
102