g0th committed on
Commit
34a7f38
·
verified ·
1 Parent(s): 3225b73

Update ppt_parser.py

Browse files
Files changed (1) hide show
  1. ppt_parser.py +29 -1
ppt_parser.py CHANGED
@@ -44,11 +44,37 @@ def transfer_to_structure(pptx_file, images_dir_path):
44
 
45
  for i, slide in enumerate(prs.slides):
46
  slide_item = {}
 
47
  for j, shape in enumerate(slide.shapes):
48
  shape_item = {}
49
- if isinstance(shape, GroupShape):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  shape_item['type'] = "group"
51
  shape_item['group_content'] = transfer_textbox_content_in_group(shape)
 
 
52
  elif isinstance(shape, Picture):
53
  shape_item['type'] = "picture"
54
  image_path = os.path.join(images_dir_path, f"picture_{j}.png")
@@ -61,7 +87,9 @@ def transfer_to_structure(pptx_file, images_dir_path):
61
  image_stream = io.BytesIO(shape.image.blob)
62
  shape_image = Image.open(image_stream)
63
  shape_image.save(image_path)
 
64
  slide_item[f"shape_{j}"] = shape_item
 
65
  item[f"slide_{i}"] = slide_item
66
 
67
  return print_json(item), image_path_list
 
44
 
45
  for i, slide in enumerate(prs.slides):
46
  slide_item = {}
47
+
48
  for j, shape in enumerate(slide.shapes):
49
  shape_item = {}
50
+
51
+ # Case 1: Normal text box
52
+ if shape.has_text_frame:
53
+ shape_item['type'] = "text"
54
+ text_frame = shape.text_frame
55
+ for r, paragraph in enumerate(text_frame.paragraphs):
56
+ if paragraph.runs:
57
+ original_run = paragraph.runs[0]
58
+ paragraph_item = {
59
+ 'text': paragraph.text,
60
+ 'align': paragraph.alignment,
61
+ 'font': {
62
+ 'name': original_run.font.name,
63
+ 'bold': original_run.font.bold,
64
+ 'italic': original_run.font.italic,
65
+ 'underline': original_run.font.underline,
66
+ 'color': str(original_run.font.color.rgb),
67
+ 'language_id': original_run.font.language_id,
68
+ }
69
+ }
70
+ shape_item[f'paragraph_{r}'] = paragraph_item
71
+
72
+ # Case 2: Grouped shapes
73
+ elif isinstance(shape, GroupShape):
74
  shape_item['type'] = "group"
75
  shape_item['group_content'] = transfer_textbox_content_in_group(shape)
76
+
77
+ # Case 3: Picture
78
  elif isinstance(shape, Picture):
79
  shape_item['type'] = "picture"
80
  image_path = os.path.join(images_dir_path, f"picture_{j}.png")
 
87
  image_stream = io.BytesIO(shape.image.blob)
88
  shape_image = Image.open(image_stream)
89
  shape_image.save(image_path)
90
+
91
  slide_item[f"shape_{j}"] = shape_item
92
+
93
  item[f"slide_{i}"] = slide_item
94
 
95
  return print_json(item), image_path_list