Spaces:

g0th
/

Studymaker

Sleeping

App Files Files Community

Studymaker / ppt_parser.py

g0th

Update ppt_parser.py

34a7f38 verified 3 months ago

raw

history blame

3.97 kB

	import json
	import os
	from pptx import Presentation
	from pptx.util import Inches
	from pptx.shapes.group import GroupShape
	from pptx.shapes.picture import Picture
	from PIL import Image
	import io

	def print_json(item):
	    """Serialize *item* to a human-readable JSON string.

	    Despite the name, nothing is printed: the formatted text is returned
	    to the caller. Non-ASCII characters are emitted as-is instead of being
	    escaped, and nested structures are indented by four spaces.

	    Args:
	        item: Any JSON-serializable object.

	    Returns:
	        str: The JSON representation of *item*.
	    """
	    return json.dumps(item, ensure_ascii=False, indent=4)

	def transfer_textbox_content_in_group(group_shape):
	    """Describe the text boxes contained in a grouped shape.

	    Args:
	        group_shape: Object exposing ``shapes``, an iterable of the group's
	            member shapes (a python-pptx ``GroupShape`` in practice).

	    Returns:
	        dict: One ``"shape_<index>"`` entry per member shape. A member with
	        a text frame carries ``type``, ``location`` (the shape's ``left``
	        and ``top`` values) and one ``"paragraph_<index>"`` entry for every
	        paragraph that has at least one run. Any other member maps to an
	        empty dict, mirroring how slide-level shapes are recorded.
	    """
	    group_shape_item = {}
	    for shape_index, shape in enumerate(group_shape.shapes):
	        shape_item = {}
	        if shape.has_text_frame:
	            shape_item['type'] = "text"
	            shape_item['location'] = (shape.left, shape.top)
	            paragraphs = shape.text_frame.paragraphs
	            for paragraph_index, paragraph in enumerate(paragraphs):
	                # Paragraphs without runs carry no formatting to sample.
	                if not paragraph.runs:
	                    continue
	                # Formatting is taken from the first run only.
	                font = paragraph.runs[0].font
	                try:
	                    color = str(font.color.rgb)
	                except AttributeError:
	                    # python-pptx exposes no ``.rgb`` when the run has no
	                    # explicit RGB colour (theme or inherited colour);
	                    # record that as None instead of aborting the parse.
	                    color = None
	                shape_item[f'paragraph_{paragraph_index}'] = {
	                    'text': paragraph.text,
	                    'align': paragraph.alignment,
	                    'font': {
	                        'name': font.name,
	                        'bold': font.bold,
	                        'italic': font.italic,
	                        'underline': font.underline,
	                        'color': color,
	                        'language_id': font.language_id,
	                    },
	                }
	        group_shape_item[f"shape_{shape_index}"] = shape_item
	    return group_shape_item

	def _text_frame_to_item(text_frame):
	    """Build the ``type``/``paragraph_<n>`` description of a text frame."""
	    shape_item = {'type': "text"}
	    for r, paragraph in enumerate(text_frame.paragraphs):
	        # Paragraphs without runs carry no formatting to sample.
	        if not paragraph.runs:
	            continue
	        # Formatting is taken from the first run only.
	        font = paragraph.runs[0].font
	        try:
	            color = str(font.color.rgb)
	        except AttributeError:
	            # python-pptx exposes no ``.rgb`` when the run has no explicit
	            # RGB colour (theme or inherited colour); store None rather
	            # than aborting the whole presentation.
	            color = None
	        shape_item[f'paragraph_{r}'] = {
	            'text': paragraph.text,
	            'align': paragraph.alignment,
	            'font': {
	                'name': font.name,
	                'bold': font.bold,
	                'italic': font.italic,
	                'underline': font.underline,
	                'color': color,
	                'language_id': font.language_id,
	            },
	        }
	    return shape_item


	def _emu_to_inches(length):
	    """Convert a length in EMU to inches, passing ``None`` through."""
	    # Inches(1) is the number of EMU in one inch. Wrapping an EMU value
	    # in Inches(...) would instead treat it as a count of inches.
	    return None if length is None else length / Inches(1)


	def _picture_to_item(picture, image_path):
	    """Save a picture shape's image as PNG at *image_path* and describe it."""
	    embedded = picture.image
	    shape_item = {
	        'type': "picture",
	        'image_path': image_path,
	        'size': embedded.size,
	        'dpi': embedded.dpi,
	        'location': (picture.left, picture.top),
	        'location_inches': (
	            _emu_to_inches(picture.left),
	            _emu_to_inches(picture.top),
	        ),
	    }
	    # The context manager releases the decoder as soon as the copy is saved.
	    with Image.open(io.BytesIO(embedded.blob)) as shape_image:
	        shape_image.save(image_path)
	    return shape_item


	def transfer_to_structure(pptx_file, images_dir_path):
	    """Convert a presentation into a JSON description and export its pictures.

	    Every slide becomes a ``"slide_<i>"`` entry holding one ``"shape_<j>"``
	    entry per shape. Text shapes list their paragraphs, grouped shapes are
	    described via ``transfer_textbox_content_in_group`` and pictures are
	    saved as PNG files; any other shape is recorded as an empty dict.

	    Args:
	        pptx_file: Path or file-like object handed to ``Presentation``.
	        images_dir_path: Directory receiving the exported pictures. It is
	            created when missing.

	    Returns:
	        tuple: ``(json_text, image_path_list)`` where ``json_text`` is the
	        structure serialized by ``print_json`` and ``image_path_list``
	        holds the path of every picture written, in slide/shape order.
	    """
	    item = {}
	    image_path_list = []
	    prs = Presentation(pptx_file)

	    for i, slide in enumerate(prs.slides):
	        slide_item = {}

	        for j, shape in enumerate(slide.shapes):
	            # Case 1: Normal text box
	            if shape.has_text_frame:
	                shape_item = _text_frame_to_item(shape.text_frame)

	            # Case 2: Grouped shapes
	            elif isinstance(shape, GroupShape):
	                shape_item = {
	                    'type': "group",
	                    'group_content': transfer_textbox_content_in_group(shape),
	                }

	            # Case 3: Picture
	            elif isinstance(shape, Picture):
	                if images_dir_path:
	                    os.makedirs(images_dir_path, exist_ok=True)
	                # The slide index is part of the name so that pictures
	                # sharing a shape index on different slides do not
	                # overwrite each other.
	                image_path = os.path.join(
	                    images_dir_path, f"slide_{i}_picture_{j}.png"
	                )
	                image_path_list.append(image_path)
	                shape_item = _picture_to_item(shape, image_path)

	            # Anything else (tables, charts, ...) is kept as a placeholder.
	            else:
	                shape_item = {}

	            slide_item[f"shape_{j}"] = shape_item

	        item[f"slide_{i}"] = slide_item

	    return print_json(item), image_path_list