Spaces:
Sleeping
Sleeping
File size: 5,306 Bytes
cd3a196 4a27668 cd3a196 4a27668 1e1921b 4a27668 cd3a196 4a27668 cd3a196 4a27668 cd3a196 fefc92e 4a27668 fefc92e 4a27668 fefc92e cd3a196 4a27668 043141a cd3a196 043141a 1e1921b a80ebbc 1e1921b cd3a196 c24b5ac cd3a196 747b69f cd3a196 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import gradio as gr
from pptx import Presentation
from pptx.util import Pt, Inches
from pptx.shapes.group import GroupShape
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.shapes.picture import Picture
import json
import os
from PIL import Image
import io
from pptx2png import pptx_to_images, render_images_with_skia
def print_json(item):
    """Return *item* serialized as a pretty-printed JSON string.

    Despite the name, nothing is written to stdout; the formatted text is
    handed back to the caller.

    Args:
        item: Any JSON-serializable object.

    Returns:
        The JSON text, indented by four spaces, with non-ASCII characters
        kept verbatim instead of being ``\\u``-escaped.
    """
    return json.dumps(item, ensure_ascii=False, indent=4)
def transfer_textbox_content_in_group(group_shape):
    """Collect the text and first-run font settings of each text box in a group.

    Args:
        group_shape: Object exposing a ``shapes`` iterable (a python-pptx
            group shape). Only its direct members are inspected.

    Returns:
        A dict keyed ``"shape_<index>"``. Members without a text frame map to
        an empty dict. Text members carry ``'type'``, ``'location'`` (left,
        top) and one ``'paragraph_<index>'`` entry per paragraph, each holding
        ``'text'``, ``'align'`` and a ``'font'`` dict taken from the
        paragraph's first run.
    """
    group_shape_item = {}
    for shape_index, shape in enumerate(group_shape.shapes):
        shape_item = {}
        if shape.has_text_frame:
            shape_item['type'] = "text"
            shape_item['location'] = (shape.left, shape.top)
            paragraphs = shape.text_frame.paragraphs
            for paragraph_index, paragraph in enumerate(paragraphs):
                runs = paragraph.runs
                # An empty paragraph has no runs; report its font fields as
                # None instead of failing on runs[0].
                font = runs[0].font if runs else None
                try:
                    # `.rgb` is only readable for explicit RGB colors; other
                    # color types (e.g. theme or unset colors) raise
                    # AttributeError, which is reported here as None.
                    color = font.color.rgb if font is not None else None
                except AttributeError:
                    color = None
                font_item = {
                    'name': font.name if font is not None else None,
                    'bold': font.bold if font is not None else None,
                    'italic': font.italic if font is not None else None,
                    'underline': font.underline if font is not None else None,
                    'color': color,
                    'language_id': font.language_id if font is not None else None,
                }
                shape_item[f'paragraph_{paragraph_index}'] = {
                    'text': paragraph.text,
                    'align': paragraph.alignment,
                    'font': font_item,
                }
        group_shape_item[f"shape_{shape_index}"] = shape_item
    return group_shape_item
def _describe_first_run_font(paragraph):
    """Return the font settings of *paragraph*'s first run as a plain dict."""
    runs = paragraph.runs
    # Empty paragraphs have no runs; report every field as None.
    font = runs[0].font if runs else None
    try:
        # `.rgb` is only readable for explicit RGB colors; other color types
        # raise AttributeError, which is reported here as None.
        color = font.color.rgb if font is not None else None
    except AttributeError:
        color = None
    return {
        'name': font.name if font is not None else None,
        'bold': font.bold if font is not None else None,
        'italic': font.italic if font is not None else None,
        'underline': font.underline if font is not None else None,
        'color': color,
        'language_id': font.language_id if font is not None else None,
    }


def _describe_text_shape(shape):
    """Return the type, location and per-paragraph text/font of a text shape."""
    shape_item = {
        'type': "text",
        'location': (shape.left, shape.top),
    }
    for r, paragraph in enumerate(shape.text_frame.paragraphs):
        shape_item[f'paragraph_{r}'] = {
            'text': paragraph.text,
            'align': paragraph.alignment,
            'font': _describe_first_run_font(paragraph),
        }
    return shape_item


def _emu_to_inches(length):
    """Convert an EMU length to inches as a float; ``None`` passes through."""
    if length is None:
        return None
    # Inches(1) is the number of EMU in one inch.
    return length / Inches(1)


def transfer_to_structure(pptx_file, images_dir_path):
    """Describe every slide of a presentation as JSON and export its pictures.

    The presentation is only read: text shapes are described (text,
    alignment, first-run font), group shapes are delegated to
    ``transfer_textbox_content_in_group``, and picture shapes are written to
    disk as PNG files. Any other shape is recorded as an empty entry.

    Args:
        pptx_file: Path or file-like object accepted by ``Presentation``.
        images_dir_path: Directory the extracted pictures are saved into.
            It is not created here and must already exist.

    Returns:
        A ``(json_text, image_path_list)`` tuple: the JSON string describing
        ``slide_<i>`` / ``shape_<j>`` entries, and the paths of all pictures
        written, in slide order.
    """
    item = {}
    prs = Presentation(pptx_file)
    image_path_list = []
    # Iterate through each slide in the presentation
    for i, slide in enumerate(prs.slides):
        slide_item = {}
        # Iterate through each shape in the slide
        for j, shape in enumerate(slide.shapes):
            shape_item = {}
            if shape.has_text_frame:
                shape_item = _describe_text_shape(shape)
            elif isinstance(shape, GroupShape):
                shape_item['type'] = "group"
                shape_item['group_content'] = transfer_textbox_content_in_group(shape)
            elif isinstance(shape, Picture):
                shape_item['type'] = "picture"
                # The slide index is part of the name so that pictures at the
                # same shape position on different slides do not overwrite
                # each other.
                image_path = os.path.join(
                    images_dir_path, f"slide_{i}_picture_{j}.png"
                )
                shape_item['image_path'] = image_path
                shape_item['size'] = shape.image.size  # width, height
                shape_item['dpi'] = shape.image.dpi  # (horz_dpi, vert_dpi)
                shape_item['location'] = (shape.left, shape.top)
                shape_item['location_inches'] = (
                    _emu_to_inches(shape.left),
                    _emu_to_inches(shape.top),
                )
                with Image.open(io.BytesIO(shape.image.blob)) as shape_image:
                    shape_image.save(image_path)
                image_path_list.append(image_path)
            slide_item[f"shape_{j}"] = shape_item
        item[f"slide_{i}"] = slide_item
    return print_json(item), image_path_list
def copy_font_style(original_run, new_run):
    """Copy the font settings of *original_run* onto *new_run*.

    Name, bold, italic, underline and language id are always copied. The
    color is copied only when the source exposes an explicit RGB value;
    otherwise the target's color is left untouched.

    Args:
        original_run: Run whose ``font`` is read.
        new_run: Run whose ``font`` is updated in place.
    """
    source_font = original_run.font
    target_font = new_run.font

    target_font.name = source_font.name
    target_font.bold = source_font.bold
    target_font.italic = source_font.italic
    target_font.underline = source_font.underline
    target_font.language_id = source_font.language_id

    try:
        # `.rgb` is only readable for explicit RGB colors; other color types
        # raise AttributeError and there is nothing to copy in that case.
        rgb = source_font.color.rgb
    except AttributeError:
        rgb = None
    if rgb is not None:
        target_font.color.rgb = rgb
def process_pptx(pptx_file):
    """Run the full conversion for one uploaded presentation.

    Ensures the ``images`` and ``rendered_png`` working directories exist,
    extracts the slide structure and pictures, then renders a preview via
    ``pptx_to_images`` / ``render_images_with_skia``.

    Args:
        pptx_file: The uploaded presentation, passed unchanged to
            ``transfer_to_structure`` and ``pptx_to_images``.

    Returns:
        A ``(json_output, image_paths, rendered_image_path)`` tuple, in the
        order the Gradio outputs expect them.
    """
    images_dir_path = "images"
    output_dir = "rendered_png"  # Directory to save the rendered images
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(images_dir_path, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    json_output, image_paths = transfer_to_structure(pptx_file, images_dir_path)
    images, slide_dimensions = pptx_to_images(pptx_file)
    # NOTE(review): presumably returns the path of the rendered preview, since
    # it is fed to a gr.Image(type="filepath") output; confirm in pptx2png.
    rendered_image_path = render_images_with_skia(images, slide_dimensions, output_dir)
    print(image_paths, rendered_image_path)
    return json_output, image_paths, rendered_image_path
# Gradio interface: one file upload in; JSON text, extracted pictures and a
# rendered preview out (same order as the tuple returned by process_pptx).
iface = gr.Interface(
    fn=process_pptx,
    inputs=gr.File(label="Upload PowerPoint File"),
    outputs=[
        gr.Textbox(label="JSON Output"),
        gr.Gallery(label="Extracted Images"),
        gr.Image(type="filepath", label="PPT Preview"),
    ],
    title="PowerPoint to JSON Converter",
    description="Upload a PowerPoint file to convert its structure to JSON and display extracted images.",
)
iface.launch()