Spaces:
Sleeping
Sleeping
File size: 7,635 Bytes
afefb95 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
# -*- coding: utf-8 -*-
# Use ReportLab package to create PDF poster
from reportlab.pdfbase import pdfmetrics
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import (
SimpleDocTemplate,
Paragraph,
Spacer,
Table,
TableStyle,
Image,
Flowable,
ListFlowable,
ListItem,
)
from reportlab.lib import colors
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
import yt_dlp
import cv2
from PIL import Image as PILImage
import os
import tempfile
import re
import uuid
import pymupdf
'''
# UnicodeCIDfont names
$chs$ = Chinese Simplified (mainland): '$STSong-Light$'
$cht$ = Chinese Traditional (Taiwan): '$MSung-Light$', '$MHei-Medium$'
$kor$ = Korean: '$HYSMyeongJoStd-Medium$','$HYGothic-Medium$'
$jpn$ = Japanese: '$HeiseiMin-W3$', '$HeiseiKakuGo-W5$'
'''
# Configuration
# ---------------------------------------------------------------------------
# CID fonts must be registered with ReportLab before any style can name them.
for _cid_font_name in ('STSong-Light', 'MSung-Light'):
    pdfmetrics.registerFont(UnicodeCIDFont(_cid_font_name))

# Page geometry
PAGE_SIZE = A4
MARGIN = 1.0 * cm
COLUMNS = 3  # number of image columns in the poster table

# Shared paragraph styles taken from ReportLab's sample stylesheet
STYLE = getSampleStyleSheet()
style_body = STYLE["BodyText"]
style_title = STYLE["Title"]
style_title.alignment = 1  # 1 == centered

# Width left for tables once both side margins are removed
page_width = PAGE_SIZE[0] - 2 * MARGIN
col_width = page_width / COLUMNS
img_width = col_width - cm  # keep 1 cm of horizontal padding inside each column
img_height = 5 * cm
#==========================================================================
def create_poster(filename, images, lang, summary, url=None):
    """Build a PDF poster from a text summary and a set of images.

    The story is assembled in this order: title, image table (``COLUMNS``
    images per row, omitted when there are no images), the summary rendered
    as a list of bold paragraphs, and an optional clickable link.

    Side effect: ``fontName`` is set on the module-level ``style_body`` and
    ``style_title`` styles according to ``lang``.

    Args:
        filename: Path of the PDF to write (passed to ``SimpleDocTemplate``).
        images: Iterable of images that ``cv2.imwrite`` can save as JPEG
            (presumably numpy arrays -- confirm against the caller). May be
            empty or ``None``.
        lang: Output language name. ``'chinese'`` (case-insensitive) selects
            the ``STSong-Light`` font; anything else selects Helvetica.
        summary: Newline-separated text. A line beginning with ``0.`` (after
            optional markdown decoration) provides the title; a line that
            contains a digit followed by ``.`` starts a new section; other
            non-blank lines following a section are added as body text.
        url: Optional URL rendered as a "View Original Video" link.

    Returns:
        ``filename``, unchanged.
    """
    from xml.sax.saxutils import escape  # local import keeps the block self-contained

    print("Output language is:", lang)
    # generate PDF file
    doc = SimpleDocTemplate(filename, pagesize=PAGE_SIZE,
                            leftMargin=MARGIN, rightMargin=MARGIN,
                            topMargin=MARGIN, bottomMargin=MARGIN)

    # Pick the font before any Paragraph is created from these styles.
    if (lang or "").lower() == 'chinese':
        style_body.fontName = 'STSong-Light'
        style_title.fontName = 'STSong-Light'
    else:
        style_body.fontName = 'Helvetica'
        style_title.fontName = 'Helvetica-Bold'

    title, list_content = _poster_summary_flowables(summary)
    # White bullets are invisible on the page; the list is used for its indent.
    output_list = ListFlowable(list_content,
                               bulletType='bullet',
                               bulletColor='white', value='circle')

    # The JPEG files written for the table must still exist when doc.build()
    # runs, so the document is built inside the temporary-directory context.
    with tempfile.TemporaryDirectory() as temp_dir:
        story = [title]
        tbl = _poster_image_table(images, temp_dir)
        if tbl is not None:
            story.append(tbl)
        story.append(Spacer(1, 0.3*cm))
        story.append(output_list)
        story.append(Spacer(1, 0.5*cm))
        # Clickable video link
        if url:
            # Escape the URL so '&' or '"' cannot break the paragraph markup.
            safe_url = escape(str(url), {'"': '&quot;'})
            link_text = f'<link href="{safe_url}"><font color="{colors.blue}"><u>View Original Video</u></font></link>'
            story.append(Paragraph(link_text, style_body))
        # build a page
        doc.build(story)
    return filename


# A title line starts with "0." after optional markdown/whitespace decoration.
# Anchoring and escaping the dot keeps "10.", "2025" or "100%" from matching.
_POSTER_TITLE_RE = re.compile(r'\W*0\.')
# A section heading is any line containing a digit followed by a period.
_POSTER_SECTION_RE = re.compile(r'\d\.')


def _poster_summary_flowables(summary):
    """Parse the summary text into (title Paragraph, list of body flowables)."""
    from xml.sax.saxutils import escape

    def bold(text, style):
        # Escape '&', '<' and '>' so free text cannot be read as markup.
        return Paragraph(f"<b>{escape(text)}</b>", style)

    title_text = "Summary"  # used when no title line is present
    flowables = []
    in_section = False
    for line in (summary or "").strip().split("\n"):
        if _POSTER_TITLE_RE.match(line):
            # Keep everything after the first "0." (later "0." stays intact).
            title_text = line.replace("*", "").split("0.", 1)[1]
            _, colon, after_colon = title_text.partition(":")
            if colon:
                title_text = after_colon
        elif _POSTER_SECTION_RE.search(line):
            # Start of a new question-answer section
            flowables.append(Spacer(1, 0.3*cm))
            # Split on the first colon only so answers such as "10:30" or
            # URLs are not truncated.
            heading, colon, answer = line.replace("*", "").partition(":")
            flowables.append(bold(heading, style_body))
            if colon:  # answer given on the same line as the question
                flowables.append(bold(answer, style_body))
            in_section = True
        elif line.strip() and in_section:
            flowables.append(bold(line, style_body))
    return bold(title_text, style_title), flowables


def _poster_image_table(images, temp_dir):
    """Save images as JPEGs in temp_dir and return a COLUMNS-wide Table, or None if there are no images."""
    cells = []
    for index, image in enumerate(images if images is not None else ()):
        image_path = os.path.join(temp_dir, f'{index}.jpg')
        # The platypus Image flowable is given a file path, so write to disk.
        cv2.imwrite(image_path, image)
        cells.append([
            Spacer(1, 0.3*cm),
            Image(image_path, width=img_width, height=img_height),
            Spacer(1, 0.3*cm),
        ])
    if not cells:
        return None

    # Pad the last row so every row has exactly COLUMNS cells, matching the
    # colWidths list. Padding cells hold a flowable rather than a string so
    # they are laid out the same way as the image cells.
    while len(cells) % COLUMNS:
        cells.append([Spacer(1, 0.3*cm)])
    table_data = [cells[start:start + COLUMNS]
                  for start in range(0, len(cells), COLUMNS)]

    # Create table with styling
    tbl = Table(table_data,
                colWidths=[col_width]*COLUMNS,
                rowHeights=img_height+0.5*cm)
    tbl.setStyle(TableStyle([
        ('ALIGN', (0,0), (-1,-1), 'CENTER'),
        ('VALIGN', (0,0), (-1,-1), 'CENTER'),
        ('PADDING', (0,0), (-1,-1), 10),
        ('BOX', (0,0), (-1,-1), 0.5, colors.white),
        ('INNERGRID', (0,0), (-1,-1), 0.5, colors.white),
    ]))
    return tbl
def generate_unique_filename(extension):
    """Return a fresh random UUID4 string with ``extension`` appended."""
    unique_stem = uuid.uuid4()
    return "{}{}".format(unique_stem, extension)
def generate_tmp_filename(basename, extension):
    """Return ``basename`` immediately followed by ``extension``."""
    return "{}{}".format(basename, extension)
def pdf_to_jpg(pdf_path, output_path):
    """Render the first page of a PDF to a JPEG file.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        output_path: Path where the JPEG of page 0 is written.

    Returns:
        A tuple ``(image, output_path)`` where ``image`` is the PIL image
        opened from ``output_path``.
    """
    doc = pymupdf.open(pdf_path)
    try:
        page = doc.load_page(0)
        pix = page.get_pixmap()
        pix.save(output_path, "JPEG")
    finally:
        # Release the document even if loading, rendering or saving fails.
        doc.close()
    image = PILImage.open(output_path)
    return image, output_path
def download_youtube_video(url):
    """Fetch a video with yt-dlp and report where it was saved.

    The output file name is the last path component of ``url`` with ``.mp4``
    appended.

    Returns:
        The output path on success. If any exception is raised while
        downloading, the error is printed and its message is returned as a
        string instead.
    """
    target_path = generate_tmp_filename(os.path.basename(url), ".mp4")
    downloader_options = dict(
        outtmpl=target_path,       # where the video is written
        format='best',             # best quality available
        cookiefile='cookies.txt',  # cookies file passed to yt-dlp
    )
    try:
        with yt_dlp.YoutubeDL(downloader_options) as downloader:
            downloader.download([url])
    except Exception as err:
        print("load yt_dlp:", err)
        return str(err)
    return target_path
|