wangjin2000 committed on
Commit
afefb95
·
verified ·
1 Parent(s): a81e750

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +223 -0
utils.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Use ReportLab package to create PDF poster
3
+ from reportlab.pdfbase import pdfmetrics
4
+ from reportlab.lib.pagesizes import A4
5
+ from reportlab.lib.styles import getSampleStyleSheet
6
+ from reportlab.lib.units import cm
7
+ from reportlab.platypus import (
8
+ SimpleDocTemplate,
9
+ Paragraph,
10
+ Spacer,
11
+ Table,
12
+ TableStyle,
13
+ Image,
14
+ Flowable,
15
+ ListFlowable,
16
+ ListItem,
17
+ )
18
+ from reportlab.lib import colors
19
+ from reportlab.pdfbase.cidfonts import UnicodeCIDFont
20
+
21
+ import yt_dlp
22
+ import cv2
23
+ from PIL import Image as PILImage
24
+
25
+ import os
26
+ import tempfile
27
+ import re
28
+ import uuid
29
+
30
+ import pymupdf
31
+
32
+ '''
33
+ # UnicodeCIDfont names
34
+ $chs$ = Chinese Simplified (mainland): '$STSong-Light$'
35
+ $cht$ = Chinese Traditional (Taiwan): '$MSung-Light$', '$MHei-Medium$'
36
+ $kor$ = Korean: '$HYSMyeongJoStd-Medium$','$HYGothic-Medium$'
37
+ $jpn$ = Japanese: '$HeiseiMin-W3$', '$HeiseiKakuGo-W5$'
38
+ '''
39
+
40
# Configuration
# Make the CID fonts for Chinese text available to ReportLab
# (STSong-Light: Simplified Chinese, MSung-Light: Traditional Chinese).
pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
pdfmetrics.registerFont(UnicodeCIDFont('MSung-Light'))

# Page geometry
PAGE_SIZE = A4
MARGIN = 1.0 * cm
COLUMNS = 3  # the image table uses 3 columns

# Shared paragraph styles taken from ReportLab's sample style sheet
STYLE = getSampleStyleSheet()
style_title = STYLE['Title']
style_title.alignment = 1  # 1 centers the title
style_body = STYLE["BodyText"]

# Width left between the margins, divided evenly among the table columns
page_width = PAGE_SIZE[0] - 2*MARGIN
col_width = page_width / COLUMNS
img_width = col_width - 1*cm  # leave some padding inside each cell
img_height = 5*cm
58
+ #==========================================================================
59
# Title marker: a literal "0." that is not part of a larger number ("10.", "0.5").
_TITLE_MARKER = re.compile(r"(?<!\d)0\.(?!\d)")
# Question marker: an integer followed by a dot, excluding decimals such as "3.5".
_QUESTION_MARKER = re.compile(r"(?<![\d.])\d+\.(?!\d)")


def _parse_summary(summary):
    """Split the summary text into a title and an ordered list of entries.

    Returns:
        (title_text, entries) where ``entries`` is a list of
        ``("question", text)`` / ``("answer", text)`` tuples in input order.
        ``title_text`` stays "Summary" when no usable "0." title line is found.
    """
    title_text = "Summary"
    entries = []
    seen_question = False

    for line in summary.strip().split("\n"):
        clean_line = line.replace("*", "")  # drop markdown emphasis markers

        if _TITLE_MARKER.search(clean_line):
            # Title line: keep what follows "0.", and if there is a colon
            # (e.g. "0. Title: Foo") keep only what follows the first colon.
            candidate = _TITLE_MARKER.split(clean_line, maxsplit=1)[1]
            _, colon, after_colon = candidate.partition(":")
            if colon:
                candidate = after_colon
            title_text = candidate.strip() or title_text
        elif _QUESTION_MARKER.search(clean_line):
            # Numbered line: the text before the first colon is the question;
            # anything after it is an answer given on the same line. Splitting
            # only once keeps answers that contain further colons intact.
            heading, _, inline_answer = clean_line.partition(":")
            entries.append(("question", heading))
            if inline_answer.strip():
                entries.append(("answer", inline_answer))
            seen_question = True
        elif line.strip() and seen_question:
            # Non-empty line following a question: part of its answer.
            entries.append(("answer", line))

    return title_text, entries


def create_poster(filename, images, lang, summary, url=None):
    """Build a one-document PDF poster with ReportLab and return its path.

    The poster contains, in order: a centered title, a table of the given
    images (``COLUMNS`` per row), the question/answer list parsed from
    ``summary`` and, when ``url`` is given, a clickable link to the video.

    Args:
        filename: Output path handed to ``SimpleDocTemplate``.
        images: Iterable of images written to disk with ``cv2.imwrite``
            (presumably OpenCV/numpy image arrays - confirm with caller).
            ``None`` or an empty iterable produces a poster without a table.
        lang: Output language; ``'chinese'`` (case-insensitive) selects the
            'STSong-Light' font, anything else Helvetica.
        summary: Text whose "0." line supplies the title and whose numbered
            lines are treated as questions followed by their answers.
        url: Optional link target for the "View Original Video" line.

    Returns:
        ``filename``, unchanged.

    Note:
        The module-level ``style_body`` / ``style_title`` objects are mutated,
        so the chosen font persists for later calls.
        NOTE(review): ``summary`` and ``url`` are inserted into Paragraph
        markup unescaped, as in the original implementation.
    """
    print("Output language is:", lang)

    # generate PDF file
    doc = SimpleDocTemplate(filename, pagesize=PAGE_SIZE,
                            leftMargin=MARGIN, rightMargin=MARGIN,
                            topMargin=MARGIN, bottomMargin=MARGIN)

    # Define a style with the detected language font
    if lang.lower() == 'chinese':
        style_body.fontName = 'STSong-Light'
        style_title.fontName = 'STSong-Light'
    else:
        style_body.fontName = 'Helvetica'
        style_title.fontName = 'Helvetica-Bold'

    # Process output summary
    title_text, entries = _parse_summary(summary)
    title = Paragraph(f"<b>{title_text}</b>", style_title)

    list_content = []
    for kind, text in entries:
        if kind == "question":
            # Visually separate each new question-answer section.
            list_content.append(Spacer(1, 0.3*cm))
        list_content.append(Paragraph(f"<b>{text}</b>", style_body))

    story = [title]

    # The image files must outlive doc.build(): ReportLab's Image flowable is
    # given a path, so the build has to happen inside this block.
    with tempfile.TemporaryDirectory() as temp_dir:
        cells = []
        for index, image in enumerate([] if images is None else images):
            image_path = os.path.join(temp_dir, f'{index}.jpg')
            if not cv2.imwrite(image_path, image):
                # imwrite reports failure by returning False; skip the image
                # instead of failing later while building the PDF.
                print("Could not write image:", image_path)
                continue
            cells.append([
                Spacer(1, 0.3*cm),
                Image(image_path, width=img_width, height=img_height),
                Spacer(1, 0.3*cm),
            ])

        if cells:
            # Lay the images out COLUMNS per row, padding the last row with
            # empty cells so every row matches colWidths.
            table_data = []
            for start in range(0, len(cells), COLUMNS):
                row = cells[start:start + COLUMNS]
                row.extend([""] * (COLUMNS - len(row)))
                table_data.append(row)

            # Create table with styling
            tbl = Table(table_data,
                        colWidths=[col_width]*COLUMNS,
                        rowHeights=img_height+0.5*cm)
            tbl.setStyle(TableStyle([
                ('ALIGN', (0,0), (-1,-1), 'CENTER'),
                # Vertical alignment uses TOP/MIDDLE/BOTTOM; 'CENTER' is not
                # a valid VALIGN value.
                ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
                ('PADDING', (0,0), (-1,-1), 10),
                ('BOX', (0,0), (-1,-1), 0.5, colors.white),
                ('INNERGRID', (0,0), (-1,-1), 0.5, colors.white),
            ]))
            story.append(tbl)

        story.append(Spacer(1, 0.3*cm))
        if list_content:
            story.append(ListFlowable(list_content,
                                      bulletType='bullet',
                                      bulletColor='white', value='circle'))
        story.append(Spacer(1, 0.5*cm))

        # Clickable video link
        if url:
            link_text = f'<link href="{url}"><font color="{colors.blue}"><u>View Original Video</u></font></link>'
            story.append(Paragraph(link_text, style_body))

        # build a page
        doc.build(story)

    return filename
186
+
187
def generate_unique_filename(extension):
    """Return a random UUID4-based file name ending with ``extension``."""
    unique_stem = uuid.uuid4()
    return "{}{}".format(unique_stem, extension)
189
+
190
def generate_tmp_filename(basename, extension):
    """Return ``basename`` immediately followed by ``extension``."""
    return "{}{}".format(basename, extension)
192
+
193
def pdf_to_jpg(pdf_path, output_path):
    """Render the first page of a PDF to a JPEG file.

    Args:
        pdf_path: Path of the PDF opened with ``pymupdf.open``.
        output_path: Path the rendered JPEG is saved to.

    Returns:
        A tuple ``(image, output_path)`` where ``image`` is the saved file
        re-opened with PIL.
    """
    # The context manager closes the document even when loading, rendering or
    # saving the page raises, so the PDF handle is never leaked.
    with pymupdf.open(pdf_path) as doc:
        page = doc.load_page(0)
        pix = page.get_pixmap()
        pix.save(output_path, "JPEG")

    image = PILImage.open(output_path)

    return image, output_path
204
+
205
def download_youtube_video(url):
    """Download a YouTube video with yt-dlp.

    Args:
        url: Address of the video to download.

    Returns:
        The path of the downloaded ``.mp4`` file on success. On failure the
        error is printed and its message is returned as a string instead
        (kept for backward compatibility with existing callers).
    """
    # Derive the file name from the last URL segment, replacing every
    # character that is unsafe in file names or special in a yt-dlp output
    # template ('?', '=', '&', '%', ...). URLs ending in '/' give an empty
    # segment, so fall back to a random name in that case.
    basename = re.sub(r"[^A-Za-z0-9._-]+", "_", os.path.basename(url)).strip("._")
    if not basename:
        basename = uuid.uuid4().hex
    output_path = generate_tmp_filename(basename, ".mp4")

    ydl_opts = {
        'outtmpl': output_path,       # Path where the video will be saved
        'format': 'best',             # Download the best quality available
        'cookiefile': 'cookies.txt',  # Path to your cookies file #JW 20250115
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return output_path
    except Exception as e:
        print("load yt_dlp:", e)
        return str(e)
223
+