ginipick committed on
Commit
400ea45
·
verified ·
1 Parent(s): a30c98c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -3
app.py CHANGED
@@ -971,11 +971,49 @@ async def convert_text_to_pdf(text_content: str, title: str) -> str:
971
  # 영구 저장소의 파일 경로
972
  file_path = PERMANENT_PDF_DIR / filename
973
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
974
  # 임시 PDF 파일 생성
975
  pdf_buffer = io.BytesIO()
976
- doc = SimpleDocTemplate(pdf_buffer, pagesize=letter)
977
  styles = getSampleStyleSheet()
978
 
 
 
 
 
979
  # 내용을 문단으로 분할
980
  content = []
981
 
@@ -991,7 +1029,10 @@ async def convert_text_to_pdf(text_content: str, title: str) -> str:
991
  paragraphs = text_content.split('\n\n')
992
  for para in paragraphs:
993
  if para.strip():
994
- p = Paragraph(para.replace('\n', '<br/>'), normal_style)
 
 
 
995
  content.append(p)
996
  content.append(Spacer(1, 10))
997
 
@@ -1098,7 +1139,23 @@ async def text_to_pdf(file: UploadFile = File(...)):
1098
 
1099
  # ํŒŒ์ผ ํƒ€์ž…์— ๋”ฐ๋ผ ํ…์ŠคํŠธ ์ถ”์ถœ
1100
  if filename.endswith('.txt'):
1101
- text_content = content.decode('utf-8', errors='replace')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1102
  elif filename.endswith('.docx') or filename.endswith('.doc'):
1103
  # 임시 파일로 저장
1104
  with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
 
971
  # 영구 저장소의 파일 경로
972
  file_path = PERMANENT_PDF_DIR / filename
973
 
974
+ # 폰트 설정 (한글 지원 폰트)
975
+ from reportlab.pdfbase import pdfmetrics
976
+ from reportlab.pdfbase.ttfonts import TTFont
977
+
978
+ # 폰트 디렉토리 설정
979
+ FONT_DIR = BASE / "fonts"
980
+ if not FONT_DIR.exists():
981
+ FONT_DIR.mkdir(parents=True)
982
+
983
+ # 기본 한글 폰트 파일 경로
984
+ default_font_path = FONT_DIR / "NanumGothic.ttf"
985
+
986
+ # 폰트 파일이 없으면 다운로드
987
+ if not default_font_path.exists():
988
+ import urllib.request
989
+ try:
990
+ # ๋‚˜๋ˆ”๊ณ ๋”• ํฐํŠธ ๋‹ค์šด๋กœ๋“œ (Google Fonts์—์„œ)
991
+ font_url = "https://fonts.gstatic.com/s/nanumgothic/v21/PN_3Rfi-oW3hYwmKDpxS7F_z-7rJxHVIsPV5MbNO2rV2_va-Nv6p.ttf"
992
+ urllib.request.urlretrieve(font_url, str(default_font_path))
993
+ logger.info(f"한글 폰트 다운로드 완료: {default_font_path}")
994
+ except Exception as e:
995
+ logger.error(f"폰트 다운로드 실패: {e}")
996
+ # 폰트 다운로드 실패 시 기본 폰트 사용
997
+ default_font_path = None
998
+
999
+ # 폰트 등록
1000
+ font_name = "NanumGothic"
1001
+ if default_font_path and default_font_path.exists():
1002
+ pdfmetrics.registerFont(TTFont(font_name, str(default_font_path)))
1003
+ else:
1004
+ # 기본 폰트를 사용할 수 없는 경우 Helvetica 사용
1005
+ font_name = "Helvetica"
1006
+ logger.warning("한글 폰트를 찾을 수 없어 기본 폰트를 사용합니다. 한글이 제대로 표시되지 않을 수 있습니다.")
1007
+
1008
  # 임시 PDF 파일 생성
1009
  pdf_buffer = io.BytesIO()
1010
+ doc = SimpleDocTemplate(pdf_buffer, pagesize=letter, encoding='utf-8')
1011
  styles = getSampleStyleSheet()
1012
 
1013
+ # 스타일 설정 - 한글 폰트 적용
1014
+ styles['Title'].fontName = font_name
1015
+ styles['Normal'].fontName = font_name
1016
+
1017
  # 내용을 문단으로 분할
1018
  content = []
1019
 
 
1029
  paragraphs = text_content.split('\n\n')
1030
  for para in paragraphs:
1031
  if para.strip():
1032
+ # XML 이스케이프 처리
1033
+ from xml.sax.saxutils import escape
1034
+ safe_para = escape(para.replace('\n', '<br/>'))
1035
+ p = Paragraph(safe_para, normal_style)
1036
  content.append(p)
1037
  content.append(Spacer(1, 10))
1038
 
 
1139
 
1140
  # ํŒŒ์ผ ํƒ€์ž…์— ๋”ฐ๋ผ ํ…์ŠคํŠธ ์ถ”์ถœ
1141
  if filename.endswith('.txt'):
1142
+ # 인코딩 자동 감지 시도
1143
+ encodings = ['utf-8', 'euc-kr', 'cp949', 'latin1']
1144
+ text_content = None
1145
+
1146
+ for encoding in encodings:
1147
+ try:
1148
+ text_content = content.decode(encoding, errors='strict')
1149
+ logger.info(f"텍스트 파일 인코딩 감지: {encoding}")
1150
+ break
1151
+ except UnicodeDecodeError:
1152
+ continue
1153
+
1154
+ if text_content is None:
1155
+ # 모든 인코딩 시도 실패 시 기본적으로 UTF-8로 시도하고 오류는 대체 문자로 처리
1156
+ text_content = content.decode('utf-8', errors='replace')
1157
+ logger.warning("텍스트 파일 인코딩을 감지할 수 없어 UTF-8으로 시도합니다.")
1158
+
1159
  elif filename.endswith('.docx') or filename.endswith('.doc'):
1160
  # 임시 파일로 저장
1161
  with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file: