Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -971,11 +971,49 @@ async def convert_text_to_pdf(text_content: str, title: str) -> str:
|
|
971 |
# 영구 저장소의 파일 경로
|
972 |
file_path = PERMANENT_PDF_DIR / filename
|
973 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
974 |
# 임시 PDF 파일 생성
|
975 |
pdf_buffer = io.BytesIO()
|
976 |
-
doc = SimpleDocTemplate(pdf_buffer, pagesize=letter)
|
977 |
styles = getSampleStyleSheet()
|
978 |
|
|
|
|
|
|
|
|
|
979 |
# 내용을 문단으로 분할
|
980 |
content = []
|
981 |
|
@@ -991,7 +1029,10 @@ async def convert_text_to_pdf(text_content: str, title: str) -> str:
|
|
991 |
paragraphs = text_content.split('\n\n')
|
992 |
for para in paragraphs:
|
993 |
if para.strip():
|
994 |
-
|
|
|
|
|
|
|
995 |
content.append(p)
|
996 |
content.append(Spacer(1, 10))
|
997 |
|
@@ -1098,7 +1139,23 @@ async def text_to_pdf(file: UploadFile = File(...)):
|
|
1098 |
|
1099 |
# ํ์ผ ํ์
์ ๋ฐ๋ผ ํ
์คํธ ์ถ์ถ
|
1100 |
if filename.endswith('.txt'):
|
1101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1102 |
elif filename.endswith('.docx') or filename.endswith('.doc'):
|
1103 |
# 임시 파일로 저장
|
1104 |
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
|
|
|
971 |
# 영구 저장소의 파일 경로
|
972 |
file_path = PERMANENT_PDF_DIR / filename
|
973 |
|
974 |
+
# 폰트 설정 (한글 지원 폰트)
|
975 |
+
from reportlab.pdfbase import pdfmetrics
|
976 |
+
from reportlab.pdfbase.ttfonts import TTFont
|
977 |
+
|
978 |
+
# 폰트 디렉토리 설정
|
979 |
+
FONT_DIR = BASE / "fonts"
|
980 |
+
if not FONT_DIR.exists():
|
981 |
+
FONT_DIR.mkdir(parents=True)
|
982 |
+
|
983 |
+
# 기본 한글 폰트 파일 경로
|
984 |
+
default_font_path = FONT_DIR / "NanumGothic.ttf"
|
985 |
+
|
986 |
+
# 폰트 파일이 없으면 다운로드
|
987 |
+
if not default_font_path.exists():
|
988 |
+
import urllib.request
|
989 |
+
try:
|
990 |
+
# ๋๋๊ณ ๋ ํฐํธ ๋ค์ด๋ก๋ (Google Fonts์์)
|
991 |
+
font_url = "https://fonts.gstatic.com/s/nanumgothic/v21/PN_3Rfi-oW3hYwmKDpxS7F_z-7rJxHVIsPV5MbNO2rV2_va-Nv6p.ttf"
|
992 |
+
urllib.request.urlretrieve(font_url, str(default_font_path))
|
993 |
+
logger.info(f"ํ๊ธ ํฐํธ ๋ค์ด๋ก๋ ์๋ฃ: {default_font_path}")
|
994 |
+
except Exception as e:
|
995 |
+
logger.error(f"ํฐํธ ๋ค์ด๋ก๋ ์คํจ: {e}")
|
996 |
+
# 폰트 다운로드 실패 시 기본 폰트 사용
|
997 |
+
default_font_path = None
|
998 |
+
|
999 |
+
# 폰트 등록
|
1000 |
+
font_name = "NanumGothic"
|
1001 |
+
if default_font_path and default_font_path.exists():
|
1002 |
+
pdfmetrics.registerFont(TTFont(font_name, str(default_font_path)))
|
1003 |
+
else:
|
1004 |
+
# 기본 폰트를 사용할 수 없는 경우 Helvetica 사용
|
1005 |
+
font_name = "Helvetica"
|
1006 |
+
logger.warning("ํ๊ธ ํฐํธ๋ฅผ ์ฐพ์ ์ ์์ด ๊ธฐ๋ณธ ํฐํธ๋ฅผ ์ฌ์ฉํฉ๋๋ค. ํ๊ธ์ด ์ ๋๋ก ํ์๋์ง ์์ ์ ์์ต๋๋ค.")
|
1007 |
+
|
1008 |
# 임시 PDF 파일 생성
|
1009 |
pdf_buffer = io.BytesIO()
|
1010 |
+
doc = SimpleDocTemplate(pdf_buffer, pagesize=letter, encoding='utf-8')
|
1011 |
styles = getSampleStyleSheet()
|
1012 |
|
1013 |
+
# 스타일 설정 - 한글 폰트 적용
|
1014 |
+
styles['Title'].fontName = font_name
|
1015 |
+
styles['Normal'].fontName = font_name
|
1016 |
+
|
1017 |
# 내용을 문단으로 분할
|
1018 |
content = []
|
1019 |
|
|
|
1029 |
paragraphs = text_content.split('\n\n')
|
1030 |
for para in paragraphs:
|
1031 |
if para.strip():
|
1032 |
+
# XML 이스케이프 처리
|
1033 |
+
from xml.sax.saxutils import escape
|
1034 |
+
safe_para = escape(para.replace('\n', '<br/>'))
|
1035 |
+
p = Paragraph(safe_para, normal_style)
|
1036 |
content.append(p)
|
1037 |
content.append(Spacer(1, 10))
|
1038 |
|
|
|
1139 |
|
1140 |
# ํ์ผ ํ์
์ ๋ฐ๋ผ ํ
์คํธ ์ถ์ถ
|
1141 |
if filename.endswith('.txt'):
|
1142 |
+
# 인코딩 자동 감지 시도
|
1143 |
+
encodings = ['utf-8', 'euc-kr', 'cp949', 'latin1']
|
1144 |
+
text_content = None
|
1145 |
+
|
1146 |
+
for encoding in encodings:
|
1147 |
+
try:
|
1148 |
+
text_content = content.decode(encoding, errors='strict')
|
1149 |
+
logger.info(f"ํ
์คํธ ํ์ผ ์ธ์ฝ๋ฉ ๊ฐ์ง: {encoding}")
|
1150 |
+
break
|
1151 |
+
except UnicodeDecodeError:
|
1152 |
+
continue
|
1153 |
+
|
1154 |
+
if text_content is None:
|
1155 |
+
# 모든 인코딩 시도 실패 시 기본적으로 UTF-8로 시도하고 오류는 대체 문자로 처리
|
1156 |
+
text_content = content.decode('utf-8', errors='replace')
|
1157 |
+
logger.warning("ํ
์คํธ ํ์ผ ์ธ์ฝ๋ฉ์ ๊ฐ์งํ ์ ์์ด UTF-8์ผ๋ก ์๋ํฉ๋๋ค.")
|
1158 |
+
|
1159 |
elif filename.endswith('.docx') or filename.endswith('.doc'):
|
1160 |
# 임시 파일로 저장
|
1161 |
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
|