ginipick committed on
Commit
3732818
·
verified ·
1 Parent(s): 3fe6497

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +264 -8
app.py CHANGED
@@ -9,6 +9,13 @@ import threading
9
  import concurrent.futures
10
  from openai import OpenAI
11
  import fitz # PyMuPDF
 
 
 
 
 
 
 
12
 
13
  # 로깅 설정
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
@@ -948,6 +955,191 @@ async def upload_pdf(file: UploadFile = File(...)):
948
  status_code=500
949
  )
950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
951
  # κ΄€λ¦¬μž 인증 μ—”λ“œν¬μΈνŠΈ
952
  @app.post("/api/admin-login")
953
  async def admin_login(password: str = Form(...)):
@@ -963,9 +1155,17 @@ async def delete_pdf(path: str):
963
  if not pdf_file.exists():
964
  return {"success": False, "message": "νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"}
965
 
966
- # PDF 파일 μ‚­μ œ
 
 
 
967
  pdf_file.unlink()
968
 
 
 
 
 
 
969
  # κ΄€λ ¨ μΊμ‹œ 파일 μ‚­μ œ
970
  pdf_name = pdf_file.stem
971
  cache_path = get_cache_path(pdf_name)
@@ -979,7 +1179,7 @@ async def delete_pdf(path: str):
979
  # λ©”νƒ€λ°μ΄ν„°μ—μ„œ ν•΄λ‹Ή 파일 ID 제거
980
  to_remove = []
981
  for pid, fpath in pdf_metadata.items():
982
- if os.path.basename(fpath) == pdf_file.name:
983
  to_remove.append(pid)
984
 
985
  for pid in to_remove:
@@ -1351,7 +1551,9 @@ HTML = """
1351
  font-weight: 500;
1352
  display: flex;
1353
  align-items: center;
1354
- box-shadow: var(--shadow-sm);
 
 
1355
  transition: var(--transition);
1356
  position: relative;
1357
  overflow: hidden;
@@ -2111,7 +2313,11 @@ HTML = """
2111
  <button class="upload" id="pdfUploadBtn">
2112
  <i class="fas fa-file-pdf"></i> PDF Upload
2113
  </button>
 
 
 
2114
  <input id="pdfInput" type="file" accept="application/pdf" style="display:none">
 
2115
  </div>
2116
 
2117
  <div class="section-title">Projects</div>
@@ -2436,9 +2642,6 @@ async function submitQuestion(question) {
2436
  }
2437
  }
2438
 
2439
-
2440
-
2441
-
2442
 
2443
  // DOM이 λ‘œλ“œλ˜λ©΄ μ‹€ν–‰
2444
  document.addEventListener('DOMContentLoaded', function() {
@@ -2473,6 +2676,26 @@ async function submitQuestion(question) {
2473
  console.error("PDF μ—…λ‘œλ“œ μš”μ†Œλ₯Ό 찾을 수 μ—†μŒ");
2474
  }
2475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2476
  // μ„œλ²„ PDF λ‘œλ“œ 및 μΊμ‹œ μƒνƒœ 확인
2477
  loadServerPDFs();
2478
 
@@ -2573,6 +2796,40 @@ async function submitQuestion(question) {
2573
  }
2574
  }
2575
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2576
  function addCard(i, thumb, title, isCached = false, pdfId = null) {
2577
  const d = document.createElement('div');
2578
  d.className = 'card fade-in';
@@ -3636,8 +3893,7 @@ async function submitQuestion(question) {
3636
  }
3637
  </script>
3638
  </body>
3639
- </html>
3640
- """
3641
 
3642
  if __name__ == "__main__":
3643
  uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
 
9
  import concurrent.futures
10
  from openai import OpenAI
11
  import fitz # PyMuPDF
12
+ import tempfile
13
+ from reportlab.lib.pagesizes import letter
14
+ from reportlab.pdfgen import canvas
15
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
16
+ from reportlab.lib.styles import getSampleStyleSheet
17
+ import io
18
+ import docx2txt
19
 
20
  # 로깅 설정
21
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
955
  status_code=500
956
  )
957
 
958
+ # ν…μŠ€νŠΈ νŒŒμΌμ„ PDF둜 λ³€ν™˜ν•˜λŠ” ν•¨μˆ˜
959
async def convert_text_to_pdf(text_content: str, title: str) -> dict:
    """Render plain text into a PDF, store it, register it and start caching.

    The document is built in memory with ReportLab, written under a
    sanitized, timestamped file name to both ``PERMANENT_PDF_DIR`` and
    ``PDF_DIR``, recorded in ``pdf_metadata`` (persisted through
    ``save_pdf_metadata``) and handed to ``cache_pdf`` as a background task.

    Args:
        text_content: Body text. Chunks separated by a blank line become
            paragraphs; single newlines inside a chunk become line breaks.
        title: Heading printed at the top of the document; also the basis of
            the output file name.

    Returns:
        A dict with the keys ``"path"`` (string path of the copy in
        ``PERMANENT_PDF_DIR``), ``"filename"`` and ``"id"`` (the value
        returned by ``generate_pdf_id``).

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    import re
    from xml.sax.saxutils import escape

    try:
        # Build a file-system-safe name from the title.
        safe_title = re.sub(r'[^\w\-_\. ]', '_', title)
        if not safe_title:
            safe_title = "aibook"

        # A timestamp suffix keeps repeated uploads of the same title apart.
        timestamp = int(time.time())
        filename = f"{safe_title}_{timestamp}.pdf"
        file_path = PERMANENT_PDF_DIR / filename

        pdf_buffer = io.BytesIO()
        doc = SimpleDocTemplate(pdf_buffer, pagesize=letter)
        styles = getSampleStyleSheet()
        normal_style = styles['Normal']
        # NOTE(review): the sample stylesheet presumably relies on ReportLab's
        # built-in Latin fonts; non-Latin text (e.g. Korean) may not render
        # unless a suitable font is registered -- confirm.

        # Paragraph parses its text as inline XML markup, so user-supplied
        # text must be escaped first; otherwise a stray "<" or "&" breaks the
        # build. Escaping happens BEFORE the <br/> tags are inserted so the
        # tags themselves survive.
        story = [Paragraph(escape(title), styles['Title']), Spacer(1, 12)]
        for para in (text_content or "").split('\n\n'):
            if para.strip():
                markup = escape(para).replace('\n', '<br/>')
                story.append(Paragraph(markup, normal_style))
                story.append(Spacer(1, 10))

        doc.build(story)
        pdf_bytes = pdf_buffer.getvalue()

        # Persist the permanent copy and mirror it into the main directory.
        with open(file_path, 'wb') as f:
            f.write(pdf_bytes)
        with open(PDF_DIR / filename, 'wb') as f:
            f.write(pdf_bytes)

        # Register the new PDF so it can be addressed by ID.
        pdf_id = generate_pdf_id(filename)
        pdf_metadata[pdf_id] = str(file_path)
        save_pdf_metadata()

        # Start caching in the background without delaying the response.
        asyncio.create_task(cache_pdf(str(file_path)))

        return {
            "path": str(file_path),
            "filename": filename,
            "id": pdf_id
        }

    except Exception as e:
        logger.error(f"ν…μŠ€νŠΈλ₯Ό PDF둜 λ³€ν™˜ 쀑 였λ₯˜: {e}")
        # Bare raise keeps the original traceback intact.
        raise
1026
+
1027
+ # AIλ₯Ό μ‚¬μš©ν•˜μ—¬ ν…μŠ€νŠΈλ₯Ό 더 κ΅¬μ‘°ν™”λœ ν˜•μ‹μœΌλ‘œ λ³€ν™˜
1028
async def enhance_text_with_ai(text_content: str, title: str) -> str:
    """Ask the chat model to restructure text as an e-book; never lose content.

    The original text is returned unchanged when no usable API client is
    configured, when the text is shorter than 100 characters, when the API
    call fails, or when the model's answer is empty or was cut off by the
    token limit. Only the first ``max_context_length`` characters are sent to
    the model; any remainder is appended verbatim to the model's answer so
    the tail of a long document is not dropped.

    Args:
        text_content: Raw text extracted from the uploaded file.
        title: Document title, sent to the model as context.

    Returns:
        The restructured text, or ``text_content`` itself as a fallback.
        This function does not raise; every error is logged and the
        original text is returned.
    """
    try:
        # Without a valid key/client there is nothing to call.
        if not HAS_VALID_API_KEY or not openai_client:
            return text_content

        # Very short texts are not worth a model round trip.
        if len(text_content) < 100:
            return text_content

        # Only the head fits the context budget; keep the tail to re-attach.
        max_context_length = 60000
        head = text_content[:max_context_length]
        tail = text_content[max_context_length:]
        text_to_process = (head + "...(μ΄ν•˜ μƒλž΅)") if tail else head

        system_prompt = """
        당신은 ν…μŠ€νŠΈλ₯Ό 잘 κ΅¬μ‘°ν™”λœ μ „μžμ±… ν˜•μ‹μœΌλ‘œ λ³€ν™˜ν•˜λŠ” μ „λ¬Έκ°€μž…λ‹ˆλ‹€.
        제곡된 원본 ν…μŠ€νŠΈλ₯Ό λΆ„μ„ν•˜κ³ , λ‹€μŒκ³Ό 같이 κ°œμ„ ν•΄μ£Όμ„Έμš”:

        1. 논리적인 μ„Ήμ…˜μœΌλ‘œ λ‚˜λˆ„κ³  μ μ ˆν•œ 제λͺ©κ³Ό λΆ€μ œλͺ© μΆ”κ°€
        2. 단락을 μžμ—°μŠ€λŸ½κ²Œ κ΅¬μ„±ν•˜κ³  가독성 κ°œμ„ 
        3. μ€‘μš”ν•œ λ‚΄μš©μ€ κ°•μ‘°ν•˜κ±°λ‚˜ μš”μ•½ν•˜μ—¬ ν‘œμ‹œ
        4. 원본 λ‚΄μš©μ˜ μ˜λ―Έμ™€ λ§₯락은 μœ μ§€ν•˜λ©΄μ„œ κΉ”λ”ν•˜κ²Œ 정리
        5. λ§žμΆ€λ²•κ³Ό 문법 였λ₯˜ μˆ˜μ •

        원본 λ‚΄μš©μ„ λͺ¨λ‘ μœ μ§€ν•˜λ˜, 잘 μ •λ¦¬λœ μ „μžμ±…μ²˜λŸΌ 보이도둝 κ΅¬μ‘°ν™”ν•΄μ£Όμ„Έμš”.
        μˆ˜μ •λ³Έμ„ 직접 λ°˜ν™˜ν•˜κ³ , 원본 λ‚΄μš©μ„ λͺ¨λ‘ 포함해야 ν•©λ‹ˆλ‹€.
        """

        def _request():
            # Synchronous SDK call; executed off the event loop below.
            return openai_client.chat.completions.create(
                model="gpt-4.1-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": f"제λͺ©: {title}\n\n원본 ν…μŠ€νŠΈ:\n{text_to_process}"}
                ],
                temperature=0.7,
                max_tokens=4000,
                timeout=60.0
            )

        # Running the blocking call in the default executor keeps the event
        # loop free to serve other requests while the model answers.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, _request)

        choice = response.choices[0]
        enhanced_text = choice.message.content

        # An empty answer, or one cut off by max_tokens, would silently lose
        # part of the document; prefer the untouched original instead.
        if not enhanced_text or getattr(choice, "finish_reason", None) == "length":
            return text_content

        # Re-attach whatever was not sent to the model.
        if tail:
            enhanced_text = f"{enhanced_text}\n\n{tail}"
        return enhanced_text

    except Exception as e:
        logger.error(f"AI ν…μŠ€νŠΈ ν–₯상 였λ₯˜: {e}")
        # Best effort: on any failure fall back to the original text.
        return text_content
1083
+
1084
+ # ν…μŠ€νŠΈ νŒŒμΌμ„ PDF둜 λ³€ν™˜ν•˜λŠ” μ—”λ“œν¬μΈνŠΈ
1085
@app.post("/api/text-to-pdf")
async def text_to_pdf(file: UploadFile = File(...)):
    """Convert an uploaded .txt/.docx/.doc file into a stored PDF.

    The text is extracted from the upload, passed through
    ``enhance_text_with_ai`` and rendered by ``convert_text_to_pdf``.

    Args:
        file: The uploaded document.

    Returns:
        A ``JSONResponse``. On success (200) it carries ``success``,
        ``path``, ``name``, ``id`` and ``viewUrl``. An unsupported extension
        yields 400 and any other failure 500, both with ``success: False``
        and a ``message``.
    """
    try:
        # The client may omit the file name; treat that as "unsupported"
        # instead of crashing on None. Client-side directory components are
        # dropped so they do not leak into the title.
        original_name = os.path.basename(file.filename or "")
        filename = original_name.lower()
        if not filename.endswith(('.txt', '.docx', '.doc')):
            return JSONResponse(
                content={"success": False, "message": "μ§€μ›ν•˜λŠ” 파일 ν˜•μ‹μ€ .txt, .docx, .docμž…λ‹ˆλ‹€."},
                status_code=400
            )

        content = await file.read()

        if filename.endswith('.txt'):
            # utf-8-sig strips a leading byte-order mark (common in files
            # saved by Windows editors) that plain utf-8 would keep as text.
            text_content = content.decode('utf-8-sig', errors='replace')
        else:
            # NOTE(review): docx2txt presumably only understands the
            # zip-based .docx format; legacy binary .doc uploads likely fail
            # here and end up in the generic 500 handler -- confirm.
            # docx2txt is given a path, so spill the upload to a temp file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
                temp_file.write(content)
                temp_path = temp_file.name

            try:
                text_content = docx2txt.process(temp_path)
            finally:
                # Always remove the temp file, even when extraction fails.
                os.unlink(temp_path)

        # Title = file name without extension, keeping the original case.
        title = os.path.splitext(original_name)[0]

        enhanced_text = await enhance_text_with_ai(text_content, title)
        pdf_info = await convert_text_to_pdf(enhanced_text, title)

        return JSONResponse(
            content={
                "success": True,
                "path": pdf_info["path"],
                "name": os.path.splitext(pdf_info["filename"])[0],
                "id": pdf_info["id"],
                "viewUrl": f"/view/{pdf_info['id']}"
            },
            status_code=200
        )
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        logger.error(f"ν…μŠ€νŠΈλ₯Ό PDF둜 λ³€ν™˜ 쀑 였λ₯˜: {str(e)}\n{error_details}")
        return JSONResponse(
            content={"success": False, "message": str(e)},
            status_code=500
        )
1142
+
1143
  # κ΄€λ¦¬μž 인증 μ—”λ“œν¬μΈνŠΈ
1144
  @app.post("/api/admin-login")
1145
  async def admin_login(password: str = Form(...)):
 
1155
  if not pdf_file.exists():
1156
  return {"success": False, "message": "νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"}
1157
 
1158
+ # PDF 파일λͺ… κ°€μ Έμ˜€κΈ°
1159
+ filename = pdf_file.name
1160
+
1161
+ # PDF 파일 μ‚­μ œ (영ꡬ μ €μž₯μ†Œμ—μ„œ)
1162
  pdf_file.unlink()
1163
 
1164
+ # 메인 λ””λ ‰ν† λ¦¬μ—μ„œλ„ λ™μΌν•œ 파일이 있으면 μ‚­μ œ (버그 μˆ˜μ •)
1165
+ main_file_path = PDF_DIR / filename
1166
+ if main_file_path.exists():
1167
+ main_file_path.unlink()
1168
+
1169
  # κ΄€λ ¨ μΊμ‹œ 파일 μ‚­μ œ
1170
  pdf_name = pdf_file.stem
1171
  cache_path = get_cache_path(pdf_name)
 
1179
  # λ©”νƒ€λ°μ΄ν„°μ—μ„œ ν•΄λ‹Ή 파일 ID 제거
1180
  to_remove = []
1181
  for pid, fpath in pdf_metadata.items():
1182
+ if os.path.basename(fpath) == filename:
1183
  to_remove.append(pid)
1184
 
1185
  for pid in to_remove:
 
1551
  font-weight: 500;
1552
  display: flex;
1553
  align-items: center;
1554
+
1555
+
1556
+ box-shadow: var(--shadow-sm);
1557
  transition: var(--transition);
1558
  position: relative;
1559
  overflow: hidden;
 
2313
  <button class="upload" id="pdfUploadBtn">
2314
  <i class="fas fa-file-pdf"></i> PDF Upload
2315
  </button>
2316
+ <button class="upload" id="textToAIBookBtn">
2317
+ <i class="fas fa-file-alt"></i> Text to AI-Book
2318
+ </button>
2319
  <input id="pdfInput" type="file" accept="application/pdf" style="display:none">
2320
+ <input id="textInput" type="file" accept=".txt,.docx,.doc" style="display:none">
2321
  </div>
2322
 
2323
  <div class="section-title">Projects</div>
 
2642
  }
2643
  }
2644
 
 
 
 
2645
 
2646
  // DOM이 λ‘œλ“œλ˜λ©΄ μ‹€ν–‰
2647
  document.addEventListener('DOMContentLoaded', function() {
 
2676
  console.error("PDF μ—…λ‘œλ“œ μš”μ†Œλ₯Ό 찾을 수 μ—†μŒ");
2677
  }
2678
 
2679
+ // ν…μŠ€νŠΈ μ—…λ‘œλ“œ λ²„νŠΌ
2680
+ const textBtn = document.getElementById('textToAIBookBtn');
2681
+ const textInput = document.getElementById('textInput');
2682
+
2683
+ if (textBtn && textInput) {
2684
+ // λ²„νŠΌ 클릭 μ‹œ 파일 μž…λ ₯ 트리거
2685
+ textBtn.addEventListener('click', function() {
2686
+ textInput.click();
2687
+ });
2688
+
2689
+ // 파일 선택 μ‹œ 처리
2690
+ textInput.addEventListener('change', function(e) {
2691
+ const file = e.target.files[0];
2692
+ if (!file) return;
2693
+
2694
+ // μ„œλ²„μ— ν…μŠ€νŠΈ 파일 μ—…λ‘œλ“œ (영ꡬ μ €μž₯μ†Œμ— PDF둜 λ³€ν™˜ν•˜μ—¬ μ €μž₯)
2695
+ uploadTextToServer(file);
2696
+ });
2697
+ }
2698
+
2699
  // μ„œλ²„ PDF λ‘œλ“œ 및 μΊμ‹œ μƒνƒœ 확인
2700
  loadServerPDFs();
2701
 
 
2796
  }
2797
  }
2798
 
2799
+ // μ„œλ²„μ— ν…μŠ€νŠΈ νŒŒμΌμ„ μ—…λ‘œλ“œν•˜μ—¬ PDF둜 λ³€ν™˜ν•˜λŠ” ν•¨μˆ˜
2800
/**
 * Upload a text document to the server, which converts it to a PDF.
 *
 * Posts the file to /api/text-to-pdf. On success the server PDF list is
 * reloaded and a message containing the share URL is shown; on failure an
 * error message is shown. The loading indicator is hidden as soon as the
 * server has answered.
 *
 * @param {File} file - The .txt/.docx/.doc file chosen by the user.
 * @returns {Promise<void>}
 */
async function uploadTextToServer(file) {
    try {
        showLoading("ν…μŠ€νŠΈ 뢄석 및 PDF λ³€ν™˜ 쀑...");

        const formData = new FormData();
        formData.append('file', file);

        const response = await fetch('/api/text-to-pdf', {
            method: 'POST',
            body: formData
        });

        // A proxy or gateway may answer with a non-JSON body (e.g. 413/502).
        // Parsing that as JSON would throw and hide the real HTTP status
        // behind the generic error message below.
        const contentType = response.headers.get('content-type') || '';
        let result;
        if (contentType.indexOf('application/json') !== -1) {
            result = await response.json();
        } else {
            result = { success: false, message: 'HTTP ' + response.status };
        }

        hideLoading();

        if (result.success) {
            // Refresh the server-side PDF list so the new book appears.
            await loadServerPDFs();

            showMessage("ν…μŠ€νŠΈκ°€ μ„±κ³΅μ μœΌλ‘œ PDF둜 λ³€ν™˜λ˜μ—ˆμŠ΅λ‹ˆλ‹€! 곡유 URL: " + result.viewUrl);
        } else {
            showError("λ³€ν™˜ μ‹€νŒ¨: " + (result.message || "μ•Œ 수 μ—†λŠ” 였λ₯˜"));
        }
    } catch (error) {
        console.error("ν…μŠ€νŠΈ λ³€ν™˜ 였λ₯˜:", error);
        hideLoading();
        showError("ν…μŠ€νŠΈλ₯Ό PDF둜 λ³€ν™˜ν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.");
    }
}
2832
+
2833
  function addCard(i, thumb, title, isCached = false, pdfId = null) {
2834
  const d = document.createElement('div');
2835
  d.className = 'card fade-in';
 
3893
  }
3894
  </script>
3895
  </body>
3896
+ </html>
 
3897
 
3898
  if __name__ == "__main__":
3899
  uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", 7860)))