Spaces:

ginipick
/

AI-BOOK

Running on CPU Upgrade

App Files Files Community

ginipick committed on May 19

Commit

1ea68f5

verified ·

1 Parent(s): 285f7e3

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -36

app.py CHANGED Viewed

@@ -70,17 +70,21 @@ def save_pdf_metadata():
         logger.error(f"메타데이터 저장 오류: {e}")
 # PDF ID 생성 (파일명 + 타임스탬프 기반)
 def generate_pdf_id(filename: str) -> str:
     # 파일명에서 확장자 제거
     base_name = os.path.splitext(filename)[0]
-    # URL 안전 문자열로 변환
-    safe_name = urllib.parse.quote(base_name, safe='')
     # 타임스탬프 추가로 고유성 보장
     timestamp = int(time.time())
     # 짧은 임의 문자열 추가
     random_suffix = uuid.uuid4().hex[:6]
     return f"{safe_name}_{timestamp}_{random_suffix}"
 # PDF 파일 목록 가져오기 (메인 디렉토리용)
 def get_pdf_files():
     pdf_files = []
@@ -297,15 +301,21 @@ async def cache_pdf(pdf_path: str):
             pdf_cache[pdf_name]["error"] = str(e)
 # PDF ID로 PDF 경로 찾기
 def get_pdf_path_by_id(pdf_id: str) -> str:
     if pdf_id in pdf_metadata:
         path = pdf_metadata[pdf_id]
         # 파일 존재 확인
         if os.path.exists(path):
             return path
-        # 영구 저장소에서 파일명으로 찾기
         filename = os.path.basename(path)
         perm_path = PERMANENT_PDF_DIR / filename
         if perm_path.exists():
             # 메타데이터 업데이트
@@ -313,7 +323,7 @@ def get_pdf_path_by_id(pdf_id: str) -> str:
             save_pdf_metadata()
             return str(perm_path)
-        # 메인 디렉토리에서 파일명으로 찾기
         main_path = PDF_DIR / filename
         if main_path.exists():
             # 메타데이터 업데이트
@@ -321,6 +331,34 @@ def get_pdf_path_by_id(pdf_id: str) -> str:
             save_pdf_metadata()
             return str(main_path)
     return None
 # 시작 시 모든 PDF 파일 캐싱
@@ -378,6 +416,28 @@ async def init_cache_all_pdfs():
 # 백그라운드 작업 시작 함수
 @app.on_event("startup")
 async def startup_event():
     # 백그라운드 태스크로 캐싱 실행
     asyncio.create_task(init_cache_all_pdfs())
@@ -685,6 +745,22 @@ async def unfeature_pdf(path: str):
 async def view_pdf_by_id(pdf_id: str):
     # PDF ID 유효한지 확인
     pdf_path = get_pdf_path_by_id(pdf_id)
     if not pdf_path:
         return HTMLResponse(
             content=f"<html><body><h1>PDF를 찾을 수 없습니다</h1><p>ID: {pdf_id}</p><a href='/'>홈으로 돌아가기</a></body></html>",
@@ -1591,38 +1667,47 @@ HTML = """
         showError("PDF 업로드 중 오류가 발생했습니다.");
       }
     }
-    function addCard(i, thumb, title, isCached = false, pdfId = null) {
-      const d = document.createElement('div');
-      d.className = 'card fade-in';
-      d.onclick = () => open(i);
-      // PDF ID가 있으면 데이터 속성으로 저장
-      if (pdfId) {
-          d.dataset.pdfId = pdfId;
-      }
-      // 제목 처리
-      const displayTitle = title ?
-          (title.length > 15 ? title.substring(0, 15) + '...' : title) :
-          '프로젝트 ' + (i+1);
-      // 캐시 상태 뱃지 추가
-      const cachedBadge = isCached ?
-          '<div class="cached-status">캐시됨</div>' : '';
-      d.innerHTML = `
-          <div class="card-inner">
-              ${cachedBadge}
-              <img src="${thumb}" alt="${displayTitle}" loading="lazy">
-              <p title="${title || '프로젝트 ' + (i+1)}">${displayTitle}</p>
-          </div>
-      `;
-      grid.appendChild(d);
-      // 프로젝트가 있으면 '프로젝트 없음' 메시지 숨기기
-      $id('noProjects').style.display = 'none';
-    }
     /* ── 프로젝트 저장 ── */
     function save(pages, title, isCached = false, pdfId = null) {

         logger.error(f"메타데이터 저장 오류: {e}")
 # PDF ID 생성 (파일명 + 타임스탬프 기반)
+# PDF ID 생성 (파일명 + 타임스탬프 기반) - 더 단순하고 안전한 방식으로 변경
 def generate_pdf_id(filename: str) -> str:
     # 파일명에서 확장자 제거
     base_name = os.path.splitext(filename)[0]
+    # 안전한 문자열로 변환 (URL 인코딩 대신 직접 변환)
+    import re
+    safe_name = re.sub(r'[^\w\-_]', '_', base_name.replace(" ", "_"))
     # 타임스탬프 추가로 고유성 보장
     timestamp = int(time.time())
     # 짧은 임의 문자열 추가
     random_suffix = uuid.uuid4().hex[:6]
     return f"{safe_name}_{timestamp}_{random_suffix}"
 # PDF 파일 목록 가져오기 (메인 디렉토리용)
 def get_pdf_files():
     pdf_files = []
             pdf_cache[pdf_name]["error"] = str(e)
 # PDF ID로 PDF 경로 찾기
+# PDF ID로 PDF 경로 찾기 (개선된 검색 로직)
 def get_pdf_path_by_id(pdf_id: str) -> str:
+    logger.info(f"PDF ID로 파일 조회: {pdf_id}")
+    # 1. 메타데이터에서 직접 ID로 검색
     if pdf_id in pdf_metadata:
         path = pdf_metadata[pdf_id]
         # 파일 존재 확인
         if os.path.exists(path):
             return path
+        # 파일이 이동했을 수 있으므로 파일명으로 검색
         filename = os.path.basename(path)
+        # 영구 저장소에서 검색
         perm_path = PERMANENT_PDF_DIR / filename
         if perm_path.exists():
             # 메타데이터 업데이트
             save_pdf_metadata()
             return str(perm_path)
+        # 메인 디렉토리에서 검색
         main_path = PDF_DIR / filename
         if main_path.exists():
             # 메타데이터 업데이트
             save_pdf_metadata()
             return str(main_path)
+    # 2. 파일명 부분만 추출하여 모든 PDF 파일 검색
+    try:
+        # ID 형식: filename_timestamp_random
+        # 파일명 부분만 추출
+        name_part = pdf_id.split('_')[0] if '_' in pdf_id else pdf_id
+        # 모든 PDF 파일 검색
+        for file_path in get_pdf_files() + get_permanent_pdf_files():
+            # 파일명이 ID의 시작 부분과 일치하면
+            file_basename = os.path.basename(file_path)
+            if file_basename.startswith(name_part) or file_path.stem.startswith(name_part):
+                # ID 매핑 업데이트
+                pdf_metadata[pdf_id] = str(file_path)
+                save_pdf_metadata()
+                return str(file_path)
+    except Exception as e:
+        logger.error(f"파일명 검색 중 오류: {e}")
+    # 3. 모든 PDF 파일에 대해 메타데이터 확인
+    for pid, path in pdf_metadata.items():
+        if os.path.exists(path):
+            file_basename = os.path.basename(path)
+            # 유사한 파일명을 가진 경우
+            if pdf_id in pid or pid in pdf_id:
+                pdf_metadata[pdf_id] = path
+                save_pdf_metadata()
+                return path
     return None
 # 시작 시 모든 PDF 파일 캐싱
 # 백그라운드 작업 시작 함수
 @app.on_event("startup")
 async def startup_event():
+    # PDF 메타데이터 로드
+    load_pdf_metadata()
+    # 누락된 PDF 파일에 대한 메타데이터 생성
+    for pdf_file in get_pdf_files() + get_permanent_pdf_files():
+        found = False
+        for pid, path in pdf_metadata.items():
+            if os.path.basename(path) == pdf_file.name:
+                found = True
+                # 경로 업데이트
+                if not os.path.exists(path):
+                    pdf_metadata[pid] = str(pdf_file)
+                break
+        if not found:
+            # 새 ID 생성 및 메타데이터에 추가
+            pdf_id = generate_pdf_id(pdf_file.name)
+            pdf_metadata[pdf_id] = str(pdf_file)
+    # 변경사항 저장
+    save_pdf_metadata()
     # 백그라운드 태스크로 캐싱 실행
     asyncio.create_task(init_cache_all_pdfs())
 async def view_pdf_by_id(pdf_id: str):
     # PDF ID 유효한지 확인
     pdf_path = get_pdf_path_by_id(pdf_id)
+    if not pdf_path:
+        # 일단 모든 PDF 메타데이터를 다시 로드하고 재시도
+        load_pdf_metadata()
+        pdf_path = get_pdf_path_by_id(pdf_id)
+        if not pdf_path:
+            # 모든 PDF 파일을 직접 스캔하여 유사한 이름 찾기
+            for file_path in get_pdf_files() + get_permanent_pdf_files():
+                name_part = pdf_id.split('_')[0] if '_' in pdf_id else pdf_id
+                if file_path.stem.startswith(name_part):
+                    pdf_metadata[pdf_id] = str(file_path)
+                    save_pdf_metadata()
+                    pdf_path = str(file_path)
+                    break
     if not pdf_path:
         return HTMLResponse(
             content=f"<html><body><h1>PDF를 찾을 수 없습니다</h1><p>ID: {pdf_id}</p><a href='/'>홈으로 돌아가기</a></body></html>",
         showError("PDF 업로드 중 오류가 발생했습니다.");
       }
     }
+function addCard(i, thumb, title, isCached = false, pdfId = null) {
+  const d = document.createElement('div');
+  d.className = 'card fade-in';
+  d.onclick = () => open(i);
+  // PDF ID가 있으면 데이터 속성으로 저장
+  if (pdfId) {
+    d.dataset.pdfId = pdfId;
+  }
+  // 제목 처리
+  const displayTitle = title ?
+    (title.length > 15 ? title.substring(0, 15) + '...' : title) :
+    '프로젝트 ' + (i+1);
+  // 캐시 상태 뱃지 추가
+  const cachedBadge = isCached ?
+    '<div class="cached-status">캐시됨</div>' : '';
+  // 바로가기 링크 추가 (PDF ID가 있는 경우에만)
+  const linkHtml = pdfId ?
+    `<div style="position: absolute; bottom: 55px; left: 50%; transform: translateX(-50%); z-index:5;">
+      <a href="/view/${pdfId}" target="_blank" style="color:#4a6ee0; font-size:11px;">바로가기 링크</a>
+    </div>` : '';
+  d.innerHTML = `
+    <div class="card-inner">
+      ${cachedBadge}
+      <img src="${thumb}" alt="${displayTitle}" loading="lazy">
+      ${linkHtml}
+      <p title="${title || '프로젝트 ' + (i+1)}">${displayTitle}</p>
+    </div>
+  `;
+  grid.appendChild(d);
+  // 프로젝트가 있으면 '프로젝트 없음' 메시지 숨기기
+  $id('noProjects').style.display = 'none';
+}
     /* ── 프로젝트 저장 ── */
     function save(pages, title, isCached = false, pdfId = null) {