AI-BOOK / app.py
ginipick's picture
Update app.py
fa889b1 verified
raw
history blame
54.3 kB
from fastapi import FastAPI, BackgroundTasks, UploadFile, File, Form, Request, Query
from fastapi.responses import HTMLResponse, JSONResponse, Response, RedirectResponse
from fastapi.staticfiles import StaticFiles
import pathlib, os, uvicorn, base64, json, shutil, uuid, time, urllib.parse
from typing import Dict, List, Any, Optional
import asyncio
import logging
import threading
import concurrent.futures
from openai import OpenAI
import fitz # PyMuPDF
import tempfile
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
import io
import docx2txt
# Logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Base directory (location of this file) and FastAPI application
BASE = pathlib.Path(__file__).parent
app = FastAPI()
app.mount("/static", StaticFiles(directory=BASE), name="static")

# Main PDF directory
PDF_DIR = BASE / "pdf"
if not PDF_DIR.exists():
    PDF_DIR.mkdir(parents=True)

# Permanent PDF directory (Hugging Face persistent disk when /data exists)
PERMANENT_PDF_DIR = pathlib.Path("/data/pdfs") if os.path.exists("/data") else BASE / "permanent_pdfs"
if not PERMANENT_PDF_DIR.exists():
    PERMANENT_PDF_DIR.mkdir(parents=True)

# Rendered-page cache directory
CACHE_DIR = BASE / "cache"
if not CACHE_DIR.exists():
    CACHE_DIR.mkdir(parents=True)

# PDF metadata directory and file (id -> path mapping persisted as JSON)
METADATA_DIR = pathlib.Path("/data/metadata") if os.path.exists("/data") else BASE / "metadata"
if not METADATA_DIR.exists():
    METADATA_DIR.mkdir(parents=True)
PDF_METADATA_FILE = METADATA_DIR / "pdf_metadata.json"

# Extracted-text ("embedding") cache directory
EMBEDDING_DIR = pathlib.Path("/data/embeddings") if os.path.exists("/data") else BASE / "embeddings"
if not EMBEDDING_DIR.exists():
    EMBEDDING_DIR.mkdir(parents=True)

# Admin password (from env; the default is for testing only)
ADMIN_PASSWORD = os.getenv("PASSWORD", "admin")

# OpenAI API key
OPENAI_API_KEY = os.getenv("LLM_API", "")
# Flag indicating whether a non-empty API key is configured
HAS_VALID_API_KEY = bool(OPENAI_API_KEY and OPENAI_API_KEY.strip())
if HAS_VALID_API_KEY:
    try:
        openai_client = OpenAI(api_key=OPENAI_API_KEY, timeout=30.0)
        logger.info("OpenAI ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™” ์„ฑ๊ณต")
    except Exception as e:
        logger.error(f"OpenAI ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
        HAS_VALID_API_KEY = False
else:
    logger.warning("์œ ํšจํ•œ OpenAI API ํ‚ค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. AI ๊ธฐ๋Šฅ์ด ์ œํ•œ๋ฉ๋‹ˆ๋‹ค.")
    openai_client = None

# Global in-memory cache of rendered pages, keyed by PDF file stem
pdf_cache: Dict[str, Dict[str, Any]] = {}
# Per-PDF threading locks guarding the caching pipeline
cache_locks = {}
# PDF metadata (id -> path mapping)
pdf_metadata: Dict[str, str] = {}
# PDF extracted-text cache (in-memory)
pdf_embeddings: Dict[str, Dict[str, Any]] = {}
# PDF ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๋กœ๋“œ
def load_pdf_metadata():
    """Load the PDF id -> path mapping from PDF_METADATA_FILE into ``pdf_metadata``.

    Falls back to an empty mapping when the file is missing or unreadable.
    """
    global pdf_metadata
    if not PDF_METADATA_FILE.exists():
        pdf_metadata = {}
        return
    try:
        # Fix: explicit encoding — stored paths may contain non-ASCII (e.g. Korean)
        # characters, and the platform default encoding is not guaranteed UTF-8.
        with open(PDF_METADATA_FILE, "r", encoding="utf-8") as f:
            pdf_metadata = json.load(f)
        logger.info(f"PDF ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ: {len(pdf_metadata)} ํ•ญ๋ชฉ")
    except Exception as e:
        logger.error(f"๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๋กœ๋“œ ์˜ค๋ฅ˜: {e}")
        pdf_metadata = {}
# PDF ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ €์žฅ
def save_pdf_metadata():
    """Persist the in-memory ``pdf_metadata`` mapping to PDF_METADATA_FILE.

    Errors are logged and swallowed (best-effort persistence).
    """
    try:
        # Fix: explicit UTF-8 encoding and ensure_ascii=False so non-ASCII
        # paths round-trip readably — consistent with get_pdf_embedding's writer.
        with open(PDF_METADATA_FILE, "w", encoding="utf-8") as f:
            json.dump(pdf_metadata, f, ensure_ascii=False)
    except Exception as e:
        logger.error(f"๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ €์žฅ ์˜ค๋ฅ˜: {e}")
# PDF ID ์ƒ์„ฑ (ํŒŒ์ผ๋ช… + ํƒ€์ž„์Šคํƒฌํ”„ ๊ธฐ๋ฐ˜) - ๋” ๋‹จ์ˆœํ•˜๊ณ  ์•ˆ์ „ํ•œ ๋ฐฉ์‹์œผ๋กœ ๋ณ€๊ฒฝ
def generate_pdf_id(filename: str) -> str:
    """Build a unique, URL-safe id for a PDF file.

    The id is ``<sanitized stem>_<unix timestamp>_<6 hex chars>``: the
    sanitized name keeps ids readable, while the timestamp and random
    suffix guarantee uniqueness across re-uploads of the same file.
    """
    import re
    stem, _ext = os.path.splitext(filename)
    # Spaces become underscores; anything outside [word, -, _] is replaced.
    sanitized = re.sub(r'[^\w\-_]', '_', stem.replace(" ", "_"))
    return f"{sanitized}_{int(time.time())}_{uuid.uuid4().hex[:6]}"
# PDF ํŒŒ์ผ ๋ชฉ๋ก ๊ฐ€์ ธ์˜ค๊ธฐ (๋ฉ”์ธ ๋””๋ ‰ํ† ๋ฆฌ์šฉ)
def get_pdf_files():
    """Return every ``*.pdf`` file in the main PDF directory (empty list if absent)."""
    if not PDF_DIR.exists():
        return []
    return list(PDF_DIR.glob("*.pdf"))
# ์˜๊ตฌ ์ €์žฅ์†Œ์˜ PDF ํŒŒ์ผ ๋ชฉ๋ก ๊ฐ€์ ธ์˜ค๊ธฐ
def get_permanent_pdf_files():
    """Return every ``*.pdf`` file in permanent storage (empty list if absent)."""
    if not PERMANENT_PDF_DIR.exists():
        return []
    return list(PERMANENT_PDF_DIR.glob("*.pdf"))
# PDF ์ธ๋„ค์ผ ์ƒ์„ฑ ๋ฐ ํ”„๋กœ์ ํŠธ ๋ฐ์ดํ„ฐ ์ค€๋น„
def generate_pdf_projects():
    """Build the project list shown by the UI: one entry per unique PDF file.

    Merges the main and permanent directories (permanent wins on name
    collisions), assigns or creates a stable id per file, and flags whether
    a fully rendered page cache exists for it.
    """
    projects_data = []
    # Collect files from both the main directory and permanent storage
    pdf_files = get_pdf_files()
    permanent_pdf_files = get_permanent_pdf_files()
    # Merge, de-duplicating by file name
    unique_files = {}
    # Main-directory files first
    for file in pdf_files:
        unique_files[file.name] = file
    # Permanent-storage files second (same name -> permanent copy wins)
    for file in permanent_pdf_files:
        unique_files[file.name] = file
    # Build project entries from the de-duplicated set
    for pdf_file in unique_files.values():
        # Look up an existing PDF id for this file
        pdf_id = None
        for pid, path in pdf_metadata.items():
            if os.path.basename(path) == pdf_file.name:
                pdf_id = pid
                break
        # No id yet: create one and persist the mapping
        if not pdf_id:
            pdf_id = generate_pdf_id(pdf_file.name)
            pdf_metadata[pdf_id] = str(pdf_file)
            save_pdf_metadata()
        projects_data.append({
            "path": str(pdf_file),
            "name": pdf_file.stem,
            "id": pdf_id,
            "cached": pdf_file.stem in pdf_cache and pdf_cache[pdf_file.stem].get("status") == "completed"
        })
    return projects_data
# ์บ์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ƒ์„ฑ
def get_cache_path(pdf_name: str):
    """Path of the JSON page-render cache file for the given PDF stem."""
    return CACHE_DIR / (pdf_name + "_cache.json")
# ์ž„๋ฒ ๋”ฉ ์บ์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ƒ์„ฑ
def get_embedding_path(pdf_id: str):
    """Path of the JSON extracted-text cache file for the given PDF id."""
    return EMBEDDING_DIR / (pdf_id + "_embedding.json")
# PDF ํ…์ŠคํŠธ ์ถ”์ถœ ํ•จ์ˆ˜
def extract_pdf_text(pdf_path: str) -> List[Dict[str, Any]]:
    """Extract per-page text from a PDF.

    Returns a list of chunk dicts ``{"page", "text", "chunk_id"}`` (1-based
    page numbers), skipping pages whose text is empty/whitespace.
    Returns ``[]`` on any failure.
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            chunks = []
            for page_num in range(len(doc)):
                page = doc[page_num]
                text = page.get_text()
                # Only keep pages that actually contain text
                if text.strip():
                    chunks.append({
                        "page": page_num + 1,
                        "text": text,
                        "chunk_id": f"page_{page_num + 1}"
                    })
            return chunks
        finally:
            # Fix: the original never closed the document, leaking the handle.
            doc.close()
    except Exception as e:
        logger.error(f"PDF ํ…์ŠคํŠธ ์ถ”์ถœ ์˜ค๋ฅ˜: {e}")
        return []
# PDF ID๋กœ ์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ ๋˜๋Š” ๊ฐ€์ ธ์˜ค๊ธฐ
async def get_pdf_embedding(pdf_id: str) -> Dict[str, Any]:
    """Return the extracted-text payload for a PDF id, building it on demand.

    Despite the name, no vector embedding is computed here: the payload is
    the per-page text chunks plus bookkeeping, cached as JSON on disk.
    On failure, returns ``{"error": ..., "pdf_id": ...}``.
    """
    try:
        # Serve from the disk cache when present
        embedding_path = get_embedding_path(pdf_id)
        if embedding_path.exists():
            try:
                with open(embedding_path, "r", encoding="utf-8") as f:
                    return json.load(f)
            except Exception as e:
                # Corrupt cache file: log and fall through to rebuild it
                logger.error(f"์ž„๋ฒ ๋”ฉ ์บ์‹œ ๋กœ๋“œ ์˜ค๋ฅ˜: {e}")
        # Resolve the PDF path from its id
        pdf_path = get_pdf_path_by_id(pdf_id)
        if not pdf_path:
            raise ValueError(f"PDF ID {pdf_id}์— ํ•ด๋‹นํ•˜๋Š” ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
        # Extract per-page text
        chunks = extract_pdf_text(pdf_path)
        if not chunks:
            raise ValueError(f"PDF์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {pdf_path}")
        # Assemble the payload to store and return
        embedding_data = {
            "pdf_id": pdf_id,
            "pdf_path": pdf_path,
            "chunks": chunks,
            "created_at": time.time()
        }
        # Persist to the disk cache for next time
        with open(embedding_path, "w", encoding="utf-8") as f:
            json.dump(embedding_data, f, ensure_ascii=False)
        return embedding_data
    except Exception as e:
        logger.error(f"PDF ์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
        return {"error": str(e), "pdf_id": pdf_id}
# PDF ๋‚ด์šฉ ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต
# PDF ๋‚ด์šฉ ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต ํ•จ์ˆ˜ ๊ฐœ์„ 
async def query_pdf(pdf_id: str, query: str) -> Dict[str, Any]:
    """Answer a user question using only the PDF's extracted text.

    Feeds the (truncated) whole-document text plus the question to the chat
    model, retrying transient API failures up to three times with a linear
    backoff. Returns ``{"answer", "pdf_id", "query"}`` on success or
    ``{"error": ...}`` on failure.
    """
    try:
        # Bail out early when no usable OpenAI client exists
        if not HAS_VALID_API_KEY or not openai_client:
            return {
                "error": "OpenAI API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.",
                "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ํ˜„์žฌ AI ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์–ด ์žˆ์–ด ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์‹œ์Šคํ…œ ๊ด€๋ฆฌ์ž์—๊ฒŒ ๋ฌธ์˜ํ•˜์„ธ์š”."
            }
        # Load (or build) the extracted-text payload
        embedding_data = await get_pdf_embedding(pdf_id)
        if "error" in embedding_data:
            return {"error": embedding_data["error"]}
        # Concatenate all page chunks (simple whole-document context, no retrieval)
        all_text = "\n\n".join([f"Page {chunk['page']}: {chunk['text']}" for chunk in embedding_data["chunks"]])
        # Truncate overly long context; the limit is characters, not tokens
        max_context_length = 60000  # rough character-based cap
        if len(all_text) > max_context_length:
            all_text = all_text[:max_context_length] + "...(์ดํ•˜ ์ƒ๋žต)"
        # System prompt (runtime string — kept verbatim)
        system_prompt = """
The default language is set to English. However, please respond in the language used in the user's prompt (e.g., English, Korean, Japanese, Chinese, etc.).
You are an assistant that answers questions based solely on the provided PDF context. Please use only the information from the provided PDF content to respond. If relevant information is not available in the context, honestly reply with, "The requested information could not be found in the provided PDF."
Please ensure your responses are clear and concise, citing relevant page numbers. Always respond politely and courteously.
"""
        # Call the chat model, retrying transient failures
        try:
            for attempt in range(3):  # at most 3 attempts
                try:
                    response = openai_client.chat.completions.create(
                        model="gpt-4.1-mini",
                        messages=[
                            {"role": "system", "content": system_prompt},
                            {"role": "user", "content": f"The default language is set to English.๋‹ค์Œ PDF ๋‚ด์šฉ์„ ์ฐธ๊ณ ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”.\n\nPDF ๋‚ด์šฉ:\n{all_text}\n\n์งˆ๋ฌธ: {query}"}
                        ],
                        temperature=0.7,
                        max_tokens=2048,
                        timeout=30.0  # per-call 30-second timeout
                    )
                    answer = response.choices[0].message.content
                    return {
                        "answer": answer,
                        "pdf_id": pdf_id,
                        "query": query
                    }
                except Exception as api_error:
                    logger.error(f"OpenAI API ํ˜ธ์ถœ ์˜ค๋ฅ˜ (์‹œ๋„ {attempt+1}/3): {api_error}")
                    if attempt == 2:  # final attempt also failed
                        raise api_error
                    await asyncio.sleep(1 * (attempt + 1))  # linear backoff between retries
            # Should be unreachable: the loop either returns or raises
            raise Exception("API ํ˜ธ์ถœ ์žฌ์‹œ๋„ ๋ชจ๋‘ ์‹คํŒจ")
        except Exception as api_error:
            logger.error(f"OpenAI API ํ˜ธ์ถœ ์ตœ์ข… ์˜ค๋ฅ˜: {api_error}")
            # Map common failure modes to clearer user-facing messages.
            # NOTE(review): matching on str(exception) is brittle — matching the
            # openai exception classes would be more reliable; confirm.
            error_message = str(api_error)
            if "Connection" in error_message:
                return {"error": "OpenAI ์„œ๋ฒ„์™€ ์—ฐ๊ฒฐํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ์„ ํ™•์ธํ•˜์„ธ์š”."}
            elif "Unauthorized" in error_message or "Authentication" in error_message:
                return {"error": "API ํ‚ค๊ฐ€ ์œ ํšจํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค."}
            elif "Rate limit" in error_message:
                return {"error": "API ํ˜ธ์ถœ ํ•œ๋„๋ฅผ ์ดˆ๊ณผํ–ˆ์Šต๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•˜์„ธ์š”."}
            else:
                return {"error": f"AI ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {error_message}"}
    except Exception as e:
        logger.error(f"์งˆ์˜์‘๋‹ต ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
        return {"error": str(e)}
# PDF ์š”์•ฝ ์ƒ์„ฑ
# PDF ์š”์•ฝ ์ƒ์„ฑ ํ•จ์ˆ˜ ๊ฐœ์„ 
async def summarize_pdf(pdf_id: str) -> Dict[str, Any]:
    """Produce a short AI summary of the PDF's extracted text.

    Mirrors ``query_pdf``'s flow: guard on API key, fetch text chunks,
    truncate context, then call the chat model with up to three retries.
    Returns ``{"summary", "pdf_id"}`` on success or ``{"error": ...}``.
    """
    try:
        # Bail out early when no usable OpenAI client exists
        if not HAS_VALID_API_KEY or not openai_client:
            return {
                "error": "OpenAI API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. 'LLM_API' ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋ฅผ ํ™•์ธํ•˜์„ธ์š”.",
                "summary": "API ํ‚ค๊ฐ€ ์—†์–ด ์š”์•ฝ์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์‹œ์Šคํ…œ ๊ด€๋ฆฌ์ž์—๊ฒŒ ๋ฌธ์˜ํ•˜์„ธ์š”."
            }
        # Load (or build) the extracted-text payload
        embedding_data = await get_pdf_embedding(pdf_id)
        if "error" in embedding_data:
            return {"error": embedding_data["error"], "summary": "PDF์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
        # Concatenate all page chunks (length-limited context)
        all_text = "\n\n".join([f"Page {chunk['page']}: {chunk['text']}" for chunk in embedding_data["chunks"]])
        # Truncate overly long context; the limit is characters, not tokens
        max_context_length = 60000  # rough character-based cap
        if len(all_text) > max_context_length:
            all_text = all_text[:max_context_length] + "...(์ดํ•˜ ์ƒ๋žต)"
        # Call the chat model, retrying transient failures
        try:
            for attempt in range(3):  # at most 3 attempts
                try:
                    response = openai_client.chat.completions.create(
                        model="gpt-4.1-mini",
                        messages=[
                            {"role": "system", "content": "The default language is set to English. ๋‹ค์Œ PDF ๋‚ด์šฉ์„ ๊ฐ„๊ฒฐํ•˜๊ฒŒ ์š”์•ฝํ•ด์ฃผ์„ธ์š”. ํ•ต์‹ฌ ์ฃผ์ œ์™€ ์ฃผ์š” ํฌ์ธํŠธ๋ฅผ ํฌํ•จํ•œ ์š”์•ฝ์„ 500์ž ์ด๋‚ด๋กœ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."},
                            {"role": "user", "content": f"PDF ๋‚ด์šฉ:\n{all_text}"}
                        ],
                        temperature=0.7,
                        max_tokens=1024,
                        timeout=30.0  # per-call 30-second timeout
                    )
                    summary = response.choices[0].message.content
                    return {
                        "summary": summary,
                        "pdf_id": pdf_id
                    }
                except Exception as api_error:
                    logger.error(f"OpenAI API ํ˜ธ์ถœ ์˜ค๋ฅ˜ (์‹œ๋„ {attempt+1}/3): {api_error}")
                    if attempt == 2:  # final attempt also failed
                        raise api_error
                    await asyncio.sleep(1 * (attempt + 1))  # linear backoff between retries
            # Should be unreachable: the loop either returns or raises
            raise Exception("API ํ˜ธ์ถœ ์žฌ์‹œ๋„ ๋ชจ๋‘ ์‹คํŒจ")
        except Exception as api_error:
            logger.error(f"OpenAI API ํ˜ธ์ถœ ์ตœ์ข… ์˜ค๋ฅ˜: {api_error}")
            # Map common failure modes to clearer user-facing messages.
            # NOTE(review): matching on str(exception) is brittle; confirm.
            error_message = str(api_error)
            if "Connection" in error_message:
                return {"error": "OpenAI ์„œ๋ฒ„์™€ ์—ฐ๊ฒฐํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ์„ ํ™•์ธํ•˜์„ธ์š”.", "pdf_id": pdf_id}
            elif "Unauthorized" in error_message or "Authentication" in error_message:
                return {"error": "API ํ‚ค๊ฐ€ ์œ ํšจํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.", "pdf_id": pdf_id}
            elif "Rate limit" in error_message:
                return {"error": "API ํ˜ธ์ถœ ํ•œ๋„๋ฅผ ์ดˆ๊ณผํ–ˆ์Šต๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•˜์„ธ์š”.", "pdf_id": pdf_id}
            else:
                return {"error": f"AI ์š”์•ฝ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {error_message}", "pdf_id": pdf_id}
    except Exception as e:
        logger.error(f"PDF ์š”์•ฝ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
        return {
            "error": str(e),
            "summary": "PDF ์š”์•ฝ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. PDF ํŽ˜์ด์ง€ ์ˆ˜๊ฐ€ ๋„ˆ๋ฌด ๋งŽ๊ฑฐ๋‚˜ ํ˜•์‹์ด ์ง€์›๋˜์ง€ ์•Š์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
        }
# ์ตœ์ ํ™”๋œ PDF ํŽ˜์ด์ง€ ์บ์‹ฑ ํ•จ์ˆ˜
async def cache_pdf(pdf_path: str):
    """Render every page of a PDF to base64 images and cache the result.

    Populates the in-memory ``pdf_cache`` entry for the file's stem and
    mirrors it to a JSON cache file, updating ``progress`` as batches of
    pages are rendered in a thread pool.
    NOTE(review): this coroutine acquires a ``threading.Lock`` and performs
    CPU-bound rendering inline, which blocks the event loop while it runs
    — confirm it is only scheduled where that is acceptable.
    """
    try:
        import fitz  # PyMuPDF
        pdf_file = pathlib.Path(pdf_path)
        pdf_name = pdf_file.stem
        # Per-PDF lock to prevent concurrent caching of the same file.
        # NOTE(review): this check-then-set on cache_locks is itself unsynchronized.
        if pdf_name not in cache_locks:
            cache_locks[pdf_name] = threading.Lock()
        # Skip PDFs already cached or currently being cached
        if pdf_name in pdf_cache and pdf_cache[pdf_name].get("status") in ["processing", "completed"]:
            logger.info(f"PDF {pdf_name} ์ด๋ฏธ ์บ์‹ฑ ์™„๋ฃŒ ๋˜๋Š” ์ง„ํ–‰ ์ค‘")
            return
        with cache_locks[pdf_name]:
            # Double-check after acquiring the lock
            if pdf_name in pdf_cache and pdf_cache[pdf_name].get("status") in ["processing", "completed"]:
                return
            # Mark this PDF as in progress
            pdf_cache[pdf_name] = {"status": "processing", "progress": 0, "pages": []}
            # Reuse a completed on-disk cache when one exists
            cache_path = get_cache_path(pdf_name)
            if cache_path.exists():
                try:
                    with open(cache_path, "r") as cache_file:
                        cached_data = json.load(cache_file)
                        if cached_data.get("status") == "completed" and cached_data.get("pages"):
                            pdf_cache[pdf_name] = cached_data
                            pdf_cache[pdf_name]["status"] = "completed"
                            logger.info(f"์บ์‹œ ํŒŒ์ผ์—์„œ {pdf_name} ๋กœ๋“œ ์™„๋ฃŒ")
                            return
                except Exception as e:
                    logger.error(f"์บ์‹œ ํŒŒ์ผ ๋กœ๋“œ ์‹คํŒจ: {e}")
            # Open the PDF file
            doc = fitz.open(pdf_path)
            total_pages = doc.page_count
            # Produce a first-page thumbnail immediately for fast UI loading
            if total_pages > 0:
                # Render the first page at a small scale
                page = doc[0]
                pix_thumb = page.get_pixmap(matrix=fitz.Matrix(0.2, 0.2))  # smaller thumbnail
                thumb_data = pix_thumb.tobytes("png")
                b64_thumb = base64.b64encode(thumb_data).decode('utf-8')
                thumb_src = f"data:image/png;base64,{b64_thumb}"
                # Cache just the thumbnail first
                pdf_cache[pdf_name]["pages"] = [{"thumb": thumb_src, "src": ""}]
                pdf_cache[pdf_name]["progress"] = 1
                pdf_cache[pdf_name]["total_pages"] = total_pages
            # Render settings (lower values = faster / smaller output)
            scale_factor = 1.0  # base resolution
            jpeg_quality = 80  # JPEG quality
            # Worker that renders one page (executed in the thread pool)
            def process_page(page_num):
                try:
                    page = doc[page_num]
                    # Apply matrix scaling when rasterizing (performance)
                    pix = page.get_pixmap(matrix=fitz.Matrix(scale_factor, scale_factor))
                    # JPEG is smaller than PNG
                    img_data = pix.tobytes("jpeg", jpeg_quality)
                    b64_img = base64.b64encode(img_data).decode('utf-8')
                    img_src = f"data:image/jpeg;base64,{b64_img}"
                    # Only the first page carries the thumbnail
                    thumb_src = "" if page_num > 0 else pdf_cache[pdf_name]["pages"][0]["thumb"]
                    return {
                        "page_num": page_num,
                        "src": img_src,
                        "thumb": thumb_src
                    }
                except Exception as e:
                    logger.error(f"ํŽ˜์ด์ง€ {page_num} ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
                    return {
                        "page_num": page_num,
                        "src": "",
                        "thumb": "",
                        "error": str(e)
                    }
            # Render all pages
            pages = [None] * total_pages
            processed_count = 0
            # Process in batches to bound memory usage
            batch_size = 5  # pages per batch
            for batch_start in range(0, total_pages, batch_size):
                batch_end = min(batch_start + batch_size, total_pages)
                current_batch = list(range(batch_start, batch_end))
                # Render the batch pages in parallel threads
                with concurrent.futures.ThreadPoolExecutor(max_workers=min(5, batch_size)) as executor:
                    batch_results = list(executor.map(process_page, current_batch))
                # Record results and update progress
                for result in batch_results:
                    page_num = result["page_num"]
                    pages[page_num] = {
                        "src": result["src"],
                        "thumb": result["thumb"]
                    }
                    processed_count += 1
                    progress = round(processed_count / total_pages * 100)
                    pdf_cache[pdf_name]["progress"] = progress
                # Checkpoint the partial cache to disk
                pdf_cache[pdf_name]["pages"] = pages
                try:
                    with open(cache_path, "w") as cache_file:
                        json.dump({
                            "status": "processing",
                            "progress": pdf_cache[pdf_name]["progress"],
                            "pages": pdf_cache[pdf_name]["pages"],
                            "total_pages": total_pages
                        }, cache_file)
                except Exception as e:
                    logger.error(f"์ค‘๊ฐ„ ์บ์‹œ ์ €์žฅ ์‹คํŒจ: {e}")
            # Mark caching complete
            pdf_cache[pdf_name] = {
                "status": "completed",
                "progress": 100,
                "pages": pages,
                "total_pages": total_pages
            }
            # Final cache file write
            try:
                with open(cache_path, "w") as cache_file:
                    json.dump(pdf_cache[pdf_name], cache_file)
                logger.info(f"PDF {pdf_name} ์บ์‹ฑ ์™„๋ฃŒ, {total_pages}ํŽ˜์ด์ง€")
            except Exception as e:
                logger.error(f"์ตœ์ข… ์บ์‹œ ์ €์žฅ ์‹คํŒจ: {e}")
    except Exception as e:
        import traceback
        logger.error(f"PDF ์บ์‹ฑ ์˜ค๋ฅ˜: {str(e)}\n{traceback.format_exc()}")
        # NOTE(review): pdf_name would be unbound here if pathlib.Path(pdf_path)
        # itself raised above — confirm that cannot happen in practice.
        if pdf_name in pdf_cache:
            pdf_cache[pdf_name]["status"] = "error"
            pdf_cache[pdf_name]["error"] = str(e)
# PDF ID๋กœ PDF ๊ฒฝ๋กœ ์ฐพ๊ธฐ (๊ฐœ์„ ๋œ ๊ฒ€์ƒ‰ ๋กœ์ง)
def get_pdf_path_by_id(pdf_id: str) -> str:
    """Resolve a PDF id to an existing file path, or return None.

    Resolution order: exact metadata hit (repairing stale paths by file
    name), then prefix match against all known PDF files, then a fuzzy
    id-substring match over the metadata. Successful repairs update and
    persist ``pdf_metadata``.
    """
    logger.info(f"PDF ID๋กœ ํŒŒ์ผ ์กฐํšŒ: {pdf_id}")
    # 1. Direct metadata lookup by id
    if pdf_id in pdf_metadata:
        path = pdf_metadata[pdf_id]
        # Use the stored path if the file still exists
        if os.path.exists(path):
            return path
        # File may have moved — search by its base name
        filename = os.path.basename(path)
        # Check permanent storage
        perm_path = PERMANENT_PDF_DIR / filename
        if perm_path.exists():
            # Repair the metadata entry
            pdf_metadata[pdf_id] = str(perm_path)
            save_pdf_metadata()
            return str(perm_path)
        # Check the main directory
        main_path = PDF_DIR / filename
        if main_path.exists():
            # Repair the metadata entry
            pdf_metadata[pdf_id] = str(main_path)
            save_pdf_metadata()
            return str(main_path)
    # 2. Extract the name portion of the id and scan every PDF file
    try:
        # Id format: filename_timestamp_random
        # NOTE(review): split('_')[0] yields only the text before the FIRST
        # underscore, so multi-word names are truncated here — confirm intended.
        name_part = pdf_id.split('_')[0] if '_' in pdf_id else pdf_id
        # Scan both directories
        for file_path in get_pdf_files() + get_permanent_pdf_files():
            # Match when the file name starts with the id's name part
            file_basename = os.path.basename(file_path)
            if file_basename.startswith(name_part) or file_path.stem.startswith(name_part):
                # Record the discovered mapping
                pdf_metadata[pdf_id] = str(file_path)
                save_pdf_metadata()
                return str(file_path)
    except Exception as e:
        logger.error(f"ํŒŒ์ผ๋ช… ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜: {e}")
    # 3. Fuzzy match over metadata: ids that contain one another
    for pid, path in pdf_metadata.items():
        if os.path.exists(path):
            file_basename = os.path.basename(path)
            # Similar-id heuristic
            if pdf_id in pid or pid in pdf_id:
                pdf_metadata[pdf_id] = path
                save_pdf_metadata()
                return path
    return None
# ์‹œ์ž‘ ์‹œ ๋ชจ๋“  PDF ํŒŒ์ผ ์บ์‹ฑ
async def init_cache_all_pdfs():
    """Warm the page cache for every known PDF.

    Loads metadata, reconciles it with the files on disk, loads any
    completed on-disk caches first (fast start), then renders the
    remaining PDFs concurrently.
    """
    logger.info("PDF ์บ์‹ฑ ์ž‘์—… ์‹œ์ž‘")
    # Load the id -> path metadata
    load_pdf_metadata()
    # Gather PDFs from both the main and permanent directories
    pdf_files = get_pdf_files() + get_permanent_pdf_files()
    # De-duplicate by path
    unique_pdf_paths = set(str(p) for p in pdf_files)
    pdf_files = [pathlib.Path(p) for p in unique_pdf_paths]
    # Reconcile metadata with the files found on disk
    for pdf_file in pdf_files:
        # Assign ids to files that lack one
        found = False
        for pid, path in pdf_metadata.items():
            if os.path.basename(path) == pdf_file.name:
                found = True
                # Repair stale paths
                if not os.path.exists(path):
                    pdf_metadata[pid] = str(pdf_file)
                break
        if not found:
            pdf_id = generate_pdf_id(pdf_file.name)
            pdf_metadata[pdf_id] = str(pdf_file)
    # Persist any changes
    save_pdf_metadata()
    # Load completed disk caches first for a fast start
    for cache_file in CACHE_DIR.glob("*_cache.json"):
        try:
            pdf_name = cache_file.stem.replace("_cache", "")
            with open(cache_file, "r") as f:
                cached_data = json.load(f)
                if cached_data.get("status") == "completed" and cached_data.get("pages"):
                    pdf_cache[pdf_name] = cached_data
                    pdf_cache[pdf_name]["status"] = "completed"
                    logger.info(f"๊ธฐ์กด ์บ์‹œ ๋กœ๋“œ: {pdf_name}")
        except Exception as e:
            logger.error(f"์บ์‹œ ํŒŒ์ผ ๋กœ๋“œ ์˜ค๋ฅ˜: {str(e)}")
    # Render the not-yet-cached PDFs concurrently
    await asyncio.gather(*[asyncio.create_task(cache_pdf(str(pdf_file)))
                           for pdf_file in pdf_files
                           if pdf_file.stem not in pdf_cache
                           or pdf_cache[pdf_file.stem].get("status") != "completed"])
# ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—… ์‹œ์ž‘ ํ•จ์ˆ˜
@app.on_event("startup")
async def startup_event():
    """On app startup: reconcile PDF metadata, then start background caching."""
    # Load the id -> path metadata
    load_pdf_metadata()
    # Create metadata entries for any files that lack one
    for pdf_file in get_pdf_files() + get_permanent_pdf_files():
        found = False
        for pid, path in pdf_metadata.items():
            if os.path.basename(path) == pdf_file.name:
                found = True
                # Repair stale paths
                if not os.path.exists(path):
                    pdf_metadata[pid] = str(pdf_file)
                break
        if not found:
            # New id for a previously unknown file
            pdf_id = generate_pdf_id(pdf_file.name)
            pdf_metadata[pdf_id] = str(pdf_file)
    # Persist changes
    save_pdf_metadata()
    # Run the caching pipeline as a background task
    asyncio.create_task(init_cache_all_pdfs())
# API ์—”๋“œํฌ์ธํŠธ: PDF ํ”„๋กœ์ ํŠธ ๋ชฉ๋ก
@app.get("/api/pdf-projects")
async def get_pdf_projects_api():
    """List all known PDF projects (main + permanent directories, de-duplicated)."""
    projects = generate_pdf_projects()
    return projects
# API ์—”๋“œํฌ์ธํŠธ: ์˜๊ตฌ ์ €์žฅ๋œ PDF ํ”„๋กœ์ ํŠธ ๋ชฉ๋ก
@app.get("/api/permanent-pdf-projects")
async def get_permanent_pdf_projects_api():
    """List projects backed by permanent storage only; ids are created on demand."""
    pdf_files = get_permanent_pdf_files()
    projects_data = []
    for pdf_file in pdf_files:
        # Look up the file's PDF id
        pdf_id = None
        for pid, path in pdf_metadata.items():
            if os.path.basename(path) == pdf_file.name:
                pdf_id = pid
                break
        # Create and persist one if missing
        if not pdf_id:
            pdf_id = generate_pdf_id(pdf_file.name)
            pdf_metadata[pdf_id] = str(pdf_file)
            save_pdf_metadata()
        projects_data.append({
            "path": str(pdf_file),
            "name": pdf_file.stem,
            "id": pdf_id,
            "cached": pdf_file.stem in pdf_cache and pdf_cache[pdf_file.stem].get("status") == "completed"
        })
    return projects_data
# API ์—”๋“œํฌ์ธํŠธ: PDF ID๋กœ ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ
@app.get("/api/pdf-info-by-id/{pdf_id}")
async def get_pdf_info_by_id(pdf_id: str):
    """Resolve a PDF id to its file info, or report that it does not exist."""
    resolved = get_pdf_path_by_id(pdf_id)
    if not resolved:
        return {"exists": False, "error": "PDF๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"}
    file_obj = pathlib.Path(resolved)
    is_cached = (
        file_obj.stem in pdf_cache
        and pdf_cache[file_obj.stem].get("status") == "completed"
    )
    return {
        "path": resolved,
        "name": file_obj.stem,
        "id": pdf_id,
        "exists": True,
        "cached": is_cached,
    }
# API ์—”๋“œํฌ์ธํŠธ: PDF ์ธ๋„ค์ผ ์ œ๊ณต (์ตœ์ ํ™”)
@app.get("/api/pdf-thumbnail")
async def get_pdf_thumbnail(path: str):
    """Return a small base64 thumbnail of a PDF's first page.

    Serves from the in-memory cache when available; otherwise renders a
    quick low-resolution thumbnail and kicks off full caching in the
    background. Returns ``{"thumbnail": ...}`` (None if no pages) or
    ``{"error": ..., "thumbnail": None}`` on failure.
    """
    try:
        pdf_file = pathlib.Path(path)
        pdf_name = pdf_file.stem
        # Serve the cached thumbnail when present
        if pdf_name in pdf_cache and pdf_cache[pdf_name].get("pages"):
            if pdf_cache[pdf_name]["pages"][0].get("thumb"):
                return {"thumbnail": pdf_cache[pdf_name]["pages"][0]["thumb"]}
        # Not cached: render a small, fast thumbnail directly
        import fitz
        doc = fitz.open(path)
        try:
            if doc.page_count > 0:
                page = doc[0]
                pix = page.get_pixmap(matrix=fitz.Matrix(0.2, 0.2))  # small scale for speed
                img_data = pix.tobytes("jpeg", 70)  # JPEG keeps the payload small
                b64_img = base64.b64encode(img_data).decode('utf-8')
                # Kick off full page caching in the background
                asyncio.create_task(cache_pdf(path))
                return {"thumbnail": f"data:image/jpeg;base64,{b64_img}"}
        finally:
            # Fix: the original never closed the document, leaking the handle.
            doc.close()
        return {"thumbnail": None}
    except Exception as e:
        logger.error(f"์ธ๋„ค์ผ ์ƒ์„ฑ ์˜ค๋ฅ˜: {str(e)}")
        return {"error": str(e), "thumbnail": None}
# API ์—”๋“œํฌ์ธํŠธ: ์บ์‹œ ์ƒํƒœ ํ™•์ธ
@app.get("/api/cache-status")
async def get_cache_status(path: str = None):
    """Report caching status for one PDF (by path) or a summary for all PDFs."""
    if not path:
        # No path given: summarize every tracked PDF
        summary = {}
        for name, info in pdf_cache.items():
            summary[name] = {"status": info["status"], "progress": info.get("progress", 0)}
        return summary
    stem = pathlib.Path(path).stem
    return pdf_cache.get(stem, {"status": "not_cached"})
# API ์—”๋“œํฌ์ธํŠธ: PDF์— ๋Œ€ํ•œ ์งˆ์˜์‘๋‹ต
@app.post("/api/ai/query-pdf/{pdf_id}")
async def api_query_pdf(pdf_id: str, query: Dict[str, str]):
    """HTTP wrapper around ``query_pdf``: validates input, maps errors to status codes."""
    try:
        user_query = query.get("query", "")
        if not user_query:
            return JSONResponse(content={"error": "์งˆ๋ฌธ์ด ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค"}, status_code=400)
        # Ensure the PDF exists before running the pipeline
        pdf_path = get_pdf_path_by_id(pdf_id)
        if not pdf_path:
            return JSONResponse(content={"error": f"PDF ID {pdf_id}์— ํ•ด๋‹นํ•˜๋Š” ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"}, status_code=404)
        # Run the question-answering pipeline
        result = await query_pdf(pdf_id, user_query)
        if "error" in result:
            return JSONResponse(content={"error": result["error"]}, status_code=500)
        return result
    except Exception as e:
        logger.error(f"์งˆ์˜์‘๋‹ต API ์˜ค๋ฅ˜: {e}")
        return JSONResponse(content={"error": str(e)}, status_code=500)
# API ์—”๋“œํฌ์ธํŠธ: PDF ์š”์•ฝ
@app.get("/api/ai/summarize-pdf/{pdf_id}")
async def api_summarize_pdf(pdf_id: str):
    """HTTP wrapper around ``summarize_pdf``: 404 when the id is unknown, 500 on failure."""
    try:
        # Verify the PDF exists before summarizing
        if not get_pdf_path_by_id(pdf_id):
            return JSONResponse(
                content={"error": f"PDF ID {pdf_id}์— ํ•ด๋‹นํ•˜๋Š” ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"},
                status_code=404,
            )
        result = await summarize_pdf(pdf_id)
        if "error" not in result:
            return result
        return JSONResponse(content={"error": result["error"]}, status_code=500)
    except Exception as e:
        logger.error(f"PDF ์š”์•ฝ API ์˜ค๋ฅ˜: {e}")
        return JSONResponse(content={"error": str(e)}, status_code=500)
# API ์—”๋“œํฌ์ธํŠธ: ์บ์‹œ๋œ PDF ์ฝ˜ํ…์ธ  ์ œ๊ณต (์ ์ง„์  ๋กœ๋”ฉ ์ง€์›)
@app.get("/api/cached-pdf")
async def get_cached_pdf(path: str, background_tasks: BackgroundTasks):
    """Serve cached page images for a PDF, supporting progressive loading."""
    try:
        pdf_file = pathlib.Path(path)
        pdf_name = pdf_file.stem
        # Check the in-memory cache
        if pdf_name in pdf_cache:
            status = pdf_cache[pdf_name].get("status", "")
            # Fully rendered: return the complete payload
            if status == "completed":
                return pdf_cache[pdf_name]
            # Still rendering: include the pages available so far (progressive loading)
            elif status == "processing":
                progress = pdf_cache[pdf_name].get("progress", 0)
                pages = pdf_cache[pdf_name].get("pages", [])
                total_pages = pdf_cache[pdf_name].get("total_pages", 0)
                # Partial data is still usable by the client
                return {
                    "status": "processing",
                    "progress": progress,
                    "pages": pages,
                    "total_pages": total_pages,
                    "available_pages": len([p for p in pages if p and p.get("src")])
                }
        # Not cached yet: start caching in the background
        background_tasks.add_task(cache_pdf, path)
        return {"status": "started", "progress": 0}
    except Exception as e:
        logger.error(f"์บ์‹œ๋œ PDF ์ œ๊ณต ์˜ค๋ฅ˜: {str(e)}")
        return {"error": str(e), "status": "error"}
# API ์—”๋“œํฌ์ธํŠธ: PDF ์›๋ณธ ์ฝ˜ํ…์ธ  ์ œ๊ณต(์บ์‹œ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ)
@app.get("/api/pdf-content")
async def get_pdf_content(path: str, background_tasks: BackgroundTasks):
    """Serve the raw PDF bytes, redirecting to the cached variant when ready.

    Redirects to ``/api/cached-pdf`` when the page cache is complete or more
    than 10% rendered; otherwise streams the file and starts caching in the
    background.
    """
    try:
        pdf_file = pathlib.Path(path)
        if not pdf_file.exists():
            return JSONResponse(content={"error": f"ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {path}"}, status_code=404)
        pdf_name = pdf_file.stem
        # Redirect to the cached endpoint when rendering is done or well underway
        if pdf_name in pdf_cache and (pdf_cache[pdf_name].get("status") == "completed"
                                      or (pdf_cache[pdf_name].get("status") == "processing"
                                          and pdf_cache[pdf_name].get("progress", 0) > 10)):
            return JSONResponse(content={"redirect": f"/api/cached-pdf?path={path}"})
        # Read the raw file. Fix: use a distinct handle name — the original
        # reused `pdf_file` for the file object, so `pdf_file.name` below
        # became the handle's .name (the FULL path), leaking the server path
        # into the Content-Disposition header.
        with open(path, "rb") as fh:
            content = fh.read()
        # Percent-encode the (base) filename for the header
        import urllib.parse
        filename = pdf_file.name
        encoded_filename = urllib.parse.quote(filename)
        # Start caching in the background
        background_tasks.add_task(cache_pdf, path)
        # Response headers for inline PDF display
        headers = {
            "Content-Type": "application/pdf",
            "Content-Disposition": f"inline; filename=\"{encoded_filename}\"; filename*=UTF-8''{encoded_filename}"
        }
        return Response(content=content, media_type="application/pdf", headers=headers)
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        logger.error(f"PDF ์ฝ˜ํ…์ธ  ๋กœ๋“œ ์˜ค๋ฅ˜: {str(e)}\n{error_details}")
        return JSONResponse(content={"error": str(e)}, status_code=500)
# PDF ์—…๋กœ๋“œ ์—”๋“œํฌ์ธํŠธ - ์˜๊ตฌ ์ €์žฅ์†Œ์— ์ €์žฅ ๋ฐ ๋ฉ”์ธ ํ™”๋ฉด์— ์ž๋™ ํ‘œ์‹œ
@app.post("/api/upload-pdf")
async def upload_pdf(file: UploadFile = File(...)):
    """Accept a PDF upload: persist it, register an id, start page caching.

    Stores the file in permanent storage, mirrors it into the main display
    directory, records the id -> path mapping, and schedules background
    rendering. Returns upload metadata including a view URL.
    """
    try:
        # Only accept .pdf uploads
        if not file.filename.lower().endswith('.pdf'):
            return JSONResponse(
                content={"success": False, "message": "PDF ํŒŒ์ผ๋งŒ ์—…๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค"},
                status_code=400
            )
        # Security fix: strip any directory components from the
        # client-supplied filename so a name like "../../x.pdf" cannot
        # escape the storage directories (path traversal).
        safe_filename = os.path.basename(file.filename)
        file_path = PERMANENT_PDF_DIR / safe_filename
        # Read the upload once and write both copies from memory
        content = await file.read()
        with open(file_path, "wb") as buffer:
            buffer.write(content)
        # Mirror into the main directory so it shows up immediately
        with open(PDF_DIR / safe_filename, "wb") as buffer:
            buffer.write(content)
        # Register id -> path mapping
        pdf_id = generate_pdf_id(safe_filename)
        pdf_metadata[pdf_id] = str(file_path)
        save_pdf_metadata()
        # Start page caching in the background
        asyncio.create_task(cache_pdf(str(file_path)))
        return JSONResponse(
            content={
                "success": True,
                "path": str(file_path),
                "name": file_path.stem,
                "id": pdf_id,
                "viewUrl": f"/view/{pdf_id}"
            },
            status_code=200
        )
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        logger.error(f"PDF ์—…๋กœ๋“œ ์˜ค๋ฅ˜: {str(e)}\n{error_details}")
        return JSONResponse(
            content={"success": False, "message": str(e)},
            status_code=500
        )
# ํ…์ŠคํŠธ ํŒŒ์ผ์„ PDF๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜
async def convert_text_to_pdf(text_content: str, title: str) -> Dict[str, Any]:
    """Render plain text into a PDF stored in permanent + main directories.

    Registers a new PDF id in the metadata and starts background page
    caching. Returns ``{"path", "filename", "id"}``; re-raises on failure.
    (Return annotation fixed: the function returns a dict, not a str.)
    """
    try:
        # Build a filesystem-safe file name from the title
        import re
        safe_title = re.sub(r'[^\w\-_\. ]', '_', title)
        if not safe_title:
            safe_title = "aibook"
        # Timestamp keeps generated names unique
        timestamp = int(time.time())
        filename = f"{safe_title}_{timestamp}.pdf"
        # Target path in permanent storage
        file_path = PERMANENT_PDF_DIR / filename
        # Register a Korean-capable font — MaruBuri-SemiBold.ttf beside app.py
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.ttfonts import TTFont
        font_path = BASE / "MaruBuri-SemiBold.ttf"
        font_name = "MaruBuri"
        if font_path.exists():
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            logger.info(f"ํ•œ๊ธ€ ํฐํŠธ ๋“ฑ๋ก ์„ฑ๊ณต: {font_path}")
        else:
            # Fall back to a built-in font (no Hangul glyphs)
            font_name = "Helvetica"
            logger.warning(f"ํ•œ๊ธ€ ํฐํŠธ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {font_path}. ๊ธฐ๋ณธ ํฐํŠธ๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
        # Build the PDF in memory first
        pdf_buffer = io.BytesIO()
        # Styles for Hangul-capable layout
        from reportlab.lib.pagesizes import letter
        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib.enums import TA_CENTER, TA_LEFT
        doc = SimpleDocTemplate(pdf_buffer, pagesize=letter, encoding='utf-8')
        # Custom paragraph styles using the registered font
        title_style = ParagraphStyle(
            name='CustomTitle',
            fontName=font_name,
            fontSize=18,
            leading=22,
            alignment=TA_CENTER,
            spaceAfter=20
        )
        normal_style = ParagraphStyle(
            name='CustomNormal',
            fontName=font_name,
            fontSize=12,
            leading=15,
            alignment=TA_LEFT,
            spaceBefore=6,
            spaceAfter=6
        )
        # Flowables for the document body
        content = []
        # Title first
        content.append(Paragraph(title, title_style))
        content.append(Spacer(1, 20))
        # Split the body into paragraphs on blank lines
        paragraphs = text_content.split('\n\n')
        for para in paragraphs:
            if para.strip():
                # Escape XML special characters for reportlab's mini-markup.
                # NOTE(review): escape() also escapes the inserted <br/> tags,
                # so in-paragraph line breaks likely render as literal text
                # ("&lt;br/&gt;") — confirm this is intended.
                from xml.sax.saxutils import escape
                safe_para = escape(para.replace('\n', '<br/>'))
                p = Paragraph(safe_para, normal_style)
                content.append(p)
                content.append(Spacer(1, 10))
        # Render the PDF into the buffer
        doc.build(content)
        # Persist to permanent storage
        with open(file_path, 'wb') as f:
            f.write(pdf_buffer.getvalue())
        # Mirror into the main directory
        with open(PDF_DIR / filename, 'wb') as f:
            f.write(pdf_buffer.getvalue())
        # Register id -> path mapping
        pdf_id = generate_pdf_id(filename)
        pdf_metadata[pdf_id] = str(file_path)
        save_pdf_metadata()
        # Start page caching in the background
        asyncio.create_task(cache_pdf(str(file_path)))
        return {
            "path": str(file_path),
            "filename": filename,
            "id": pdf_id
        }
    except Exception as e:
        logger.error(f"ํ…์ŠคํŠธ๋ฅผ PDF๋กœ ๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜: {e}")
        raise e
# AI๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ํ…์ŠคํŠธ๋ฅผ ๋” ๊ตฌ์กฐํ™”๋œ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ (OpenAI ์ œ๊ฑฐ ๋ฒ„์ „)
async def enhance_text_with_ai(text_content: str, title: str) -> str:
    """Pass-through stub: AI-based restructuring is disabled.

    *title* is accepted only to keep the call signature stable for
    callers; the raw text is returned unchanged.
    """
    # OpenAI-based enhancement was removed from this deployment.
    return text_content
# ํ…์ŠคํŠธ ํŒŒ์ผ์„ PDF๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ์—”๋“œํฌ์ธํŠธ
@app.post("/api/text-to-pdf")
async def text_to_pdf(file: UploadFile = File(...)):
    """Upload endpoint: convert a .txt/.docx/.doc file into a flipbook PDF.

    Extracts text from the upload, passes it through the (currently
    pass-through) AI enhancer, renders it as a PDF and returns the new
    document's path/id/view URL as JSON.
    """
    try:
        filename = file.filename.lower()
        # Reject anything that is not a supported document format.
        if not filename.endswith(('.txt', '.docx', '.doc')):
            return JSONResponse(
                content={"success": False, "message": "์ง€์›ํ•˜๋Š” ํŒŒ์ผ ํ˜•์‹์€ .txt, .docx, .doc์ž…๋‹ˆ๋‹ค."},
                status_code=400
            )
        raw_bytes = await file.read()

        if filename.endswith('.txt'):
            # Try a handful of common encodings, strict mode, in order.
            text_content = None
            for candidate in ('utf-8', 'euc-kr', 'cp949', 'latin1'):
                try:
                    text_content = raw_bytes.decode(candidate, errors='strict')
                    logger.info(f"ํ…์ŠคํŠธ ํŒŒ์ผ ์ธ์ฝ”๋”ฉ ๊ฐ์ง€: {candidate}")
                    break
                except UnicodeDecodeError:
                    continue
            if text_content is None:
                # Last resort: UTF-8 with replacement characters.
                text_content = raw_bytes.decode('utf-8', errors='replace')
                logger.warning("ํ…์ŠคํŠธ ํŒŒ์ผ ์ธ์ฝ”๋”ฉ์„ ๊ฐ์ง€ํ•  ์ˆ˜ ์—†์–ด UTF-8์œผ๋กœ ์‹œ๋„ํ•ฉ๋‹ˆ๋‹ค.")
        else:
            # .docx/.doc: extract text via docx2txt from a temporary copy.
            # NOTE(review): docx2txt officially supports .docx; extraction
            # from legacy binary .doc files may fail — confirm whether .doc
            # uploads actually work in practice.
            suffix = os.path.splitext(filename)[1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp.write(raw_bytes)
                tmp_path = tmp.name
            try:
                text_content = docx2txt.process(tmp_path)
            finally:
                os.unlink(tmp_path)

        # Title = uploaded filename (lowercased) without its extension.
        title = os.path.splitext(filename)[0]
        enhanced_text = await enhance_text_with_ai(text_content, title)
        pdf_info = await convert_text_to_pdf(enhanced_text, title)
        return JSONResponse(
            content={
                "success": True,
                "path": pdf_info["path"],
                "name": os.path.splitext(pdf_info["filename"])[0],
                "id": pdf_info["id"],
                "viewUrl": f"/view/{pdf_info['id']}"
            },
            status_code=200
        )
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        logger.error(f"ํ…์ŠคํŠธ๋ฅผ PDF๋กœ ๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜: {str(e)}\n{error_details}")
        return JSONResponse(
            content={"success": False, "message": str(e)},
            status_code=500
        )
# ๊ด€๋ฆฌ์ž ์ธ์ฆ ์—”๋“œํฌ์ธํŠธ
@app.post("/api/admin-login")
async def admin_login(password: str = Form(...)):
    """Check the submitted admin password against ADMIN_PASSWORD."""
    # Guard-clause form: fail first, succeed at the end.
    if password != ADMIN_PASSWORD:
        return {"success": False, "message": "์ธ์ฆ ์‹คํŒจ"}
    return {"success": True}
# ๊ด€๋ฆฌ์ž์šฉ PDF ์‚ญ์ œ ์—”๋“œํฌ์ธํŠธ
@app.delete("/api/admin/delete-pdf")
async def delete_pdf(path: str):
    """Delete a PDF: permanent file, featured copy, cache entries, metadata.

    NOTE(review): ``path`` comes verbatim from the client and is unlinked
    directly — there is no containment check against the PDF directories
    and no admin-session verification on this endpoint; confirm that this
    exposure is acceptable.
    """
    try:
        target = pathlib.Path(path)
        if not target.exists():
            return {"success": False, "message": "ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"}
        filename = target.name
        pdf_name = target.stem
        # Remove the permanent copy itself.
        target.unlink()
        # Remove the featured copy in the main directory, if one exists.
        featured = PDF_DIR / filename
        if featured.exists():
            featured.unlink()
        # Drop the on-disk cache file and the in-memory cache entry.
        cached = get_cache_path(pdf_name)
        if cached.exists():
            cached.unlink()
        pdf_cache.pop(pdf_name, None)
        # Purge every metadata id that points at this filename.
        stale_ids = [pid for pid, fpath in pdf_metadata.items()
                     if os.path.basename(fpath) == filename]
        for pid in stale_ids:
            del pdf_metadata[pid]
        save_pdf_metadata()
        return {"success": True}
    except Exception as e:
        logger.error(f"PDF ์‚ญ์ œ ์˜ค๋ฅ˜: {str(e)}")
        return {"success": False, "message": str(e)}
# PDF๋ฅผ ๋ฉ”์ธ ๋””๋ ‰ํ† ๋ฆฌ์— ํ‘œ์‹œ ์„ค์ •
@app.post("/api/admin/feature-pdf")
async def feature_pdf(path: str):
    """Copy a stored PDF into PDF_DIR so it appears on the main page."""
    try:
        source = pathlib.Path(path)
        if not source.exists():
            return {"success": False, "message": "ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"}
        # copy2 preserves the original file's metadata/timestamps.
        shutil.copy2(source, PDF_DIR / source.name)
        return {"success": True}
    except Exception as e:
        logger.error(f"PDF ํ‘œ์‹œ ์„ค์ • ์˜ค๋ฅ˜: {str(e)}")
        return {"success": False, "message": str(e)}
# PDF๋ฅผ ๋ฉ”์ธ ๋””๋ ‰ํ† ๋ฆฌ์—์„œ ์ œ๊ฑฐ (์˜๊ตฌ ์ €์žฅ์†Œ์—์„œ๋Š” ์œ ์ง€)
@app.delete("/api/admin/unfeature-pdf")
async def unfeature_pdf(path: str):
    """Remove the featured copy from PDF_DIR; permanent storage is kept."""
    try:
        featured_copy = PDF_DIR / pathlib.Path(path).name
        if featured_copy.exists():
            featured_copy.unlink()
        return {"success": True}
    except Exception as e:
        logger.error(f"PDF ํ‘œ์‹œ ํ•ด์ œ ์˜ค๋ฅ˜: {str(e)}")
        return {"success": False, "message": str(e)}
# ์ง์ ‘ PDF ๋ทฐ์–ด URL ์ ‘๊ทผ์šฉ ๋ผ์šฐํŠธ
@app.get("/view/{pdf_id}")
async def view_pdf_by_id(pdf_id: str):
    """Serve the flipbook page for a PDF id, with progressively wider lookups."""
    # Attempt 1: direct metadata lookup.
    pdf_path = get_pdf_path_by_id(pdf_id)
    # Attempt 2: reload metadata from disk, then retry.
    if not pdf_path:
        load_pdf_metadata()
        pdf_path = get_pdf_path_by_id(pdf_id)
    # Attempt 3: scan all known PDFs for a stem matching the id's name part,
    # and persist the mapping when found.
    if not pdf_path:
        name_part = pdf_id.split('_')[0] if '_' in pdf_id else pdf_id
        for candidate in get_pdf_files() + get_permanent_pdf_files():
            if candidate.stem.startswith(name_part):
                pdf_metadata[pdf_id] = str(candidate)
                save_pdf_metadata()
                pdf_path = str(candidate)
                break
    if not pdf_path:
        return HTMLResponse(
            content=f"<html><body><h1>PDF๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค</h1><p>ID: {pdf_id}</p><a href='/'>ํ™ˆ์œผ๋กœ ๋Œ์•„๊ฐ€๊ธฐ</a></body></html>",
            status_code=404
        )
    # Render the main page with an auto-load script for this PDF id.
    return get_html_content(pdf_id=pdf_id)
# HTML ํŒŒ์ผ ์ฝ๊ธฐ ํ•จ์ˆ˜
def get_html_content(pdf_id: Optional[str] = None):
    """Return the flipbook page as an HTMLResponse.

    Loads ``flipbook_template.html`` next to this file when present,
    otherwise falls back to the module-level ``HTML`` string.  When
    *pdf_id* is given, a script is injected before ``</body>`` that
    auto-opens that PDF once the page has loaded.
    """
    html_path = BASE / "flipbook_template.html"
    if html_path.exists():
        with open(html_path, "r", encoding="utf-8") as f:
            content = f.read()
    else:
        content = HTML  # fallback template from HTML_TEMPLATE secret / default
    if pdf_id:
        # SECURITY FIX: pdf_id comes from the URL path and was previously
        # interpolated raw into the <script> body, so quotes/backslashes in
        # it could break out of the JS string (script injection).
        # json.dumps yields a safely quoted JS string literal, and
        # encodeURIComponent keeps the fetch URL well-formed.
        pdf_id_js = json.dumps(pdf_id)
        auto_load_script = f"""
    <script>
        // ํŽ˜์ด์ง€ ๋กœ๋“œ ์‹œ ์ž๋™์œผ๋กœ ํ•ด๋‹น PDF ์—ด๊ธฐ
        document.addEventListener('DOMContentLoaded', async function() {{
            const pdfId = {pdf_id_js};
            try {{
                // PDF ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ
                const response = await fetch('/api/pdf-info-by-id/' + encodeURIComponent(pdfId));
                const pdfInfo = await response.json();
                if (pdfInfo.exists && pdfInfo.path) {{
                    // ์•ฝ๊ฐ„์˜ ์ง€์—ฐ ํ›„ PDF ๋ทฐ์–ด ์—ด๊ธฐ (UI๊ฐ€ ์ค€๋น„๋œ ํ›„)
                    setTimeout(() => {{
                        openPdfById(pdfId, pdfInfo.path, pdfInfo.cached);
                    }}, 500);
                }} else {{
                    showError("์š”์ฒญํ•œ PDF๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.");
                }}
            }} catch (e) {{
                console.error("์ž๋™ PDF ๋กœ๋“œ ์˜ค๋ฅ˜:", e);
            }}
        }});
    </script>
    """
        # Inject just before </body> so the page DOM exists when it runs.
        content = content.replace("</body>", auto_load_script + "</body>")
    return HTMLResponse(content=content)
@app.get("/", response_class=HTMLResponse)
async def root(request: Request, pdf_id: Optional[str] = Query(None)):
    """Landing page; a non-empty ``?pdf_id=`` query redirects to /view/{pdf_id}."""
    # Guard clause: no (or empty) pdf_id → serve the normal home page.
    if not pdf_id:
        return get_html_content()
    return RedirectResponse(url=f"/view/{pdf_id}")
# HTML ๋ฌธ์ž์—ด (AI ๋ฒ„ํŠผ ๋ฐ ์ฑ—๋ด‡ UI ์ถ”๊ฐ€)
# HTML ๋ฌธ์ž์—ด (AI ๋ฒ„ํŠผ ๋ฐ ์ฑ—๋ด‡ UI ์ถ”๊ฐ€)
import os

# Load the flipbook HTML template from the Hugging Face Space secret store.
HTML = os.getenv("HTML_TEMPLATE", "")

# Fallback: serve a minimal error page when the secret is not configured.
if not HTML:
    logger.warning("HTML_TEMPLATE secret์ด ์„ค์ •๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ๊ธฐ๋ณธ HTML์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
    HTML = """
    <!doctype html>
    <html lang="ko">
    <head>
    <meta charset="utf-8">
    <title>FlipBook Space</title>
    <style>
        body { font-family: Arial, sans-serif; text-align: center; padding: 50px; }
        .error { color: red; }
    </style>
    </head>
    <body>
    <h1>HTML ํ…œํ”Œ๋ฆฟ์„ ๋ถˆ๋Ÿฌ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค</h1>
    <p class="error">HTML_TEMPLATE secret์ด ์„ค์ •๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค.</p>
    <p>Hugging Face Space์˜ secret ์˜์—ญ์— HTML_TEMPLATE์„ ์„ค์ •ํ•ด์ฃผ์„ธ์š”.</p>
    </body>
    </html>
    """
if __name__ == "__main__":
    # Dev/standalone entry point; the platform supplies PORT (default 7860).
    port = int(os.getenv("PORT", 7860))
    uvicorn.run("app:app", host="0.0.0.0", port=port)