pdf-summarizer-app / pdfsum.py
3a05chatgpt's picture
Upload 8 files
50a43ab verified
raw
history blame
607 Bytes
import PyPDF2
from textsumm import 摘要
def pdf抽取文字(pdf_file) -> str:
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_file: Source handed directly to ``PyPDF2.PdfReader``. Per the
            original author's note, it comes from ``st.file_uploader`` and
            is a ``BytesIO`` object.

    Returns:
        The text of each page, stripped of leading/trailing whitespace and
        followed by a newline, concatenated in page order. A page whose
        ``extract_text()`` result is falsy contributes only the newline.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Build the result with one join instead of growing a string with `+=`
    # on every page.
    return "".join(
        (page.extract_text() or "").strip() + "\n"
        for page in pdf_reader.pages
    )
def pdf摘要(pdf_file):
    """Summarize the text of a PDF, or return a warning if it has none.

    Args:
        pdf_file: PDF source, forwarded unchanged to ``pdf抽取文字``.

    Returns:
        The result of ``摘要`` applied to the extracted text, or a fixed
        warning string when the extracted text is empty or whitespace-only.
    """
    extracted = pdf抽取文字(pdf_file)
    if extracted.strip():
        # If needed, the text could be split by page and summarized piece
        # by piece instead of in a single call.
        return 摘要(extracted)
    return "⚠️ PDF 無可讀文字或為掃描檔,請上傳可解析之 PDF"