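# NOTE: the words "Spaces: Sleeping / Sleeping" were captured together with this
# file and appear to be status text from the hosting page; they are not part of
# the program.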
from datetime import datetime
from io import BytesIO

import openai
import streamlit as st
from pypdf import PdfReader, PdfWriter
from pypdf.errors import PdfReadError

from papersearch import 抓取論文, 篩選論文依年份
from pdfpass import 移除_pdf密碼
from pdfsum import 提取_pdf文字, 分段, 摘要
from textsumm import 文字摘要
# ---- Page configuration: must run before any other st.* command ----
st.set_page_config(page_title="PDF 工具箱", page_icon="📄", layout="wide")

# ---- Sidebar: OpenAI API key and model selection ----
st.sidebar.title("📄 PDF 工具箱")
# Keys are usually pasted, so drop stray surrounding whitespace/newlines that
# would otherwise make a valid key fail authentication.
api_key = st.sidebar.text_input(
    "請輸入 OpenAI API 金鑰", type="password", placeholder="sk-..."
).strip()
# NOTE(review): selected_model is not referenced again in the visible part of
# this script, and the listed names should be checked against the model
# identifiers the OpenAI API actually accepts.
selected_model = st.sidebar.radio(
    "選擇 GPT 模型", ["gpt-4", "gpt-4.0", "gpt-4.1", "gpt-4.5"], index=0
)
if api_key:
    # Module-level key used by the openai client library.
    openai.api_key = api_key
else:
    st.sidebar.warning("請輸入你的 OpenAI API Key(sk- 或 sk-proj- 開頭)")
# ---- Feature selection ----
# The chosen label drives which tool is rendered further down; the strings
# here must match the comparisons in the page dispatcher exactly.
page = st.sidebar.radio(
    "選擇功能",
    [
        "文字摘要",
        "PDF 摘要",
        "PDF 密碼移除",
        "論文搜尋",
        "PDF 合併",
        "PDF 拆頁",
        "PDF 轉純文字",
    ],
)
# ---- Page dispatcher: render the tool selected in the sidebar ----

# Text summarization
if page == "文字摘要":
    st.title("📝 文字摘要")
    user_input = st.text_area("請輸入要摘要的文字")
    if st.button("生成摘要"):
        if not api_key:
            st.error("請先輸入 OpenAI API 金鑰!")
        elif not user_input.strip():
            # Do not send empty input to the summarizer.
            st.warning("請先輸入要摘要的文字。")
        else:
            結果 = 文字摘要(user_input)
            st.subheader("摘要結果")
            # The summarizer result is indexed as a list of dicts that carry
            # the text under "summary_text".
            st.write(結果[0]["summary_text"])

# PDF summarization
elif page == "PDF 摘要":
    st.title("📜 PDF 摘要")
    uploaded_file = st.file_uploader("上傳你的 PDF 檔案", type=["pdf"])
    # NOTE(review): unlike the text-summary page, this path does not check
    # api_key; confirm whether the pdfsum helpers need it.
    if uploaded_file is not None and st.button("產生 PDF 摘要"):
        pdf_text = 提取_pdf文字(uploaded_file)
        段落們 = 分段(pdf_text)
        全部摘要 = " ".join(摘要(段落們))
        st.subheader("摘要結果")
        st.write(全部摘要)

# PDF password removal
elif page == "PDF 密碼移除":
    st.title("🔑 PDF 密碼移除")
    uploaded_file = st.file_uploader("選擇需要解鎖的 PDF 檔案", type=["pdf"])
    password = st.text_input("請輸入 PDF 密碼", type="password")
    if uploaded_file and password and st.button("移除密碼"):
        output = 移除_pdf密碼(uploaded_file, password)
        # A BytesIO result is treated as success; anything else is shown as
        # the error description.
        if isinstance(output, BytesIO):
            st.success("密碼移除成功!")
            st.download_button(
                "下載已解鎖的 PDF",
                data=output,
                file_name="unlocked_pdf.pdf",
                mime="application/pdf",
            )
        else:
            st.error(f"錯誤:{output}")

# Paper search
elif page == "論文搜尋":
    st.title("🔍 論文搜尋(arXiv)")
    query = st.text_input("輸入主題或關鍵字", placeholder="例如:人工智慧、量子計算")
    max_results = st.slider("結果數量", 1, 50, 10)
    # Read the clock once so both year inputs share the same upper bound.
    current_year = datetime.now().year
    col1, col2 = st.columns(2)
    with col1:
        start_year = st.number_input(
            "起始年份", min_value=1900, max_value=current_year, value=2000
        )
    with col2:
        end_year = st.number_input(
            "結束年份", min_value=1900, max_value=current_year, value=current_year
        )
    if st.button("搜尋論文"):
        if not query.strip():
            # An empty query has nothing to search for.
            st.warning("請輸入主題或關鍵字。")
        elif start_year > end_year:
            # An inverted range could never match any paper.
            st.warning("起始年份不可大於結束年份。")
        else:
            papers = 抓取論文(query.strip(), max_results)
            篩選後 = 篩選論文依年份(papers, start_year, end_year)
            if 篩選後:
                for idx, 論文 in enumerate(篩選後, start=1):
                    st.write(f"### {idx}. {論文['標題']}")
                    st.write(f"**作者**: {', '.join(論文['作者'])}")
                    st.write(f"**發表時間**: {論文['發表時間']}")
                    st.write(f"[閱讀全文]({論文['連結']})")
                    st.write("---")
            else:
                st.warning("在所選年份範圍內沒有找到相關論文。")

# PDF merge
elif page == "PDF 合併":
    st.title("📎 多檔 PDF 合併")
    uploaded_files = st.file_uploader(
        "上傳多個 PDF 檔案", type=["pdf"], accept_multiple_files=True
    )
    if uploaded_files and st.button("合併 PDF"):
        pdf_writer = PdfWriter()
        try:
            for pdf_file in uploaded_files:
                # Loop names avoid reusing `page`, which holds the selected
                # feature label.
                for pdf_page in PdfReader(pdf_file).pages:
                    pdf_writer.add_page(pdf_page)
        except PdfReadError as exc:
            # Report an unreadable (e.g. corrupt or still-encrypted) upload
            # instead of crashing; pdf_file is the file being processed when
            # the error occurred.
            st.error(f"無法讀取「{pdf_file.name}」:{exc}")
        else:
            output = BytesIO()
            pdf_writer.write(output)
            output.seek(0)  # rewind so the download starts at the first byte
            st.download_button(
                "下載合併後的 PDF",
                data=output,
                file_name="merged.pdf",
                mime="application/pdf",
            )

# PDF split (one download per page)
elif page == "PDF 拆頁":
    st.title("✂️ PDF 拆頁")
    uploaded_file = st.file_uploader("上傳一個 PDF", type=["pdf"])
    if uploaded_file:
        try:
            # Materialize the pages up front so read errors surface before
            # any download button is rendered.
            pdf_pages = list(PdfReader(uploaded_file).pages)
        except PdfReadError as exc:
            st.error(f"無法讀取「{uploaded_file.name}」:{exc}")
        else:
            for page_number, pdf_page in enumerate(pdf_pages, start=1):
                pdf_writer = PdfWriter()
                pdf_writer.add_page(pdf_page)
                output = BytesIO()
                pdf_writer.write(output)
                output.seek(0)
                st.download_button(
                    f"下載第 {page_number} 頁",
                    data=output,
                    file_name=f"page_{page_number}.pdf",
                    mime="application/pdf",
                )

# PDF to plain text
elif page == "PDF 轉純文字":
    st.title("📜 PDF 轉純文字")
    uploaded_file = st.file_uploader("上傳 PDF", type=["pdf"])
    if uploaded_file:
        pdf_text = 提取_pdf文字(uploaded_file)
        st.text_area("擷取內容", pdf_text, height=300)