Spaces:

3a05chatgpt
/

pdf-summarizer-app

Sleeping

App Files Files Community

3a05chatgpt committed 5 days ago

Commit

b11b466

verified ·

1 Parent(s): 40774bb

Update app.py

Browse files

Files changed (1) hide show

app.py +219 -50

app.py CHANGED Viewed

@@ -1,76 +1,245 @@
 import openai
 import gradio as gr
 import fitz  # PyMuPDF
-from openai import OpenAI  # ✅ 新增正確 Client
 api_key = ""
-selected_model = ""
 summary_text = ""
-client = None  # ✅ 全域 client 物件
 def set_api_key(user_api_key):
     global api_key, client
-    api_key = user_api_key
-    client = OpenAI(api_key=api_key)  # ✅ 正確初始化新版 Client
-    return "✅ API Key 已設定"
 def set_model(model_name):
     global selected_model
     selected_model = model_name
-    return f"✅ 模型已選：{model_name}"
 def extract_pdf_text(file_path):
-    doc = fitz.open(file_path)
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text
 def generate_summary(pdf_file):
-    global summary_text
-    pdf_text = extract_pdf_text(pdf_file)
-    if not pdf_text.strip():
-        return "⚠️ 無法解析 PDF 文字，可能為純圖片 PDF。"
-    response = client.chat.completions.create(
-        model=selected_model,
-        messages=[
-            {"role": "system", "content": "請將以下 PDF 內容整理為條列式摘要重點。"},
-            {"role": "user", "content": pdf_text[:4000]}
-        ]
-    )
-    summary_text = response.choices[0].message.content
-    return summary_text
 def ask_question(user_question):
-    response = client.chat.completions.create(
-        model=selected_model,
-        messages=[
-            {"role": "system", "content": f"根據以下 PDF 摘要內容回答問題：\n{summary_text}"},
-            {"role": "user", "content": user_question}
-        ]
-    )
-    return response.choices[0].message.content
-with gr.Blocks() as demo:
-    gr.Markdown("# 📄 PDF 摘要 & 問答助手 (Hugging Face 版)")
-    api_key_input = gr.Textbox(label="輸入 OpenAI API Key", type="password")
-    api_key_status = gr.Textbox(label="狀態", interactive=False)
-    api_key_input.submit(set_api_key, inputs=api_key_input, outputs=api_key_status)
-    model_choice = gr.Radio(["gpt-3.5-turbo", "gpt-4", "gpt-4o"], label="選擇模型")
-    model_status = gr.Textbox(label="模型狀態", interactive=False)
-    model_choice.change(set_model, inputs=model_choice, outputs=model_status)
-    pdf_upload = gr.File(label="上傳 PDF")
-    summary_output = gr.Textbox(label="PDF 摘要", lines=10)
-    summary_btn = gr.Button("生成摘要")
     summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)
-    question_input = gr.Textbox(label="請輸入您的問題")
-    answer_output = gr.Textbox(label="AI 回答", lines=5)
-    question_btn = gr.Button("送出問題")
     question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
-demo.launch()

 import openai
 import gradio as gr
 import fitz  # PyMuPDF
+from openai import OpenAI
+import os
+import tempfile
+import traceback
+# Module-level state shared by every handler in this file.
 api_key = ""  # OpenAI API key text; rebound by set_api_key()
+selected_model = "gpt-4"  # model id sent with each chat completion; rebound by set_model()
 summary_text = ""  # most recent summary produced by generate_summary()
+client = None  # OpenAI client; stays None until set_api_key() builds one
+pdf_text = ""  # text extracted by the latest generate_summary() call
 def set_api_key(user_api_key):
+    """設定 OpenAI API Key 並初始化客戶端"""
     global api_key, client
+    try:
+        api_key = user_api_key.strip()
+        if not api_key:
+            return "❌ API Key 不能為空"
+        client = OpenAI(api_key=api_key)
+        # 測試 API Key 是否有效
+        test_response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "你好"}],
+            max_tokens=5
+        )
+        return "✅ API Key 已設定並驗證成功"
+    except Exception as e:
+        return f"❌ API Key 設定失敗: {str(e)}"
 def set_model(model_name):
+    """設定選擇的模型"""
     global selected_model
     selected_model = model_name
+    return f"✅ 模型已選擇：{model_name}"
 def extract_pdf_text(file_path):
+    """從 PDF 文件中提取文字"""
+    try:
+        doc = fitz.open(file_path)
+        text = ""
+        for page_num, page in enumerate(doc):
+            page_text = page.get_text()
+            if page_text.strip():  # 只添加非空白頁面
+                text += f"\n--- 第 {page_num + 1} 頁 ---\n"
+                text += page_text
+        doc.close()
+        return text
+    except Exception as e:
+        return f"❌ PDF 解析錯誤: {str(e)}"
 def generate_summary(pdf_file):
+    """從 PDF 內容生成摘要"""
+    global summary_text, pdf_text
+    if not client:
+        return "❌ 請先設定 OpenAI API Key"
+    if not pdf_file:
+        return "❌ 請先上傳 PDF 文件"
+    try:
+        # 從 PDF 提取文字
+        pdf_text = extract_pdf_text(pdf_file.name)
+        if not pdf_text.strip():
+            return "⚠️ 無法解析 PDF 文字，可能為純圖片 PDF 或空白文件。"
+        # 檢查文字長度，必要時截斷
+        max_chars = 8000  # 為系統提示留出空間
+        if len(pdf_text) > max_chars:
+            pdf_text_truncated = pdf_text[:max_chars] + "\n\n[文本已截斷，僅顯示前 8000 字符]"
+        else:
+            pdf_text_truncated = pdf_text
+        # 生成摘要
+        response = client.chat.completions.create(
+            model=selected_model,
+            messages=[
+                {
+                    "role": "system",
+                    "content": """你是一個專業的文檔摘要助手。請將以下 PDF 內容整理為結構化的摘要：
+1. 首先提供一個簡短的總體概述
+2. 然後按照重要性列出主要重點（使用項目符號）
+3. 如果有數據或統計信息，請特別標注
+4. 如果有結論或建議，請單獨列出
+請用繁體中文回答，保持專業且易於理解的語調。"""
+                },
+                {"role": "user", "content": pdf_text_truncated}
+            ],
+            temperature=0.3
+        )
+        summary_text = response.choices[0].message.content
+        return summary_text
+    except Exception as e:
+        error_msg = f"❌ 摘要生成失敗: {str(e)}"
+        print(f"錯誤詳情: {traceback.format_exc()}")
+        return error_msg
 def ask_question(user_question):
+    """基於 PDF 內容回答問題"""
+    if not client:
+        return "❌ 請先設定 OpenAI API Key"
+    if not summary_text and not pdf_text:
+        return "❌ 請先生成 PDF 摘要"
+    if not user_question.strip():
+        return "❌ 請輸入問題"
+    try:
+        # 使用��要和原始文本來提供更好的上下文
+        context = f"PDF 摘要:\n{summary_text}\n\n原始內容（部分）:\n{pdf_text[:2000]}"
+        response = client.chat.completions.create(
+            model=selected_model,
+            messages=[
+                {
+                    "role": "system",
+                    "content": f"""你是一個專業的文檔問答助手。請基於提供的 PDF 內容回答用戶問題。
+規則：
+1. 只根據提供的文檔內容回答
+2. 如果文檔中沒有相關信息，請明確說明
+3. 引用具體的文檔內容來支持你的回答
+4. 用繁體中文回答
+5. 保持客觀和準確
+文檔內容：
+{context}"""
+                },
+                {"role": "user", "content": user_question}
+            ],
+            temperature=0.2
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        error_msg = f"❌ 問答生成失敗: {str(e)}"
+        print(f"錯誤詳情: {traceback.format_exc()}")
+        return error_msg
def clear_all():
    """Forget the cached summary and PDF text and blank the bound UI fields.

    Returns:
        A tuple of three empty strings, one for each output component the
        clear button is wired to.
    """
    global summary_text, pdf_text
    summary_text = pdf_text = ""
    blank_outputs = ("",) * 3
    return blank_outputs
+# Build the Gradio interface: three tabs (settings, PDF processing, Q&A) plus event wiring.
+with gr.Blocks(theme=gr.themes.Soft(), title="PDF 摘要助手") as demo:
+    gr.Markdown("""
+    # 📄 PDF 摘要 & 問答助手
+    這個工具可以幫助您：
+    - 📋 自動生成 PDF 文檔摘要
+    - 🤖 基於文檔內容回答問題
+    - 💡 快速理解長篇文檔的核心內容
+    """)
+    with gr.Tab("🔧 設定"):  # settings tab: API key on the left, model choice on the right
+        with gr.Row():
+            with gr.Column():
+                api_key_input = gr.Textbox(
+                    label="🔑 輸入 OpenAI API Key",
+                    type="password",
+                    placeholder="請輸入您的 OpenAI API Key"
+                )
+                api_key_status = gr.Textbox(
+                    label="API 狀態",
+                    interactive=False,
+                    value="等待設定 API Key..."
+                )
+            with gr.Column():
+                model_choice = gr.Radio(
+                    ["gpt-4", "gpt-4.1", "gpt-4.5"],  # NOTE(review): confirm every id is accepted by the OpenAI API (notably "gpt-4.5") — TODO verify
+                    label="🤖 選擇模型",
+                    value="gpt-4"  # same value as the module-level selected_model default
+                )
+                model_status = gr.Textbox(
+                    label="模型狀態",
+                    interactive=False,
+                    value="✅ 模型已選擇：gpt-4"
+                )
+    with gr.Tab("📄 PDF 處理"):  # PDF tab: upload and buttons on the left, summary on the right
+        with gr.Row():
+            with gr.Column():
+                pdf_upload = gr.File(
+                    label="📁 上傳 PDF 文件",
+                    file_types=[".pdf"]
+                )
+                with gr.Row():
+                    summary_btn = gr.Button("🔄 生成摘要", variant="primary")
+                    clear_btn = gr.Button("🗑️ 清除資料", variant="secondary")
+            with gr.Column():
+                summary_output = gr.Textbox(
+                    label="📋 PDF 摘要",
+                    lines=15,
+                    placeholder="上傳 PDF 文件並點擊 '生成摘要' 按鈕"
+                )
+    with gr.Tab("❓ 問答"):  # Q&A tab: question on the left, answer on the right
+        with gr.Row():
+            with gr.Column():
+                question_input = gr.Textbox(
+                    label="💬 請輸入您的問題",
+                    placeholder="例如：這份文件的主要結論是什麼？"
+                )
+                question_btn = gr.Button("📤 送出問題", variant="primary")
+            with gr.Column():
+                answer_output = gr.Textbox(
+                    label="🤖 AI 回答",
+                    lines=10,
+                    placeholder="AI 回答將顯示在這裡"
+                )
+    # Event wiring.
+    api_key_input.submit(set_api_key, inputs=api_key_input, outputs=api_key_status)  # the key is applied on textbox submit; no separate button is wired
+    model_choice.change(set_model, inputs=model_choice, outputs=model_status)
     summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)
     question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
+    question_input.submit(ask_question, inputs=question_input, outputs=answer_output)  # same handler as the send button
+    clear_btn.click(clear_all, outputs=[summary_output, question_input, answer_output])  # clear_all() returns one empty string per listed output; pdf_upload is not among them
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False
+    )