Spaces:

3a05chatgpt
/

pdf-summarizer-app

Sleeping

App Files Files Community

3a05chatgpt committed on 5 days ago

Commit

e064173

verified ·

1 Parent(s): 94d38c2

Update app.py

Browse files

Files changed (1) hide show

app.py +183 -36

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import fitz
 from openai import OpenAI
 import traceback
 api_key = ""
 selected_model = "gpt-4"
 summary_text = ""
@@ -11,27 +12,41 @@ client = None
 pdf_text = ""
 def set_api_key(user_api_key):
     global api_key, client
     try:
         api_key = user_api_key.strip()
         if not api_key:
             return "❌ API Key 不能為空"
         client = OpenAI(api_key=api_key)
-        client.chat.completions.create(
-            model="gpt-4",
-            messages=[{"role": "user", "content": "你好"}],
             max_tokens=5
         )
-        return "✅ API Key 已設定並驗證成功"
     except Exception as e:
-        return f"❌ API Key 設定失敗: {str(e)}"
 def set_model(model_name):
     global selected_model
     selected_model = model_name
     return f"✅ 模型已選擇：{model_name}"
 def extract_pdf_text(file_path):
     try:
         doc = fitz.open(file_path)
         text = ""
@@ -45,86 +60,218 @@ def extract_pdf_text(file_path):
         return f"❌ PDF 解析錯誤: {str(e)}"
 def generate_summary(pdf_file):
     global summary_text, pdf_text
     if not client:
         return "❌ 請先設定 OpenAI API Key"
     if not pdf_file:
         return "❌ 請先上傳 PDF 文件"
     try:
         pdf_text = extract_pdf_text(pdf_file.name)
         if not pdf_text.strip():
             return "⚠️ 無法解析 PDF 文字，可能為純圖片 PDF 或空白文件。"
-        pdf_text_truncated = pdf_text[:8000]
         response = client.chat.completions.create(
             model=selected_model,
             messages=[
-                {"role": "system", "content": "請將以下 PDF 內容整理為條列式摘要，用繁體中文回答："},
                 {"role": "user", "content": pdf_text_truncated}
             ],
             temperature=0.3
         )
         summary_text = response.choices[0].message.content
         return summary_text
     except Exception as e:
-        print(traceback.format_exc())
         return f"❌ 摘要生成失敗: {str(e)}"
 def ask_question(user_question):
     if not client:
         return "❌ 請先設定 OpenAI API Key"
     if not summary_text and not pdf_text:
         return "❌ 請先生成 PDF 摘要"
     if not user_question.strip():
         return "❌ 請輸入問題"
     try:
         context = f"PDF 摘要:\n{summary_text}\n\n原始內容（部分）:\n{pdf_text[:2000]}"
         response = client.chat.completions.create(
             model=selected_model,
             messages=[
-                {"role": "system", "content": f"根據以下 PDF 內容回答問題，請用繁體中文回答：\n{context}"},
                 {"role": "user", "content": user_question}
             ],
             temperature=0.2
         )
         return response.choices[0].message.content
     except Exception as e:
-        print(traceback.format_exc())
         return f"❌ 問答生成失敗: {str(e)}"
 def clear_all():
     global summary_text, pdf_text
     summary_text = ""
     pdf_text = ""
     return "", "", ""
-with gr.Blocks(title="PDF 摘要助手") as demo:
-    gr.Markdown("## 📄 PDF 摘要 & 問答助手")
     with gr.Tab("🔧 設定"):
-        api_key_input = gr.Textbox(label="🔑 輸入 OpenAI API Key", type="password")
-        api_key_status = gr.Textbox(label="API 狀態", interactive=False, value="等待設定 API Key...")
-        api_key_btn = gr.Button("確認 API Key")
-        api_key_btn.click(set_api_key, inputs=api_key_input, outputs=api_key_status)
-        model_choice = gr.Radio(["gpt-4", "gpt-4.1", "gpt-4.5"], label="選擇 AI 模型", value="gpt-4")
-        model_status = gr.Textbox(label="模型狀態", interactive=False, value="✅ 已選擇：gpt-4")
-        model_choice.change(set_model, inputs=model_choice, outputs=model_status)
-    with gr.Tab("📄 摘要"):
-        pdf_upload = gr.File(label="上傳 PDF", file_types=[".pdf"])
-        summary_btn = gr.Button("生成摘要")
-        summary_output = gr.Textbox(label="PDF 摘要", lines=12)
-        summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)
-    with gr.Tab("❓ 問答"):
-        question_input = gr.Textbox(label="請輸入問題", lines=2)
-        question_btn = gr.Button("送出問題")
-        answer_output = gr.Textbox(label="AI 回答", lines=8)
-        question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
-        question_input.submit(ask_question, inputs=question_input, outputs=answer_output)
-    clear_btn = gr.Button("🗑️ 清除所有資料")
     clear_btn.click(clear_all, outputs=[summary_output, question_input, answer_output])
 if __name__ == "__main__":
-    demo.launch(show_error=True)

 from openai import OpenAI
 import traceback
+# 全域變數
 api_key = ""
 selected_model = "gpt-4"
 summary_text = ""
 pdf_text = ""
 def set_api_key(user_api_key):
     """Validate an OpenAI API key and, once accepted, install the shared client.

     The key is checked locally (non-empty, ``sk-`` prefix) and then verified
     with a minimal chat-completion request. The module-level ``api_key`` and
     ``client`` are replaced only after the key has been accepted, so a
     rejected key never leaves behind a client object that would satisfy the
     ``if not client`` guards used by the other handlers in this file.

     Args:
         user_api_key: Raw key text from the settings textbox. ``None`` is
             treated like an empty string.

     Returns:
         A status message for the API status textbox.
     """
     global api_key, client
     candidate_key = (user_api_key or "").strip()
     if not candidate_key:
         return "❌ API Key 不能為空"
     if not candidate_key.startswith("sk-"):
         return "❌ API Key 格式錯誤，應該以 'sk-' 開頭"

     candidate_client = None
     try:
         candidate_client = OpenAI(api_key=candidate_key)
         # Smallest possible round trip on a cheap model: the reply itself is
         # ignored, only whether the request is accepted matters.
         candidate_client.chat.completions.create(
             model="gpt-3.5-turbo",
             messages=[{"role": "user", "content": "測試"}],
             max_tokens=5,
         )
     except Exception as e:
         reason = str(e).lower()
         if "incorrect_api_key" in reason:
             return "❌ API Key 無效，請檢查是否正確"
         if "quota" in reason:
             # The key authenticated and only the quota blocks requests, so it
             # is still installed, matching the message shown to the user.
             if candidate_client is not None:
                 api_key, client = candidate_key, candidate_client
             return "⚠️ API Key 有效，但配額不足"
         return f"❌ API Key 設定失敗: {str(e)}"

     api_key, client = candidate_key, candidate_client
     return "✅ API Key 已設定並驗證成功！"
 def set_model(model_name):
     """Remember which chat model later requests should use.

     Args:
         model_name: Model identifier chosen in the settings tab.

     Returns:
         A confirmation message naming the selected model.
     """
     global selected_model
     selected_model = model_name
     return f"✅ 模型已選擇：{model_name}"
 def extract_pdf_text(file_path):
+    """從 PDF 文件中提取文字"""
     try:
         doc = fitz.open(file_path)
         text = ""
         return f"❌ PDF 解析錯誤: {str(e)}"
 def generate_summary(pdf_file):
     """Extract the text of an uploaded PDF and ask the model for a summary.

     The extracted text and the resulting summary are cached in the
     module-level ``pdf_text`` and ``summary_text`` so that ``ask_question``
     can reuse them.

     Args:
         pdf_file: Value delivered by the PDF upload component.

     Returns:
         The generated summary, or a status/error message when no client is
         configured, no file was uploaded, the PDF could not be parsed or
         contains no text, or the API request failed.
     """
     global summary_text, pdf_text
     if not client:
         return "❌ 請先設定 OpenAI API Key"
     if not pdf_file:
         return "❌ 請先上傳 PDF 文件"
     try:
         # A new document is being processed: drop the previous summary so a
         # failure below cannot leave it paired with the new document's text.
         summary_text = ""
         # NOTE(review): assumes the upload is either an object exposing the
         # path as ``.name`` or the path itself; confirm against the Gradio
         # version in use.
         source_path = getattr(pdf_file, "name", pdf_file)
         extracted = extract_pdf_text(source_path)
         # extract_pdf_text reports failures as a message string rather than
         # raising; surface it instead of summarising the error text.
         if extracted.startswith("❌ PDF 解析錯誤"):
             pdf_text = ""
             return extracted
         pdf_text = extracted
         if not pdf_text.strip():
             return "⚠️ 無法解析 PDF 文字，可能為純圖片 PDF 或空白文件。"
         # Keep the request bounded; the notice tells the model the input is partial.
         max_chars = 8000
         if len(pdf_text) > max_chars:
             pdf_text_truncated = (
                 pdf_text[:max_chars]
                 + f"\n\n[文本已截斷，僅顯示前 {max_chars} 字符]"
             )
         else:
             pdf_text_truncated = pdf_text
         system_prompt = "\n".join([
             "你是一個專業的文檔摘要助手。請將以下 PDF 內容整理為結構化的摘要：",
             "1. 首先提供一個簡短的總體概述",
             "2. 然後按照重要性列出主要重點（使用項目符號）",
             "3. 如果有數據或統計信息，請特別標注",
             "4. 如果有結論或建議，請單獨列出",
             "請用繁體中文回答，保持專業且易於理解的語調。",
         ])
         response = client.chat.completions.create(
             model=selected_model,
             messages=[
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": pdf_text_truncated},
             ],
             temperature=0.3,
         )
         summary_text = response.choices[0].message.content
         return summary_text
     except Exception as e:
         # Full traceback goes to the process output; the UI gets a short message.
         print(f"錯誤詳情: {traceback.format_exc()}")
         return f"❌ 摘要生成失敗: {str(e)}"
 def ask_question(user_question):
     """Answer a question using the cached PDF summary and text.

     The model receives the stored summary plus the first 2000 characters of
     the extracted PDF text as context, together with the answering rules.

     Args:
         user_question: Question text from the Q&A textbox. ``None`` is
             treated like an empty question.

     Returns:
         The model's answer, or a status/error message when no client is
         configured, no document has been processed, the question is blank,
         or the API request failed.
     """
     if not client:
         return "❌ 請先設定 OpenAI API Key"
     if not summary_text and not pdf_text:
         return "❌ 請先生成 PDF 摘要"
     if not (user_question or "").strip():
         return "❌ 請輸入問題"
     try:
         context = f"PDF 摘要:\n{summary_text}\n\n原始內容（部分）:\n{pdf_text[:2000]}"
         rules = "\n".join([
             "你是一個專業的文檔問答助手。請基於提供的 PDF 內容回答用戶問題。",
             "規則：",
             "1. 只根據提供的文檔內容回答",
             "2. 如果文檔中沒有相關信息，請明確說明",
             "3. 引用具體的文檔內容來支持你的回答",
             "4. 用繁體中文回答",
             "5. 保持客觀和準確",
             "文檔內容：",
         ])
         response = client.chat.completions.create(
             model=selected_model,
             messages=[
                 {"role": "system", "content": f"{rules}\n{context}"},
                 {"role": "user", "content": user_question},
             ],
             temperature=0.2,
         )
         return response.choices[0].message.content
     except Exception as e:
         # Full traceback goes to the process output; the UI gets a short message.
         print(f"錯誤詳情: {traceback.format_exc()}")
         return f"❌ 問答生成失敗: {str(e)}"
 def clear_all():
     """Forget the cached summary and PDF text and blank the bound outputs.

     Returns:
         Three empty strings, one for each output component wired to the
         clear button (summary, question and answer boxes).
     """
     global summary_text, pdf_text
     summary_text = ""
     pdf_text = ""
     return "", "", ""
+# 創建 Gradio 介面
+with gr.Blocks(
+    title="PDF 摘要助手",
+    css="""
+    /* 隱藏 Gradio footer 和 logo */
+    footer { display: none !important; }
+    .gradio-container footer { display: none !important; }
+    div[class*="footer"] { display: none !important; }
+    div[class*="Footer"] { display: none !important; }
+    .gr-footer { display: none !important; }
+    """
+) as demo:
+    gr.Markdown("""
+    # 📄 PDF 摘要 & 問答助手
+    🚀 **歡迎使用 PDF 智能分析工具！**
+    **主要功能：**
+    - 📋 自動生成 PDF 文檔摘要
+    - 🤖 基於文檔內容回答問題
+    - 💡 快速理解長篇文檔的核心內容
+    **使用步驟：**
+    1. 先在「設定」頁面輸入您的 OpenAI API Key
+    2. 選擇適合的 AI 模型
+    3. 在「摘要」頁面上傳 PDF 文件並生成摘要
+    4. 在「問答」頁面提出關於文件的問題
+    ---
+    """)
     with gr.Tab("🔧 設定"):
+        gr.Markdown("### API Key 設定")
+        api_key_input = gr.Textbox(
+            label="🔑 輸入 OpenAI API Key",
+            type="password",
+            placeholder="請輸入您的 OpenAI API Key (sk-...)"
+        )
+        api_key_btn = gr.Button("確認 API Key", variant="primary")
+        api_key_status = gr.Textbox(
+            label="📊 API 狀態",
+            interactive=False,
+            value="🔄 等待設定 API Key..."
+        )
+        gr.Markdown("### 模型選擇")
+        model_choice = gr.Radio(
+            ["gpt-4", "gpt-4.1", "gpt-4.5"],
+            label="🤖 選擇 AI 模型",
+            value="gpt-4"
+        )
+        model_status = gr.Textbox(
+            label="🎯 模型狀態",
+            interactive=False,
+            value="✅ 已選擇：gpt-4"
+        )
+    with gr.Tab("📄 PDF 摘要"):
+        gr.Markdown("### 文件上傳與摘要生成")
+        pdf_upload = gr.File(
+            label="📁 上傳 PDF 文件",
+            file_types=[".pdf"]
+        )
+        with gr.Row():
+            summary_btn = gr.Button("🔄 生成摘要", variant="primary")
+            clear_btn = gr.Button("🗑️ 清除資料", variant="secondary")
+        summary_output = gr.Textbox(
+            label="📋 PDF 摘要",
+            lines=15,
+            placeholder="上傳 PDF 文件並點擊「生成摘要」按鈕，AI 將為您分析文檔內容..."
+        )
+    with gr.Tab("❓ 智能問答"):
+        gr.Markdown("### 基於文檔內容的問答")
+        question_input = gr.Textbox(
+            label="💭 請輸入您的問題",
+            lines=3,
+            placeholder="例如：這份文件的主要結論是什麼？文中提到的關鍵數據有哪些？"
+        )
+        question_btn = gr.Button("📤 送出問題", variant="primary")
+        answer_output = gr.Textbox(
+            label="🤖 AI 回答",
+            lines=12,
+            placeholder="請先上傳並生成 PDF 摘要，然後輸入問題，AI 將基於文檔內容為您���供回答..."
+        )
+        gr.Markdown("""
+        **💡 問題範例：**
+        - 這份文件討論的主要議題是什麼？
+        - 文中有哪些重要的統計數據？
+        - 作者的主要觀點和結論是什麼？
+        - 文件中提到的建議有哪些？
+        """)
+    # 事件綁定 - 保持原有的簡單方式
+    api_key_btn.click(set_api_key, inputs=api_key_input, outputs=api_key_status)
+    api_key_input.submit(set_api_key, inputs=api_key_input, outputs=api_key_status)
+    model_choice.change(set_model, inputs=model_choice, outputs=model_status)
+    summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)
+    question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
+    question_input.submit(ask_question, inputs=question_input, outputs=answer_output)
     clear_btn.click(clear_all, outputs=[summary_output, question_input, answer_output])
 if __name__ == "__main__":
+    demo.launch(
+        show_error=True,
+        share=False
+    )