Spaces:

3a05chatgpt
/

pdf-summarizer-app

Sleeping

App Files Files Community

3a05chatgpt committed 5 days ago

Commit

1f07c6f

verified ·

1 Parent(s): 364e767

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -225

app.py CHANGED Viewed

@@ -1,12 +1,9 @@
 import openai
 import gradio as gr
-import fitz  # PyMuPDF
 from openai import OpenAI
-import os
-import tempfile
 import traceback
-# 全域變數
 api_key = ""
 selected_model = "gpt-4"
 summary_text = ""
@@ -14,314 +11,138 @@ client = None
 pdf_text = ""
 def set_api_key(user_api_key):
-    """設定 OpenAI API Key 並初始化客戶端"""
     global api_key, client
     try:
         api_key = user_api_key.strip()
         if not api_key:
             return "❌ API Key 不能為空"
         client = OpenAI(api_key=api_key)
-        # 測試 API Key 是否有效
-        test_response = client.chat.completions.create(
             model="gpt-4",
             messages=[{"role": "user", "content": "你好"}],
             max_tokens=5
         )
         return "✅ API Key 已設定並驗證成功"
     except Exception as e:
         return f"❌ API Key 設定失敗: {str(e)}"
 def set_model(model_name):
-    """設定選擇的模型"""
     global selected_model
     selected_model = model_name
     return f"✅ 模型已選擇：{model_name}"
 def extract_pdf_text(file_path):
-    """從 PDF 文件中提取文字"""
     try:
         doc = fitz.open(file_path)
         text = ""
         for page_num, page in enumerate(doc):
             page_text = page.get_text()
-            if page_text.strip():  # 只添加非空白頁面
-                text += f"\n--- 第 {page_num + 1} 頁 ---\n"
-                text += page_text
         doc.close()
         return text
     except Exception as e:
         return f"❌ PDF 解析錯誤: {str(e)}"
 def generate_summary(pdf_file):
-    """從 PDF 內容生成摘要"""
     global summary_text, pdf_text
     if not client:
-        return "❌ 請先設定 OpenAI API Key"
     if not pdf_file:
         return "❌ 請先上傳 PDF 文件"
     try:
-        # 從 PDF 提取文字
         pdf_text = extract_pdf_text(pdf_file.name)
         if not pdf_text.strip():
-            return "⚠️ 無法解析 PDF 文字，可能為純圖片 PDF 或空白文件。"
-        # 檢查文字長度，必要時截斷
-        max_chars = 8000  # 為系統提示留出空間
-        if len(pdf_text) > max_chars:
-            pdf_text_truncated = pdf_text[:max_chars] + "\n\n[文本已截斷，僅顯示前 8000 字符]"
-        else:
-            pdf_text_truncated = pdf_text
-        # 生成摘要
         response = client.chat.completions.create(
             model=selected_model,
             messages=[
-                {
-                    "role": "system",
-                    "content": """你是一個專業的文檔摘要助手。請將以下 PDF 內容整理為結構化的摘要：
-1. 首先提供一個簡短的總體概述
-2. 然後按照重要性列出主要重點（使用項目符號）
-3. 如果有數據或統計信息，請特別標注
-4. 如果有結論或建議，請單獨列出
-請用繁體中文回答，保持專業且易於理解的語調。"""
-                },
                 {"role": "user", "content": pdf_text_truncated}
             ],
             temperature=0.3
         )
         summary_text = response.choices[0].message.content
         return summary_text
     except Exception as e:
-        error_msg = f"❌ 摘要生成失敗: {str(e)}"
-        print(f"錯誤詳情: {traceback.format_exc()}")
-        return error_msg
 def ask_question(user_question):
-    """基於 PDF 內容回答問題"""
     if not client:
-        return "❌ 請先設定 OpenAI API Key"
     if not summary_text and not pdf_text:
         return "❌ 請先生成 PDF 摘要"
     if not user_question.strip():
         return "❌ 請輸入問題"
     try:
-        # 使用摘要和原始文本來提供更好的上下文
         context = f"PDF 摘要:\n{summary_text}\n\n原始內容（部分）:\n{pdf_text[:2000]}"
         response = client.chat.completions.create(
             model=selected_model,
             messages=[
-                {
-                    "role": "system",
-                    "content": f"""你是一個專業的文檔問答助手。請基於提供的 PDF 內容回答用戶問題。
-規則：
-1. 只根據提供的文檔內容回答
-2. 如果文檔中沒有相關信息，請明確說明
-3. 引用具體的文檔內容來支持你的回答
-4. 用繁體中文回答
-5. 保持客觀和準確
-文檔內容：
-{context}"""
-                },
                 {"role": "user", "content": user_question}
             ],
             temperature=0.2
         )
         return response.choices[0].message.content
     except Exception as e:
-        error_msg = f"❌ 問答生成失敗: {str(e)}"
-        print(f"錯誤詳情: {traceback.format_exc()}")
-        return error_msg
 def clear_all():
-    """清除所有資料"""
     global summary_text, pdf_text
     summary_text = ""
     pdf_text = ""
     return "", "", ""
-# 創建 Gradio 介面 - 美觀設計 + 簡單功能
 with gr.Blocks(
-    theme=gr.themes.Soft(),
     title="PDF 摘要助手",
     css="""
-    /* 全螢幕美觀設計 */
     .gradio-container {
         max-width: none !important;
-        width: 100vw !important;
-        height: 100vh !important;
         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-        margin: 0 !important;
-        padding: 0 !important;
     }
-    /* 主要內容區域 */
     .main-content {
         background: rgba(255, 255, 255, 0.95) !important;
         border-radius: 20px !important;
-        margin: 15px !important;
-        padding: 30px !important;
-        box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1) !important;
-        backdrop-filter: blur(10px) !important;
-        width: calc(100vw - 30px) !important;
-        min-height: calc(100vh - 30px) !important;
-        box-sizing: border-box !important;
-    }
-    /* 標題樣式 */
-    .main-header {
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-        -webkit-background-clip: text !important;
-        -webkit-text-fill-color: transparent !important;
-        text-align: center !important;
-        font-size: 2.8em !important;
-        font-weight: bold !important;
-        margin-bottom: 25px !important;
-    }
-    /* 隱藏所有 footer 和 logo */
-    footer,
-    .gradio-container footer,
-    div[class*="footer"],
-    div[class*="Footer"],
-    .gr-footer,
-    .gradio-footer {
-        display: none !important;
-    }
-    /* 響應式設計 */
-    @media (max-width: 768px) {
-        .main-content {
-            margin: 5px !important;
-            padding: 15px !important;
-            width: calc(100vw - 10px) !important;
-        }
-        .main-header {
-            font-size: 2em !important;
-        }
     }
     """
 ) as demo:
-    with gr.Column(elem_classes="main-content"):
-        gr.HTML("""
-        <div class="main-header">📄 PDF 摘要 & 問答助手</div>
-        <div style="text-align: center; margin-bottom: 30px; padding: 25px; background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%); border-radius: 15px; border-left: 5px solid #667eea;">
-            <h3 style="color: #667eea; margin-bottom: 15px;">🚀 歡迎使用 PDF 智能分析工具！</h3>
-            <div style="display: flex; justify-content: space-around; flex-wrap: wrap; margin: 20px 0;">
-                <div style="margin: 10px; padding: 15px; background: white; border-radius: 10px; box-shadow: 0 3px 10px rgba(0,0,0,0.1); min-width: 200px; flex: 1; max-width: 300px;">
-                    <div style="font-size: 24px; margin-bottom: 10px;">📋</div>
-                    <strong>智能摘要生成</strong><br>
-                    <span style="color: #666;">自動分析 PDF 內容並生成結構化摘要</span>
-                </div>
-                <div style="margin: 10px; padding: 15px; background: white; border-radius: 10px; box-shadow: 0 3px 10px rgba(0,0,0,0.1); min-width: 200px; flex: 1; max-width: 300px;">
-                    <div style="font-size: 24px; margin-bottom: 10px;">🤖</div>
-                    <strong>AI 問答系統</strong><br>
-                    <span style="color: #666;">基於文檔內容回答您的問題</span>
-                </div>
-                <div style="margin: 10px; padding: 15px; background: white; border-radius: 10px; box-shadow: 0 3px 10px rgba(0,0,0,0.1); min-width: 200px; flex: 1; max-width: 300px;">
-                    <div style="font-size: 24px; margin-bottom: 10px;">💡</div>
-                    <strong>快速理解</strong><br>
-                    <span style="color: #666;">快速掌握長篇文檔的核心內容</span>
-                </div>
-            </div>
-            <div style="background: rgba(255, 193, 7, 0.1); padding: 15px; border-radius: 10px; border-left: 4px solid #ffc107; margin-top: 20px;">
-                <strong style="color: #e65100;">⚠️ 重要提醒：</strong> 使用前請先在「🔧 設定」頁面輸入您的 OpenAI API Key
-            </div>
-        </div>
-        """)
         with gr.Tab("🔧 設定"):
-            with gr.Row():
-                with gr.Column():
-                    api_key_input = gr.Textbox(
-                        label="🔑 輸入 OpenAI API Key",
-                        type="password",
-                        placeholder="請輸入您的 OpenAI API Key"
-                    )
-                    api_key_status = gr.Textbox(
-                        label="API 狀態",
-                        interactive=False,
-                        value="等待設定 API Key..."
-                    )
-                with gr.Column():
-                    model_choice = gr.Radio(
-                        ["gpt-4", "gpt-4.1", "gpt-4.5"],
-                        label="🤖 選擇模型",
-                        value="gpt-4"
-                    )
-                    model_status = gr.Textbox(
-                        label="模型狀態",
-                        interactive=False,
-                        value="✅ 模型已選擇：gpt-4"
-                    )
-        with gr.Tab("📄 PDF 處理"):
-            with gr.Row():
-                with gr.Column():
-                    pdf_upload = gr.File(
-                        label="📁 上傳 PDF 文件",
-                        file_types=[".pdf"]
-                    )
-                    with gr.Row():
-                        summary_btn = gr.Button("🔄 生成摘要", variant="primary")
-                        clear_btn = gr.Button("🗑️ 清除資料", variant="secondary")
-                with gr.Column():
-                    summary_output = gr.Textbox(
-                        label="📋 PDF 摘要",
-                        lines=20,
-                        placeholder="上傳 PDF 文件並點擊 '生成摘要' 按鈕"
-                    )
         with gr.Tab("❓ 問答"):
-            with gr.Row():
-                with gr.Column():
-                    question_input = gr.Textbox(
-                        label="💬 請輸入您的問題",
-                        placeholder="例如：這份文件的主要結論是什麼？",
-                        lines=4
-                    )
-                    question_btn = gr.Button("📤 送出問題", variant="primary")
-                with gr.Column():
-                    answer_output = gr.Textbox(
-                        label="🤖 AI 回答",
-                        lines=18,
-                        placeholder="AI 回答將顯示在這裡"
-                    )
-    # 事件處理器 - 使用舊版的簡單方式
-    api_key_input.submit(set_api_key, inputs=api_key_input, outputs=api_key_status)
-    model_choice.change(set_model, inputs=model_choice, outputs=model_status)
-    summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)
-    question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
-    question_input.submit(ask_question, inputs=question_input, outputs=answer_output)
-    clear_btn.click(clear_all, outputs=[summary_output, question_input, answer_output])
 if __name__ == "__main__":
-    demo.launch(
-        share=False,
-        show_api=False,
-        show_error=True
-    )

 import openai
 import gradio as gr
+import fitz
 from openai import OpenAI
 import traceback
 # Module-level state shared by the Gradio callbacks below.
 # NOTE(review): the callbacks also read a global `client`; its initialisation
 # is not visible in this view (presumably `client = None`) — confirm.
 # OpenAI API key text; written by set_api_key().
 api_key = ""
 # Model name passed to the chat completion calls; updated by set_model().
 selected_model = "gpt-4"
 # Latest summary produced by generate_summary(); reset by clear_all().
 summary_text = ""
 # Text extracted from the processed PDF; reset by clear_all().
 pdf_text = ""
 def set_api_key(user_api_key):
     """Validate an OpenAI API key and install it as the shared client.

     The key is checked by sending a tiny chat completion through a
     candidate client. The module-level ``api_key`` and ``client`` are only
     replaced after that request succeeds, so a rejected key never becomes
     the client used by the other callbacks and never displaces a client
     that was already working.

     Args:
         user_api_key: Key text from the settings textbox. Surrounding
             whitespace is ignored and ``None`` is treated as empty.

     Returns:
         A status message for the UI: success, "key must not be empty", or
         the error text raised while creating/validating the client.
     """
     global api_key, client
     candidate_key = (user_api_key or "").strip()
     if not candidate_key:
         return "❌ API Key 不能為空"
     try:
         candidate_client = OpenAI(api_key=candidate_key)
         # Minimal round-trip: the reply is discarded, only the absence of
         # an exception matters.
         candidate_client.chat.completions.create(
             model="gpt-4",
             messages=[{"role": "user", "content": "你好"}],
             max_tokens=5
         )
     except Exception as e:
         return f"❌ API Key 設定失敗: {str(e)}"
     # Commit the shared state only once the key is known to work.
     api_key = candidate_key
     client = candidate_client
     return "✅ API Key 已設定並驗證成功"
 def set_model(model_name):
     """Store the chosen model name and return a confirmation message.

     Args:
         model_name: Model identifier to keep in the module-level
             ``selected_model`` for later chat completion calls.

     Returns:
         The status string shown in the model status textbox.
     """
     global selected_model
     confirmation = f"✅ 模型已選擇：{model_name}"
     selected_model = model_name
     return confirmation
 def extract_pdf_text(file_path):
     """Extract the text of every non-blank page of a PDF.

     Each page that yields non-whitespace text is preceded by a
     ``--- 第 N 頁 ---`` marker carrying its 1-based page number; pages
     without text are skipped.

     Args:
         file_path: Path handed to ``fitz.open``.

     Returns:
         The concatenated page text (empty when no page has text), or a
         string starting with ``❌ PDF 解析錯誤`` if opening or reading the
         document fails. Failures are reported as a string, not raised.
     """
     try:
         sections = []
         # The context manager closes the document even when a page fails
         # to parse, so the file handle is not leaked on the error path.
         with fitz.open(file_path) as doc:
             for page_num, page in enumerate(doc):
                 page_text = page.get_text()
                 if page_text.strip():
                     sections.append(
                         f"\n--- 第 {page_num + 1} 頁 ---\n{page_text}"
                     )
         return "".join(sections)
     except Exception as e:
         return f"❌ PDF 解析錯誤: {str(e)}"
 def generate_summary(pdf_file):
     """Summarise an uploaded PDF with the currently selected chat model.

     On success the module-level ``pdf_text`` holds the extracted text and
     ``summary_text`` holds the model's summary. Only the first 8000
     characters of the extracted text are sent to the model.

     Args:
         pdf_file: Uploaded file object; only its ``name`` attribute (the
             path passed to ``extract_pdf_text``) is used.

     Returns:
         The summary text, or a status message when no client is
         configured, no file was uploaded, extraction failed or produced no
         text, or the API call raised.
     """
     global summary_text, pdf_text
     if not client:
         return "❌ 請先設定 API Key"
     if not pdf_file:
         return "❌ 請先上傳 PDF 文件"
     try:
         extracted = extract_pdf_text(pdf_file.name)
         # extract_pdf_text reports failures as a message string instead of
         # raising; surface it rather than sending it to the model as if it
         # were document content. Real text always starts with a page marker
         # line, so this prefix cannot collide with extracted content.
         if extracted.startswith("❌ PDF 解析錯誤"):
             return extracted
         if not extracted.strip():
             return "⚠️ 無法解析 PDF 文字"
         # A new document invalidates the previous summary; clear it so a
         # failed API call below cannot leave a stale summary paired with
         # the new text.
         pdf_text = extracted
         summary_text = ""
         pdf_text_truncated = pdf_text[:8000]
         response = client.chat.completions.create(
             model=selected_model,
             messages=[
                 {"role": "system", "content": "請用繁體中文整理以下 PDF 內容摘要。"},
                 {"role": "user", "content": pdf_text_truncated}
             ],
             temperature=0.3
         )
         # The message content may be None; keep summary_text a string.
         summary_text = response.choices[0].message.content or ""
         return summary_text
     except Exception as e:
         print(traceback.format_exc())
         return f"❌ 摘要生成失敗: {str(e)}"
 def ask_question(user_question):
     """Answer a question using the stored summary and a PDF excerpt.

     The system prompt embeds the current ``summary_text`` plus the first
     2000 characters of ``pdf_text``; the question is sent as the user
     message to the selected model.

     Args:
         user_question: Question text from the UI.

     Returns:
         The model's answer, or a status message when no client is
         configured, no summary/PDF text is stored, the question is blank,
         or the API call raised.
     """
     if not client:
         return "❌ 請先設定 API Key"
     if not (summary_text or pdf_text):
         return "❌ 請先生成 PDF 摘要"
     if not user_question.strip():
         return "❌ 請輸入問題"
     try:
         context = f"PDF 摘要:\n{summary_text}\n\n原始內容（部分）:\n{pdf_text[:2000]}"
         system_message = {"role": "system", "content": f"根據以下 PDF 內容回答問題，請用繁體中文回答：\n{context}"}
         user_message = {"role": "user", "content": user_question}
         response = client.chat.completions.create(
             model=selected_model,
             messages=[system_message, user_message],
             temperature=0.2,
         )
         answer = response.choices[0].message.content
         return answer
     except Exception as e:
         # Log the full traceback to the console; the UI gets the short text.
         print(traceback.format_exc())
         return f"❌ 問答生成失敗: {str(e)}"
 def clear_all():
     """Reset the stored summary and PDF text and blank the UI fields.

     Returns:
         A tuple of three empty strings, one per output component wired to
         this callback.
     """
     global summary_text, pdf_text
     summary_text = pdf_text = ""
     return ("",) * 3
 # Build the Gradio UI. Components are created inside the Blocks context and
 # each callback is wired right after the components it connects.
 with gr.Blocks(
     title="PDF 摘要助手",
     css="""
     .gradio-container {
         max-width: none !important;
         width: 100% !important;
         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
         min-height: 100vh;
     }
     .main-content {
         max-width: 1600px !important;
         margin: 20px auto !important;
         padding: 30px !important;
         background: rgba(255, 255, 255, 0.95) !important;
         border-radius: 20px !important;
     }
     """
 ) as demo:
     # elem_classes attaches the "main-content" class so the .main-content
     # rule in the CSS above actually applies to this column.
     with gr.Column(elem_classes="main-content"):
         gr.Markdown("## 📄 PDF 摘要 & 問答助手")
         # Settings tab: API key entry/validation and model selection.
         with gr.Tab("🔧 設定"):
             api_key_input = gr.Textbox(label="🔑 輸入 OpenAI API Key", type="password")
             api_key_status = gr.Textbox(label="API 狀態", interactive=False, value="等待設定 API Key...")
             api_key_btn = gr.Button("確認 API Key")
             api_key_btn.click(set_api_key, inputs=api_key_input, outputs=api_key_status)
             model_choice = gr.Radio(["gpt-4", "gpt-4.1", "gpt-4.5"], label="選擇 AI 模型", value="gpt-4")
             # Initial text uses the same wording set_model() returns, so the
             # status does not change phrasing after the first selection.
             model_status = gr.Textbox(label="模型狀態", interactive=False, value="✅ 模型已選擇：gpt-4")
             model_choice.change(set_model, inputs=model_choice, outputs=model_status)
         # Summary tab: upload a PDF and request its summary.
         with gr.Tab("📄 摘要"):
             pdf_upload = gr.File(label="上傳 PDF", file_types=[".pdf"])
             summary_btn = gr.Button("生成摘要")
             summary_output = gr.Textbox(label="PDF 摘要", lines=12)
             summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)
         # Q&A tab: the button click and pressing Enter both submit.
         with gr.Tab("❓ 問答"):
             question_input = gr.Textbox(label="請輸入問題", lines=2)
             question_btn = gr.Button("送出問題")
             answer_output = gr.Textbox(label="AI 回答", lines=8)
             question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
             question_input.submit(ask_question, inputs=question_input, outputs=answer_output)
         # Shared across tabs: resets stored state and blanks the three
         # text fields listed in outputs.
         clear_btn = gr.Button("🗑️ 清除所有資料")
         clear_btn.click(clear_all, outputs=[summary_output, question_input, answer_output])
 # Start the app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch(show_error=True)