Spaces:

3a05chatgpt
/

pdf-summarizer-app

Sleeping

App Files Files Community

pdf-summarizer-app / app.py

3a05chatgpt

Update app.py

7332235 verified 7 days ago

raw

history blame

13.2 kB

	import openai
	import gradio as gr
	import fitz
	from openai import OpenAI
	import traceback

	# Module-level state shared by the handler functions in this file.
	client = None  # OpenAI client object; stays None until set_api_key builds one
	api_key = ""  # API key text most recently stored by set_api_key
	selected_model = "gpt-4"  # chat model name passed to the completion requests
	pdf_text = ""  # text extracted from the PDF by generate_summary
	summary_text = ""  # summary produced by generate_summary

	def set_api_key(user_api_key):
	    """Validate an OpenAI API key and install a client built from it.

	    The key is checked for emptiness and for the ``sk-`` prefix, then
	    verified with a minimal chat-completion request. The module-level
	    ``api_key`` and ``client`` are replaced only when the key turns out to
	    be usable, so a rejected key never displaces a previously accepted one.

	    Args:
	        user_api_key: Key text as entered by the user; surrounding
	            whitespace is ignored.

	    Returns:
	        A status message string describing the outcome. Failures are
	        reported through this message; no exception propagates.
	    """
	    global api_key, client
	    candidate_key = ""
	    candidate_client = None
	    try:
	        candidate_key = user_api_key.strip()
	        if not candidate_key:
	            return "❌ API Key 不能為空"

	        if not candidate_key.startswith("sk-"):
	            return "❌ API Key 格式錯誤，應該以 'sk-' 開頭"

	        candidate_client = OpenAI(api_key=candidate_key)

	        # Probe the key with a tiny request against the cheaper model.
	        candidate_client.chat.completions.create(
	            model="gpt-3.5-turbo",
	            messages=[{"role": "user", "content": "測試"}],
	            max_tokens=5,
	        )
	    except Exception as e:
	        detail = str(e).lower()
	        error_code = str(getattr(e, "code", "") or "").lower()

	        # The API reports a bad key with code "invalid_api_key" and the
	        # message "Incorrect API key provided"; the legacy marker
	        # "incorrect_api_key" is still accepted for compatibility.
	        invalid_markers = ("invalid_api_key", "incorrect api key", "incorrect_api_key")
	        if error_code == "invalid_api_key" or any(m in detail for m in invalid_markers):
	            return "❌ API Key 無效，請檢查是否正確"

	        if "quota" in error_code or "quota" in detail:
	            # The key authenticated, so keep it even though requests are
	            # currently refused for lack of quota.
	            if candidate_client is not None:
	                api_key, client = candidate_key, candidate_client
	            return "⚠️ API Key 有效，但配額不足"

	        return f"❌ API Key 設定失敗: {str(e)}"

	    api_key, client = candidate_key, candidate_client
	    return "✅ API Key 已設定並驗證成功！"

	def set_model(model_name):
	    """Store the chosen model name in ``selected_model`` and confirm it.

	    Args:
	        model_name: Name of the chat model to use for later requests.

	    Returns:
	        A confirmation message containing the model name.
	    """
	    global selected_model
	    selected_model = model_name
	    confirmation = f"✅ 模型已選擇：{selected_model}"
	    return confirmation

	def extract_pdf_text(file_path):
	    """Extract the text of every non-blank page of a PDF.

	    Each page that yields non-whitespace text is emitted under a
	    ``--- 第 N 頁 ---`` header, where N is the 1-based page number.

	    Args:
	        file_path: Path of the PDF file, passed to ``fitz.open``.

	    Returns:
	        The concatenated page sections (empty string when no page has
	        text), or a message starting with ``❌ PDF 解析錯誤`` when opening
	        or reading the document fails.
	    """
	    try:
	        sections = []
	        # The context manager closes the document even if a page raises.
	        with fitz.open(file_path) as doc:
	            for page_number, page in enumerate(doc, start=1):
	                page_text = page.get_text()
	                if page_text.strip():
	                    sections.append(f"\n--- 第 {page_number} 頁 ---\n{page_text}")
	        return "".join(sections)
	    except Exception as e:
	        return f"❌ PDF 解析錯誤: {str(e)}"

	def generate_summary(pdf_file):
	    """Summarise an uploaded PDF with the selected chat model.

	    The extracted text is stored in the module-level ``pdf_text`` and the
	    generated summary in ``summary_text``. ``pdf_text`` is only replaced
	    once extraction has produced usable text, and ``summary_text`` is
	    cleared at that point so both always describe the same document.

	    Args:
	        pdf_file: The uploaded file, either an object exposing the path
	            as ``.name`` or the path string itself.

	    Returns:
	        The summary text, or a status message when a precondition is not
	        met or a step fails. Exceptions are reported through the message.
	    """
	    global summary_text, pdf_text

	    if not client:
	        return "❌ 請先設定 OpenAI API Key"

	    if not pdf_file:
	        return "❌ 請先上傳 PDF 文件"

	    try:
	        # Accept both a file wrapper (with .name) and a bare path string.
	        source_path = getattr(pdf_file, "name", pdf_file)
	        extracted = extract_pdf_text(source_path)

	        # extract_pdf_text reports failures as a message; surface it as is
	        # and do not send it to the model as document content.
	        if extracted.startswith("❌ PDF 解析錯誤"):
	            return extracted

	        if not extracted.strip():
	            return "⚠️ 無法解析 PDF 文字，可能為純圖片 PDF 或空白文件。"

	        pdf_text = extracted
	        summary_text = ""

	        # Cap the prompt size; the notice tells the model the text is partial.
	        max_chars = 8000
	        if len(extracted) > max_chars:
	            prompt_text = extracted[:max_chars] + f"\n\n[文本已截斷，僅顯示前 {max_chars} 字符]"
	        else:
	            prompt_text = extracted

	        response = client.chat.completions.create(
	            model=selected_model,
	            messages=[
	                {
	                    "role": "system",
	                    "content": """你是一個專業的文檔摘要助手。請將以下 PDF 內容整理為結構化的摘要：

	1. 首先提供一個簡短的總體概述
	2. 然後按照重要性列出主要重點（使用項目符號）
	3. 如果有數據或統計信息，請特別標注
	4. 如果有結論或建議，請單獨列出

	請用繁體中文回答，保持專業且易於理解的語調。"""
	                },
	                {"role": "user", "content": prompt_text},
	            ],
	            temperature=0.3,
	        )

	        # The message content may be None; normalise it to a string.
	        summary_text = response.choices[0].message.content or ""
	        return summary_text

	    except Exception as e:
	        print(f"錯誤詳情: {traceback.format_exc()}")
	        return f"❌ 摘要生成失敗: {str(e)}"

	def ask_question(user_question):
	    """Answer a question using the stored PDF summary and text.

	    The model receives the module-level ``summary_text`` plus the first
	    2000 characters of ``pdf_text`` as context in the system prompt.

	    Args:
	        user_question: The question text. ``None``, empty and
	            whitespace-only values are rejected with a status message.

	    Returns:
	        The model's answer, or a status message when a precondition is
	        not met or the request fails.
	    """
	    if not client:
	        return "❌ 請先設定 OpenAI API Key"

	    if not summary_text and not pdf_text:
	        return "❌ 請先生成 PDF 摘要"

	    # Guard against None as well as blank input before calling .strip().
	    if not user_question or not user_question.strip():
	        return "❌ 請輸入問題"

	    try:
	        # Build the context from the summary and the head of the raw text.
	        context = f"PDF 摘要:\n{summary_text}\n\n原始內容（部分）:\n{pdf_text[:2000]}"

	        response = client.chat.completions.create(
	            model=selected_model,
	            messages=[
	                {
	                    "role": "system",
	                    "content": f"""你是一個專業的文檔問答助手。請基於提供的 PDF 內容回答用戶問題。

	規則：
	1. 只根據提供的文檔內容回答
	2. 如果文檔中沒有相關信息，請明確說明
	3. 引用具體的文檔內容來支持你的回答
	4. 用繁體中文回答
	5. 保持客觀和準確

	文檔內容：
	{context}"""
	                },
	                {"role": "user", "content": user_question},
	            ],
	            temperature=0.2,
	        )

	        return response.choices[0].message.content

	    except Exception as e:
	        print(f"錯誤詳情: {traceback.format_exc()}")
	        return f"❌ 問答生成失敗: {str(e)}"

	def clear_all():
	    """Reset the stored summary and PDF text.

	    Returns:
	        A tuple of three empty strings, one per output component that the
	        caller wires to this handler.
	    """
	    global summary_text, pdf_text
	    summary_text = pdf_text = ""
	    return ("",) * 3

	# Custom CSS styles - blue-purple theme.
	# The string below is passed to gr.Blocks(css=...) further down in this file.
	# The comments inside the string are part of the stylesheet text itself.
	custom_css = """
	/* 主要容器背景 */
	.gradio-container {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	min-height: 100vh;
	}

	/* 隱藏 Gradio footer 和 logo */
	footer { display: none !important; }
	.gradio-container footer { display: none !important; }
	div[class*="footer"] { display: none !important; }
	div[class*="Footer"] { display: none !important; }
	.gr-footer { display: none !important; }

	/* 標籤頁樣式 */
	.tab-nav {
	background: rgba(255, 255, 255, 0.1) !important;
	border-radius: 15px !important;
	backdrop-filter: blur(10px) !important;
	margin-bottom: 20px !important;
	}

	.tab-nav button {
	background: rgba(255, 255, 255, 0.1) !important;
	color: white !important;
	border: none !important;
	border-radius: 10px !important;
	margin: 5px !important;
	font-weight: 600 !important;
	transition: all 0.3s ease !important;
	}

	.tab-nav button:hover {
	background: rgba(255, 255, 255, 0.2) !important;
	transform: translateY(-2px) !important;
	}

	.tab-nav button.selected {
	background: rgba(255, 255, 255, 0.3) !important;
	box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2) !important;
	}

	/* 卡片樣式 */
	.block {
	background: rgba(255, 255, 255, 0.95) !important;
	border-radius: 20px !important;
	box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1) !important;
	backdrop-filter: blur(10px) !important;
	border: 1px solid rgba(255, 255, 255, 0.2) !important;
	margin: 10px 0 !important;
	padding: 20px !important;
	}

	/* 按鈕樣式 */
	.btn {
	background: linear-gradient(45deg, #667eea, #764ba2) !important;
	color: white !important;
	border: none !important;
	border-radius: 15px !important;
	padding: 12px 24px !important;
	font-weight: 600 !important;
	transition: all 0.3s ease !important;
	box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
	}

	.btn:hover {
	transform: translateY(-3px) !important;
	box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6) !important;
	}

	.btn-secondary {
	background: linear-gradient(45deg, #a8a8a8, #6c757d) !important;
	box-shadow: 0 4px 15px rgba(168, 168, 168, 0.4) !important;
	}

	.btn-secondary:hover {
	box-shadow: 0 6px 20px rgba(168, 168, 168, 0.6) !important;
	}

	/* 輸入框樣式 */
	.gr-textbox, .gr-file, .gr-radio {
	border-radius: 15px !important;
	border: 2px solid rgba(102, 126, 234, 0.3) !important;
	background: rgba(255, 255, 255, 0.9) !important;
	transition: all 0.3s ease !important;
	}

	.gr-textbox:focus, .gr-file:focus {
	border-color: #667eea !important;
	box-shadow: 0 0 20px rgba(102, 126, 234, 0.3) !important;
	}

	/* 標題樣式 */
	h1, h2, h3 {
	color: white !important;
	text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3) !important;
	font-weight: 700 !important;
	}

	/* Markdown 內容樣式 */
	.markdown {
	background: rgba(255, 255, 255, 0.95) !important;
	border-radius: 15px !important;
	padding: 20px !important;
	margin: 10px 0 !important;
	box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1) !important;
	}

	/* 進度條樣式 */
	.progress {
	background: linear-gradient(45deg, #667eea, #764ba2) !important;
	border-radius: 10px !important;
	}

	/* 滾動條樣式 */
	::-webkit-scrollbar {
	width: 8px;
	}

	::-webkit-scrollbar-track {
	background: rgba(255, 255, 255, 0.1);
	border-radius: 10px;
	}

	::-webkit-scrollbar-thumb {
	background: linear-gradient(45deg, #667eea, #764ba2);
	border-radius: 10px;
	}

	::-webkit-scrollbar-thumb:hover {
	background: linear-gradient(45deg, #5a6fd8, #6a4190);
	}

	/* 動畫效果 */
	@keyframes fadeIn {
	from { opacity: 0; transform: translateY(20px); }
	to { opacity: 1; transform: translateY(0); }
	}

	.block {
	animation: fadeIn 0.6s ease-out !important;
	}
	"""

	# Build the Gradio interface.
	# The Blocks app is bound to `demo`, uses custom_css and the Soft theme,
	# and defines three tabs: settings, PDF summary, and question answering.
	with gr.Blocks(
	title="PDF 摘要助手",
	css=custom_css,
	theme=gr.themes.Soft(
	primary_hue="blue",
	secondary_hue="purple",
	neutral_hue="slate",
	)
	) as demo:

	# Introductory Markdown: feature list and usage steps.
	gr.Markdown("""
	# 📄 PDF 摘要 & 問答助手

	🚀 歡迎使用 PDF 智能分析工具！

	主要功能：
	- 📋 自動生成 PDF 文檔摘要
	- 🤖 基於文檔內容回答問題
	- 💡 快速理解長篇文檔的核心內容

	使用步驟：
	1. 先在「設定」頁面輸入您的 OpenAI API Key
	2. 選擇適合的 AI 模型
	3. 在「摘要」頁面上傳 PDF 文件並生成摘要
	4. 在「問答」頁面提出關於文件的問題

	---
	""")

	# Settings tab: API key entry with a status box, and model selection.
	with gr.Tab("🔧 設定"):
	gr.Markdown("### API Key 設定")
	api_key_input = gr.Textbox(
	label="🔑 輸入 OpenAI API Key",
	type="password",
	placeholder="請輸入您的 OpenAI API Key (sk-...)",
	elem_classes=["gr-textbox"]
	)
	api_key_btn = gr.Button("確認 API Key", variant="primary", elem_classes=["btn"])
	api_key_status = gr.Textbox(
	label="📊 API 狀態",
	interactive=False,
	value="🔄 等待設定 API Key...",
	elem_classes=["gr-textbox"]
	)

	gr.Markdown("### 模型選擇")
	# The initial value matches the module-level default of selected_model.
	model_choice = gr.Radio(
	["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo"],
	label="🤖 選擇 AI 模型",
	value="gpt-4",
	elem_classes=["gr-radio"]
	)
	model_status = gr.Textbox(
	label="🎯 模型狀態",
	interactive=False,
	value="✅ 已選擇：gpt-4",
	elem_classes=["gr-textbox"]
	)

	# Summary tab: PDF upload, generate/clear buttons, and the summary output.
	with gr.Tab("📄 PDF 摘要"):
	gr.Markdown("### 文件上傳與摘要生成")
	pdf_upload = gr.File(
	label="📁 上傳 PDF 文件",
	file_types=[".pdf"],
	elem_classes=["gr-file"]
	)
	with gr.Row():
	summary_btn = gr.Button("🔄 生成摘要", variant="primary", elem_classes=["btn"])
	clear_btn = gr.Button("🗑️ 清除資料", variant="secondary", elem_classes=["btn", "btn-secondary"])

	summary_output = gr.Textbox(
	label="📋 PDF 摘要",
	lines=15,
	placeholder="上傳 PDF 文件並點擊「生成摘要」按鈕，AI 將為您分析文檔內容...",
	elem_classes=["gr-textbox"]
	)

	# Q&A tab: question input, submit button, answer output, example questions.
	with gr.Tab("❓ 智能問答"):
	gr.Markdown("### 基於文檔內容的問答")
	question_input = gr.Textbox(
	label="💭 請輸入您的問題",
	lines=3,
	placeholder="例如：這份文件的主要結論是什麼？文中提到的關鍵數據有哪些？",
	elem_classes=["gr-textbox"]
	)
	question_btn = gr.Button("📤 送出問題", variant="primary", elem_classes=["btn"])

	answer_output = gr.Textbox(
	label="🤖 AI 回答",
	lines=12,
	placeholder="請先上傳並生成 PDF 摘要，然後輸入問題，AI 將基於文檔內容為您提供回答...",
	elem_classes=["gr-textbox"]
	)

	gr.Markdown("""
	💡 問題範例：
	- 這份文件討論的主要議題是什麼？
	- 文中有哪些重要的統計數據？
	- 作者的主要觀點和結論是什麼？
	- 文件中提到的建議有哪些？
	""")

	# Event bindings.
	# The API key button click and the key textbox submit both call set_api_key.
	api_key_btn.click(set_api_key, inputs=api_key_input, outputs=api_key_status)
	api_key_input.submit(set_api_key, inputs=api_key_input, outputs=api_key_status)

	model_choice.change(set_model, inputs=model_choice, outputs=model_status)

	summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)

	# The question button click and the question textbox submit both call ask_question.
	question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
	question_input.submit(ask_question, inputs=question_input, outputs=answer_output)

	# clear_all returns three values, one for each listed output component.
	clear_btn.click(clear_all, outputs=[summary_output, question_input, answer_output])

	if __name__ == "__main__":
	    # Launch settings collected in one place, then handed to demo.launch.
	    launch_options = {
	        "show_error": True,
	        "share": True,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	    }
	    demo.launch(**launch_options)