Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
import openai
|
2 |
import gradio as gr
|
3 |
-
import fitz
|
4 |
from openai import OpenAI
|
5 |
-
import os
|
6 |
-
import tempfile
|
7 |
import traceback
|
8 |
|
9 |
-
# Global variables
api_key = ""
selected_model = "gpt-4"
summary_text = ""
|
@@ -14,314 +11,138 @@ client = None
|
|
14 |
pdf_text = ""
|
15 |
|
16 |
def set_api_key(user_api_key):
    """Validate an OpenAI API key and install it as the active client.

    The key is checked by sending a minimal chat-completion request. The
    module-level ``api_key`` and ``client`` are replaced only after that
    request succeeds, so an empty or rejected key never overwrites a
    configuration that was already working.

    Args:
        user_api_key: Key text entered by the user; ``None`` is treated
            as an empty key.

    Returns:
        A status message for display: success, empty key, or the failure
        reason reported by the raised exception.
    """
    global api_key, client
    try:
        candidate_key = (user_api_key or "").strip()
        if not candidate_key:
            return "❌ API Key 不能為空"

        candidate_client = OpenAI(api_key=candidate_key)
        # Minimal request (max_tokens=5) used only to confirm the key is accepted.
        candidate_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "你好"}],
            max_tokens=5
        )
    except Exception as e:
        return f"❌ API Key 設定失敗: {str(e)}"

    # Commit the new configuration only once validation has succeeded.
    api_key = candidate_key
    client = candidate_client
    return "✅ API Key 已設定並驗證成功"
|
36 |
|
37 |
def set_model(model_name):
    """Store the chosen model name in module state and return a status line."""
    global selected_model
    selected_model = model_name
    confirmation = f"✅ 模型已選擇:{selected_model}"
    return confirmation
|
42 |
|
43 |
def extract_pdf_text(file_path):
    """Extract the text of a PDF, prefixing each non-empty page with a marker.

    Args:
        file_path: Path of the PDF to open with ``fitz.open``.

    Returns:
        The concatenated page texts, each preceded by a
        ``--- 第 N 頁 ---`` header (pages whose text is blank are skipped).
        If anything raises, a string starting with ``❌ PDF 解析錯誤`` is
        returned instead of propagating the exception.
    """
    try:
        doc = fitz.open(file_path)
        try:
            sections = [
                f"\n--- 第 {page_no} 頁 ---\n{page_text}"
                for page_no, page_text in enumerate(
                    (page.get_text() for page in doc), start=1
                )
                if page_text.strip()
            ]
        finally:
            # Release the document even when reading a page fails.
            doc.close()
        return "".join(sections)
    except Exception as e:
        return f"❌ PDF 解析錯誤: {str(e)}"
|
57 |
|
58 |
def generate_summary(pdf_file):
|
59 |
-
"""從 PDF 內容生成摘要"""
|
60 |
global summary_text, pdf_text
|
61 |
-
|
62 |
if not client:
|
63 |
-
return "❌ 請先設定
|
64 |
-
|
65 |
if not pdf_file:
|
66 |
return "❌ 請先上傳 PDF 文件"
|
67 |
-
|
68 |
try:
|
69 |
-
# 從 PDF 提取文字
|
70 |
pdf_text = extract_pdf_text(pdf_file.name)
|
71 |
-
|
72 |
if not pdf_text.strip():
|
73 |
-
return "⚠️ 無法解析 PDF
|
74 |
-
|
75 |
-
# 檢查文字長度,必要時截斷
|
76 |
-
max_chars = 8000 # 為系統提示留出空間
|
77 |
-
if len(pdf_text) > max_chars:
|
78 |
-
pdf_text_truncated = pdf_text[:max_chars] + "\n\n[文本已截斷,僅顯示前 8000 字符]"
|
79 |
-
else:
|
80 |
-
pdf_text_truncated = pdf_text
|
81 |
-
|
82 |
-
# 生成摘要
|
83 |
response = client.chat.completions.create(
|
84 |
model=selected_model,
|
85 |
messages=[
|
86 |
-
{
|
87 |
-
"role": "system",
|
88 |
-
"content": """你是一個專業的文檔摘要助手。請將以下 PDF 內容整理為結構化的摘要:
|
89 |
-
|
90 |
-
1. 首先提供一個簡短的總體概述
|
91 |
-
2. 然後按照重要性列出主要重點(使用項目符號)
|
92 |
-
3. 如果有數據或統計信息,請特別標注
|
93 |
-
4. 如果有結論或建議,請單獨列出
|
94 |
-
|
95 |
-
請用繁體中文回答,保持專業且易於理解的語調。"""
|
96 |
-
},
|
97 |
{"role": "user", "content": pdf_text_truncated}
|
98 |
],
|
99 |
temperature=0.3
|
100 |
)
|
101 |
-
|
102 |
summary_text = response.choices[0].message.content
|
103 |
return summary_text
|
104 |
-
|
105 |
except Exception as e:
|
106 |
-
|
107 |
-
|
108 |
-
return error_msg
|
109 |
|
110 |
def ask_question(user_question):
|
111 |
-
"""基於 PDF 內容回答問題"""
|
112 |
if not client:
|
113 |
-
return "❌ 請先設定
|
114 |
-
|
115 |
if not summary_text and not pdf_text:
|
116 |
return "❌ 請先生成 PDF 摘要"
|
117 |
-
|
118 |
if not user_question.strip():
|
119 |
return "❌ 請輸入問題"
|
120 |
-
|
121 |
try:
|
122 |
-
# 使用摘要和原始文本來提供更好的上下文
|
123 |
context = f"PDF 摘要:\n{summary_text}\n\n原始內容(部分):\n{pdf_text[:2000]}"
|
124 |
-
|
125 |
response = client.chat.completions.create(
|
126 |
model=selected_model,
|
127 |
messages=[
|
128 |
-
{
|
129 |
-
"role": "system",
|
130 |
-
"content": f"""你是一個專業的文檔問答助手。請基於提供的 PDF 內容回答用戶問題。
|
131 |
-
|
132 |
-
規則:
|
133 |
-
1. 只根據提供的文檔內容回答
|
134 |
-
2. 如果文檔中沒有相關信息,請明確說明
|
135 |
-
3. 引用具體的文檔內容來支持你的回答
|
136 |
-
4. 用繁體中文回答
|
137 |
-
5. 保持客觀和準確
|
138 |
-
|
139 |
-
文檔內容:
|
140 |
-
{context}"""
|
141 |
-
},
|
142 |
{"role": "user", "content": user_question}
|
143 |
],
|
144 |
temperature=0.2
|
145 |
)
|
146 |
-
|
147 |
return response.choices[0].message.content
|
148 |
-
|
149 |
except Exception as e:
|
150 |
-
|
151 |
-
|
152 |
-
return error_msg
|
153 |
|
154 |
def clear_all():
    """Reset the cached summary and PDF text; return three empty strings."""
    global summary_text, pdf_text
    summary_text = pdf_text = ""
    # One empty value per output component wired to this callback.
    return ("",) * 3
|
160 |
|
161 |
-
# 創建 Gradio 介面 - 美觀設計 + 簡單功能
|
162 |
with gr.Blocks(
|
163 |
-
theme=gr.themes.Soft(),
|
164 |
title="PDF 摘要助手",
|
165 |
css="""
|
166 |
-
/* 全螢幕美觀設計 */
|
167 |
.gradio-container {
|
168 |
max-width: none !important;
|
169 |
-
width:
|
170 |
-
height: 100vh !important;
|
171 |
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
172 |
-
|
173 |
-
padding: 0 !important;
|
174 |
}
|
175 |
-
|
176 |
-
/* 主要內容區域 */
|
177 |
.main-content {
|
|
|
|
|
|
|
178 |
background: rgba(255, 255, 255, 0.95) !important;
|
179 |
border-radius: 20px !important;
|
180 |
-
margin: 15px !important;
|
181 |
-
padding: 30px !important;
|
182 |
-
box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1) !important;
|
183 |
-
backdrop-filter: blur(10px) !important;
|
184 |
-
width: calc(100vw - 30px) !important;
|
185 |
-
min-height: calc(100vh - 30px) !important;
|
186 |
-
box-sizing: border-box !important;
|
187 |
-
}
|
188 |
-
|
189 |
-
/* 標題樣式 */
|
190 |
-
.main-header {
|
191 |
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
192 |
-
-webkit-background-clip: text !important;
|
193 |
-
-webkit-text-fill-color: transparent !important;
|
194 |
-
text-align: center !important;
|
195 |
-
font-size: 2.8em !important;
|
196 |
-
font-weight: bold !important;
|
197 |
-
margin-bottom: 25px !important;
|
198 |
-
}
|
199 |
-
|
200 |
-
/* 隱藏所有 footer 和 logo */
|
201 |
-
footer,
|
202 |
-
.gradio-container footer,
|
203 |
-
div[class*="footer"],
|
204 |
-
div[class*="Footer"],
|
205 |
-
.gr-footer,
|
206 |
-
.gradio-footer {
|
207 |
-
display: none !important;
|
208 |
-
}
|
209 |
-
|
210 |
-
/* 響應式設計 */
|
211 |
-
@media (max-width: 768px) {
|
212 |
-
.main-content {
|
213 |
-
margin: 5px !important;
|
214 |
-
padding: 15px !important;
|
215 |
-
width: calc(100vw - 10px) !important;
|
216 |
-
}
|
217 |
-
|
218 |
-
.main-header {
|
219 |
-
font-size: 2em !important;
|
220 |
-
}
|
221 |
}
|
222 |
"""
|
223 |
) as demo:
|
224 |
-
with gr.Column(
|
225 |
-
gr.
|
226 |
-
|
227 |
-
|
228 |
-
<div style="text-align: center; margin-bottom: 30px; padding: 25px; background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%); border-radius: 15px; border-left: 5px solid #667eea;">
|
229 |
-
<h3 style="color: #667eea; margin-bottom: 15px;">🚀 歡迎使用 PDF 智能分析工具!</h3>
|
230 |
-
<div style="display: flex; justify-content: space-around; flex-wrap: wrap; margin: 20px 0;">
|
231 |
-
<div style="margin: 10px; padding: 15px; background: white; border-radius: 10px; box-shadow: 0 3px 10px rgba(0,0,0,0.1); min-width: 200px; flex: 1; max-width: 300px;">
|
232 |
-
<div style="font-size: 24px; margin-bottom: 10px;">📋</div>
|
233 |
-
<strong>智能摘要生成</strong><br>
|
234 |
-
<span style="color: #666;">自動分析 PDF 內容並生成結構化摘要</span>
|
235 |
-
</div>
|
236 |
-
<div style="margin: 10px; padding: 15px; background: white; border-radius: 10px; box-shadow: 0 3px 10px rgba(0,0,0,0.1); min-width: 200px; flex: 1; max-width: 300px;">
|
237 |
-
<div style="font-size: 24px; margin-bottom: 10px;">🤖</div>
|
238 |
-
<strong>AI 問答系統</strong><br>
|
239 |
-
<span style="color: #666;">基於文檔內容回答您的問題</span>
|
240 |
-
</div>
|
241 |
-
<div style="margin: 10px; padding: 15px; background: white; border-radius: 10px; box-shadow: 0 3px 10px rgba(0,0,0,0.1); min-width: 200px; flex: 1; max-width: 300px;">
|
242 |
-
<div style="font-size: 24px; margin-bottom: 10px;">💡</div>
|
243 |
-
<strong>快速理解</strong><br>
|
244 |
-
<span style="color: #666;">快速掌握長篇文檔的核心內容</span>
|
245 |
-
</div>
|
246 |
-
</div>
|
247 |
-
<div style="background: rgba(255, 193, 7, 0.1); padding: 15px; border-radius: 10px; border-left: 4px solid #ffc107; margin-top: 20px;">
|
248 |
-
<strong style="color: #e65100;">⚠️ 重要提醒:</strong> 使用前請先在「🔧 設定」頁面輸入您的 OpenAI API Key
|
249 |
-
</div>
|
250 |
-
</div>
|
251 |
-
""")
|
252 |
-
|
253 |
with gr.Tab("🔧 設定"):
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
["gpt-4", "gpt-4.1", "gpt-4.5"],
|
270 |
-
label="🤖 選擇模型",
|
271 |
-
value="gpt-4"
|
272 |
-
)
|
273 |
-
model_status = gr.Textbox(
|
274 |
-
label="模型狀態",
|
275 |
-
interactive=False,
|
276 |
-
value="✅ 模型已選擇:gpt-4"
|
277 |
-
)
|
278 |
-
|
279 |
-
with gr.Tab("📄 PDF 處理"):
|
280 |
-
with gr.Row():
|
281 |
-
with gr.Column():
|
282 |
-
pdf_upload = gr.File(
|
283 |
-
label="📁 上傳 PDF 文件",
|
284 |
-
file_types=[".pdf"]
|
285 |
-
)
|
286 |
-
with gr.Row():
|
287 |
-
summary_btn = gr.Button("🔄 生成摘要", variant="primary")
|
288 |
-
clear_btn = gr.Button("🗑️ 清除資料", variant="secondary")
|
289 |
-
|
290 |
-
with gr.Column():
|
291 |
-
summary_output = gr.Textbox(
|
292 |
-
label="📋 PDF 摘要",
|
293 |
-
lines=20,
|
294 |
-
placeholder="上傳 PDF 文件並點擊 '生成摘要' 按鈕"
|
295 |
-
)
|
296 |
-
|
297 |
with gr.Tab("❓ 問答"):
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
with gr.Column():
|
308 |
-
answer_output = gr.Textbox(
|
309 |
-
label="🤖 AI 回答",
|
310 |
-
lines=18,
|
311 |
-
placeholder="AI 回答將顯示在這裡"
|
312 |
-
)
|
313 |
-
|
314 |
-
# 事件處理器 - 使用舊版的簡單方式
|
315 |
-
api_key_input.submit(set_api_key, inputs=api_key_input, outputs=api_key_status)
|
316 |
-
model_choice.change(set_model, inputs=model_choice, outputs=model_status)
|
317 |
-
summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)
|
318 |
-
question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
|
319 |
-
question_input.submit(ask_question, inputs=question_input, outputs=answer_output)
|
320 |
-
clear_btn.click(clear_all, outputs=[summary_output, question_input, answer_output])
|
321 |
|
322 |
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(
        share=False,
        show_api=False,
        show_error=True
    )
|
|
|
1 |
import openai
|
2 |
import gradio as gr
|
3 |
+
import fitz
|
4 |
from openai import OpenAI
|
|
|
|
|
5 |
import traceback
|
6 |
|
|
|
7 |
# Module-level state shared by the callbacks in this file.
api_key = ""  # written by set_api_key
selected_model = "gpt-4"  # written by set_model
summary_text = ""  # written by generate_summary, reset by clear_all
pdf_text = ""  # written by generate_summary, reset by clear_all
|
12 |
|
13 |
def set_api_key(user_api_key):
    """Validate an OpenAI API key and install it as the active client.

    The key is checked by sending a minimal chat-completion request. The
    module-level ``api_key`` and ``client`` are replaced only after that
    request succeeds, so an empty or rejected key never overwrites a
    configuration that was already working.

    Args:
        user_api_key: Key text entered by the user; ``None`` is treated
            as an empty key.

    Returns:
        A status message for display: success, empty key, or the failure
        reason reported by the raised exception.
    """
    global api_key, client
    try:
        candidate_key = (user_api_key or "").strip()
        if not candidate_key:
            return "❌ API Key 不能為空"

        candidate_client = OpenAI(api_key=candidate_key)
        # Minimal request (max_tokens=5) used only to confirm the key is accepted.
        candidate_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "你好"}],
            max_tokens=5
        )
    except Exception as e:
        return f"❌ API Key 設定失敗: {str(e)}"

    # Commit the new configuration only once validation has succeeded.
    api_key = candidate_key
    client = candidate_client
    return "✅ API Key 已設定並驗證成功"
|
28 |
|
29 |
def set_model(model_name):
    """Store the chosen model name in module state and return a status line."""
    global selected_model
    selected_model = model_name
    confirmation = f"✅ 模型已選擇:{selected_model}"
    return confirmation
|
33 |
|
34 |
def extract_pdf_text(file_path):
    """Extract the text of a PDF, prefixing each non-empty page with a marker.

    Args:
        file_path: Path of the PDF to open with ``fitz.open``.

    Returns:
        The concatenated page texts, each preceded by a
        ``--- 第 N 頁 ---`` header (pages whose text is blank are skipped).
        If anything raises, a string starting with ``❌ PDF 解析錯誤`` is
        returned instead of propagating the exception.
    """
    try:
        doc = fitz.open(file_path)
        try:
            sections = [
                f"\n--- 第 {page_no} 頁 ---\n{page_text}"
                for page_no, page_text in enumerate(
                    (page.get_text() for page in doc), start=1
                )
                if page_text.strip()
            ]
        finally:
            # Release the document even when reading a page fails.
            doc.close()
        return "".join(sections)
    except Exception as e:
        return f"❌ PDF 解析錯誤: {str(e)}"
|
46 |
|
47 |
def generate_summary(pdf_file):
    """Summarize an uploaded PDF with the selected chat model.

    Extracts the PDF text, sends at most the first 8000 characters to the
    model, and caches both the extracted text and the summary in module
    state for later question answering.

    Args:
        pdf_file: The uploaded file; either an object exposing the path as
            ``.name`` or a plain path string.

    Returns:
        The summary text on success, otherwise a user-facing message
        describing what went wrong.
    """
    global summary_text, pdf_text

    if not client:
        return "❌ 請先設定 API Key"
    if not pdf_file:
        return "❌ 請先上傳 PDF 文件"

    try:
        # Accept a plain path string as well as an object exposing ``.name``.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        extracted = extract_pdf_text(pdf_path)

        # extract_pdf_text reports failures as a string with this prefix;
        # genuine text always starts with a page marker, so the prefix is
        # unambiguous. Surface the error instead of summarizing it.
        if extracted.startswith("❌ PDF 解析錯誤"):
            pdf_text = ""
            summary_text = ""
            return extracted

        pdf_text = extracted
        # Drop the previous document's summary so it cannot be paired with
        # the new document's text if the request below fails.
        summary_text = ""

        if not pdf_text.strip():
            return "⚠️ 無法解析 PDF 文字"

        pdf_text_truncated = pdf_text[:8000]
        response = client.chat.completions.create(
            model=selected_model,
            messages=[
                {"role": "system", "content": "請用繁體中文整理以下 PDF 內容摘要。"},
                {"role": "user", "content": pdf_text_truncated}
            ],
            temperature=0.3
        )
        summary_text = response.choices[0].message.content
        return summary_text
    except Exception as e:
        print(traceback.format_exc())
        return f"❌ 摘要生成失敗: {str(e)}"
|
|
|
71 |
|
72 |
def ask_question(user_question):
    """Answer a question using the cached summary and PDF text as context.

    The context sent to the model is the stored summary plus the first
    2000 characters of the stored PDF text.

    Args:
        user_question: The question text; ``None`` or blank input is
            rejected with a prompt to enter a question.

    Returns:
        The model's answer on success, otherwise a user-facing message
        describing what is missing or what failed.
    """
    if not client:
        return "❌ 請先設定 API Key"
    if not summary_text and not pdf_text:
        return "❌ 請先生成 PDF 摘要"
    # Guard against None before calling .strip(); this check sits outside
    # the try block, so an AttributeError here would otherwise propagate.
    if not user_question or not user_question.strip():
        return "❌ 請輸入問題"

    try:
        context = f"PDF 摘要:\n{summary_text}\n\n原始內容(部分):\n{pdf_text[:2000]}"
        response = client.chat.completions.create(
            model=selected_model,
            messages=[
                {"role": "system", "content": f"根據以下 PDF 內容回答問題,請用繁體中文回答:\n{context}"},
                {"role": "user", "content": user_question}
            ],
            temperature=0.2
        )
        return response.choices[0].message.content
    except Exception as e:
        print(traceback.format_exc())
        return f"❌ 問答生成失敗: {str(e)}"
|
|
|
93 |
|
94 |
def clear_all():
    """Reset the cached summary and PDF text; return three empty strings."""
    global summary_text, pdf_text
    summary_text = pdf_text = ""
    # One empty value per output component wired to this callback.
    return ("",) * 3
|
99 |
|
|
|
100 |
# Build the Gradio interface: a settings tab (API key and model), a summary
# tab (PDF upload and summary) and a Q&A tab, plus a button that clears the
# cached data and the summary / question / answer boxes.
with gr.Blocks(
    title="PDF 摘要助手",
    css="""
    .gradio-container {
        max-width: none !important;
        width: 100% !important;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        min-height: 100vh;
    }
    .main-content {
        max-width: 1600px !important;
        margin: 20px auto !important;
        padding: 30px !important;
        background: rgba(255, 255, 255, 0.95) !important;
        border-radius: 20px !important;
    }
    """
) as demo:
    # Tag the column with the "main-content" class; without it the
    # .main-content rule in the stylesheet above matches no element.
    with gr.Column(elem_classes="main-content"):
        gr.Markdown("## 📄 PDF 摘要 & 問答助手")

        with gr.Tab("🔧 設定"):
            api_key_input = gr.Textbox(label="🔑 輸入 OpenAI API Key", type="password")
            api_key_status = gr.Textbox(label="API 狀態", interactive=False, value="等待設定 API Key...")
            api_key_btn = gr.Button("確認 API Key")
            api_key_btn.click(set_api_key, inputs=api_key_input, outputs=api_key_status)

            model_choice = gr.Radio(["gpt-4", "gpt-4.1", "gpt-4.5"], label="選擇 AI 模型", value="gpt-4")
            model_status = gr.Textbox(label="模型狀態", interactive=False, value="✅ 已選擇:gpt-4")
            model_choice.change(set_model, inputs=model_choice, outputs=model_status)

        with gr.Tab("📄 摘要"):
            pdf_upload = gr.File(label="上傳 PDF", file_types=[".pdf"])
            summary_btn = gr.Button("生成摘要")
            summary_output = gr.Textbox(label="PDF 摘要", lines=12)
            summary_btn.click(generate_summary, inputs=pdf_upload, outputs=summary_output)

        with gr.Tab("❓ 問答"):
            question_input = gr.Textbox(label="請輸入問題", lines=2)
            question_btn = gr.Button("送出問題")
            answer_output = gr.Textbox(label="AI 回答", lines=8)
            question_btn.click(ask_question, inputs=question_input, outputs=answer_output)
            question_input.submit(ask_question, inputs=question_input, outputs=answer_output)

        # NOTE(review): the original indentation is not recoverable from this
        # diff view; the button is placed below the tabs — confirm placement.
        clear_btn = gr.Button("🗑️ 清除所有資料")
        clear_btn.click(clear_all, outputs=[summary_output, question_input, answer_output])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(show_error=True)
|
|
|
|
|
|
|
|