Spaces:
Running
Running
File size: 6,956 Bytes
67add1d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# app.py
import os
import threading
from functools import wraps
import google.generativeai as genai
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from dotenv import load_dotenv
import chromadb
# 从您的核心逻辑文件中导入类
from app_chromadb import MarkdownKnowledgeBase
# --- Initialization and configuration ---
# Load variables from a .env file (python-dotenv) before any os.environ reads
# further down in this module.
load_dotenv()

# The static folder is '.' and it is mounted at the URL root (empty
# static_url_path), so static files are requested without a URL prefix.
# NOTE(review): this looks like it makes every file in that folder fetchable
# over HTTP (potentially app.py or a .env file) -- confirm this is intended.
app = Flask(
    __name__,
    static_url_path='',
    static_folder='.',
)

# Enable CORS on the whole app using flask_cors defaults.
CORS(app)
# --- API key authentication settings ---
# KNOWLEDGE_BASE_API_KEYS holds a comma-separated list of accepted keys.
# Each entry is stripped of surrounding whitespace and empty entries are dropped.
VALID_API_KEYS_STR = os.environ.get("KNOWLEDGE_BASE_API_KEYS", "")
VALID_API_KEYS = set(filter(None, map(str.strip, VALID_API_KEYS_STR.split(','))))

# With no keys configured the API is left open; warn loudly at start-up.
if len(VALID_API_KEYS) == 0:
    print("⚠️ 警告: 未配置 KNOWLEDGE_BASE_API_KEYS。API 将对所有人开放!")
# --- ChromaDB, Gemini and SiliconFlow instance setup ---
# Open the persistent ChromaDB store kept in a "chroma_db" directory next to
# this file and fetch (or create) the knowledge-base collection. Any failure
# leaves both `chroma_client` and `collection` set to None so the routes can
# report the problem instead of crashing at import time.
try:
    COLLECTION_NAME = "markdown_knowledge_base_m3"
    CHROMA_DATA_PATH = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "chroma_db",
    )
    chroma_client = chromadb.PersistentClient(path=CHROMA_DATA_PATH)
    collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)
    print(f"✅ ChromaDB 客户端已连接,数据存储在 '{CHROMA_DATA_PATH}'")
except Exception as e:
    chroma_client = collection = None
    print(f"❌ 初始化 ChromaDB 失败: {e}")
# Configure the Gemini SDK with the key from GEMINI_API_KEY and create the
# model handle used by /summarize. On any failure `gemini_model` is None.
try:
    GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
    genai.configure(api_key=GEMINI_API_KEY)
    gemini_model = genai.GenerativeModel('gemini-1.5-flash')
except Exception as e:
    gemini_model = None
    print(f"❌ Gemini API 配置失败: {e}")
else:
    print("✅ Gemini API 已配置。")
# Build the knowledge-base helper from the SiliconFlow token and the ChromaDB
# collection opened above. On any failure `kb_instance` is None.
try:
    SF_API_TOKEN = os.environ.get("SILICONFLOW_API_TOKEN")
    kb_instance = MarkdownKnowledgeBase(
        api_token=SF_API_TOKEN,
        chroma_collection=collection,
    )
except Exception as e:
    kb_instance = None
    print(f"❌ 知识库实例配置失败: {e}")
else:
    print("✅ SiliconFlow 与知识库实例已配置。")

# Shared, mutable status record returned by /status and updated by /build.
kb_status = dict(is_building=False)
# --- API key authentication decorator ---
def require_api_key(f):
    """Protect a Flask view with the ``X-API-Key`` request header.

    If no keys are configured (``VALID_API_KEYS`` is empty) the wrapped view is
    called without any check. Otherwise the header value must equal one of the
    configured keys; if it does not, a JSON error with HTTP 403 is returned and
    the view is not called.

    Args:
        f: The view function to protect.

    Returns:
        The wrapped view, carrying ``f``'s metadata via ``functools.wraps``.
    """
    import hmac  # local import: only this decorator needs it

    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Authentication is disabled entirely when no keys are configured.
        if not VALID_API_KEYS:
            return f(*args, **kwargs)

        api_key = request.headers.get('X-API-Key')
        if api_key:
            # Compare in constant time so response timing does not reveal how
            # much of a guessed key was correct. Encoding to bytes keeps
            # compare_digest happy with non-ASCII keys, and every configured
            # key is checked (no early exit) for the same reason.
            supplied = api_key.encode('utf-8')
            authorized = False
            for valid_key in VALID_API_KEYS:
                authorized |= hmac.compare_digest(supplied, valid_key.encode('utf-8'))
            if authorized:
                return f(*args, **kwargs)

        return jsonify({"error": "授权失败。请提供有效'X-API-Key'请求头。"}), 403

    return decorated_function
# --- Front-end page route ---
@app.route('/')
def serve_index():
    """Return ``index.html`` from the ``'.'`` directory as the landing page."""
    page_dir, page_name = '.', 'index.html'
    return send_from_directory(page_dir, page_name)
# --- API endpoints ---
@app.route('/status', methods=['GET'])
def get_status():
    """Refresh the shared ``kb_status`` record and return it as JSON.

    When a collection is available, ``total_items`` and ``is_built`` are
    updated from its current count, and the "ready" message is written unless
    a build is in progress. Without a collection only the message is set.
    """
    if not collection:
        kb_status['message'] = "ChromaDB 未连接。"
        return jsonify(kb_status)

    kb_status['total_items'] = collection.count()
    kb_status['is_built'] = kb_status['total_items'] > 0
    # Leave the build-progress message alone while a build is running.
    if not kb_status['is_building']:
        kb_status['message'] = f"知识库已就绪,共有 {kb_status['total_items']} 个条目。"
    return jsonify(kb_status)
# Makes "check that no build is running" and "mark a build as running" one
# atomic step, so two simultaneous POST /build requests cannot both start.
_build_claim_lock = threading.Lock()


@app.route('/build', methods=['POST'])
@require_api_key
def build_knowledge_base():
    """Start a knowledge-base build in a background thread.

    Expects a JSON object body. Recognised keys:
        clear_existing: if truthy, the Chroma collection is deleted and
            recreated before building (default ``False``).
        folder_path, chunk_size (default 4096), overlap (default 400),
        max_files (default 500), sample_mode (default ``'largest'``):
            forwarded unchanged to ``kb_instance.build_knowledge_base``.

    Returns:
        202 once the background task has been started,
        400 if the body is missing or is not a JSON object,
        409 if a build is already running,
        500 if the knowledge-base instance is unavailable.
    """
    if kb_status['is_building']:
        return jsonify({"error": "知识库已在构建中,请稍后。"}), 409
    if not kb_instance:
        return jsonify({"error": "知识库实例未初始化,无法构建。"}), 500

    # silent=True yields None instead of raising on a missing/invalid body;
    # anything that is not a JSON object is rejected up front rather than
    # failing later on `.get`.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "请求体必须是 JSON 对象。"}), 400

    clear_existing = data.get('clear_existing', False)
    build_params = {
        'folder_path': data.get('folder_path'),
        'chunk_size': data.get('chunk_size', 4096),
        'overlap': data.get('overlap', 400),
        'max_files': data.get('max_files', 500),
        'sample_mode': data.get('sample_mode', 'largest'),
    }

    def build_in_background():
        # `collection` is rebound when the collection is recreated, so the
        # module-level name must be declared global here.
        global collection
        try:
            if clear_existing and chroma_client:
                print(f"正在清空现有集合: {COLLECTION_NAME}")
                chroma_client.delete_collection(name=COLLECTION_NAME)
                collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)
                # Point the knowledge-base helper at the fresh collection.
                kb_instance.collection = collection
                print("集合已清空并重建。")
            kb_instance.build_knowledge_base(**build_params)
            kb_status['message'] = f"构建完成!知识库现有 {collection.count()} 个条目。"
        except Exception as e:
            kb_status['message'] = f"构建时出错: {e}"
            print(f"Error during build: {e}")
        finally:
            # Always release the build slot, whether the build succeeded or not.
            kb_status['is_building'] = False

    # Claim the build slot in the request thread. Setting the flag only inside
    # the worker would leave a window in which a second request passes the
    # 409 check above.
    with _build_claim_lock:
        if kb_status['is_building']:
            return jsonify({"error": "知识库已在构建中,请稍后。"}), 409
        kb_status['is_building'] = True
        kb_status['message'] = "构建任务开始..."

    try:
        threading.Thread(target=build_in_background).start()
    except Exception:
        # The worker never ran, so its `finally` cannot release the slot.
        kb_status['is_building'] = False
        raise
    return jsonify({"message": "知识库构建任务已在后台启动。"}), 202
@app.route('/search', methods=['GET'])
@require_api_key
def search_in_kb():
    """Run a knowledge-base search for the ``query`` URL parameter.

    Query parameters:
        query: required search text.
        top_k: optional integer, default 5, passed to ``kb_instance.search``.

    Returns the search results as JSON, 400 when the collection is empty or
    ``query`` is missing, and 500 when the knowledge-base instance is
    unavailable or the search raises.
    """
    if not collection or collection.count() <= 0:
        return jsonify({"error": "知识库为空,请先构建。"}), 400
    if not kb_instance:
        return jsonify({"error": "知识库实例未初始化,无法搜索。"}), 500

    params = request.args
    query = params.get('query')
    if not query:
        return jsonify({"error": "必须提供 'query' 参数"}), 400
    top_k = params.get('top_k', default=5, type=int)

    try:
        hits = kb_instance.search(query, top_k=top_k)
        return jsonify(hits)
    except Exception as e:
        return jsonify({"error": f"搜索时发生错误: {e}"}), 500
@app.route('/summarize', methods=['POST'])
@require_api_key
def summarize_results():
    """Ask the Gemini model to answer a query from supplied search results.

    Expects a JSON object body with:
        query: the user's question.
        results: a non-empty list of objects, each with a string ``content``.

    Returns:
        ``{"summary": ...}`` on success,
        400 if the body, ``query`` or ``results`` is missing or malformed,
        500 if the Gemini model is unavailable or the API call fails.
    """
    if not gemini_model:
        return jsonify({"error": "Gemini API 未配置或初始化失败。"}), 500

    # silent=True returns None for a missing/invalid body instead of raising;
    # a non-object body is treated the same as missing fields.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "必须提供查询和搜索结果。"}), 400

    query = data.get('query')
    search_results = data.get('results')
    if not query or not search_results:
        return jsonify({"error": "必须提供查询和搜索结果。"}), 400

    # Reject malformed result items here; otherwise the join below would raise
    # KeyError/TypeError and surface as an unhandled 500.
    well_formed = isinstance(search_results, list) and all(
        isinstance(item, dict) and isinstance(item.get('content'), str)
        for item in search_results
    )
    if not well_formed:
        return jsonify({"error": "搜索结果格式无效:每个条目必须包含字符串类型的 'content' 字段。"}), 400

    context = "\n\n---\n\n".join(item['content'] for item in search_results)
    # The prompt text is sent to the model verbatim, so its lines are kept
    # flush-left to avoid adding indentation to the payload.
    prompt = f"""
根据以下本地知识库中搜索到的内容,请用清晰、简洁的中文直接回答用户的问题。
如果内容不足以回答,请说明现有信息无法直接回答。
用户问题: "{query}"
搜索到的内容:
---
{context}
---
你的回答:
"""
    try:
        print(f"正在向 Gemini 模型 '{gemini_model.model_name}' 发送请求...")
        response = gemini_model.generate_content(prompt)
        summary = response.text
        return jsonify({"summary": summary})
    except Exception as e:
        print(f"调用 Gemini API 时出错: {e}")
        return jsonify({"error": f"调用 AI 服务时出错: {e}"}), 500
if __name__ == '__main__':
    print("知识库后端服务 (最终版) 启动...")
    print("✅ 服务已启动!请在浏览器中打开 http://127.0.0.1:5000")
    # Original note: debug=False should be removed when running under a
    # production-grade server.
    run_options = {'host': '0.0.0.0', 'port': 5000, 'debug': False}
    app.run(**run_options)
|