from flask import Flask, request, Response, stream_with_context, jsonify
from openai import OpenAI
import json

app = Flask(__name__)


@app.route('/')
def index():
    """Health-check / landing endpoint."""
    return "Hello, this is the root page of your Flask application!"


@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat():
    """Proxy an OpenAI-compatible chat-completions request upstream.

    The caller smuggles both credentials in the Authorization header:
    ``Authorization: Bearer <api_key> <base_url>``.  The JSON body is
    forwarded to the upstream service and the response — streamed as
    server-sent events or as a single JSON payload — is relayed back.

    Returns:
        401 when the Authorization header is missing/malformed,
        400 when ``model`` or ``messages`` is absent from the body,
        500 on any upstream or parsing failure,
        otherwise the upstream completion (SSE stream or JSON).
    """
    try:
        # Validate the API key (and upstream base URL) carried in the header.
        auth_header = request.headers.get('Authorization')
        if not auth_header or not auth_header.startswith('Bearer '):
            return jsonify({"error": "Unauthorized"}), 401
        # FIX: the original indexed parts [1] and [2] unconditionally, so a
        # plain "Bearer <key>" header raised IndexError (-> 500) instead of 401.
        parts = auth_header.split(" ")
        if len(parts) < 3:
            return jsonify({"error": "Unauthorized"}), 401
        api_key = parts[1]
        base_url = parts[2]

        data = request.json

        # Validate the request body shape.
        if not data or 'messages' not in data or 'model' not in data:
            return jsonify({"error": "Missing 'messages' or 'model' in request body"}), 400

        model = data['model']
        messages = data['messages']
        temperature = data.get('temperature', 0.7)  # default 0.7
        top_p = data.get('top_p', 1.0)              # default 1.0
        n = data.get('n', 1)                        # default 1
        stream = data.get('stream', False)          # default False

        # FIX: only forward function-calling parameters when the caller
        # actually supplied them.  The openai v1 SDK serializes an explicit
        # None as JSON null (rejected upstream), whereas omitting the keyword
        # uses the SDK's NOT_GIVEN sentinel and drops it from the payload.
        extra = {}
        if data.get('functions') is not None:
            extra['functions'] = data['functions']
        if data.get('function_call') is not None:
            extra['function_call'] = data['function_call']

        # Per-request OpenAI client: credentials and upstream differ per caller.
        client = OpenAI(
            api_key=api_key,
            base_url=base_url,
        )

        if stream:
            # Streamed response: relay each upstream chunk as an SSE `data:` line.
            def generate():
                response = client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    n=n,
                    stream=True,
                    **extra,
                )
                for chunk in response:
                    yield f"data: {json.dumps(chunk.to_dict())}\n\n"

            return Response(stream_with_context(generate()), content_type='text/event-stream')

        # Non-streamed response: one JSON payload back to the caller.
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            n=n,
            **extra,
        )
        return jsonify(response.to_dict())
    except Exception as e:
        # Top-level boundary handler: log and surface the failure as a 500.
        print("Exception:", e)
        return jsonify({"error": str(e)}), 500


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860, threaded=True)