File size: 1,504 Bytes
8ce7317
9549030
 
461845f
8ce7317
461845f
0fd5f68
9549030
8ce7317
9549030
20e3a90
ba65e9c
c60c816
 
 
 
8ce7317
20e3a90
c60c816
 
9549030
c60c816
9549030
 
b438a2d
c60c816
 
 
20e3a90
c60c816
8b5fc5d
c60c816
8b5fc5d
 
 
c60c816
8b5fc5d
20e3a90
 
 
 
8b5fc5d
c60c816
8ce7317
 
 
20e3a90
 
c60c816
20e3a90
b438a2d
c60c816
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from flask import Flask, request, jsonify
from app import generate_chat_completion
import time

# Flask application instance; routes below attach to it.
app = Flask(__name__)

@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Handle an OpenAI-style chat-completions request.

    Expects a JSON body with a non-empty ``messages`` list of plain
    role/content dicts, plus optional ``max_tokens`` (default 560) and
    ``temperature`` (default 0.8). Delegates generation to
    ``generate_chat_completion`` and returns a minimal OpenAI-shaped
    response containing only the assistant's latest message.

    Returns:
        400 with an error JSON when the body is missing/invalid.
        500 with an error JSON when generation raises.
        200 with the completion JSON otherwise.
    """
    # BUGFIX: request.json aborts with an unhandled error (or 415) on a
    # missing or malformed JSON body, so the 400 path below was never
    # reached. get_json(silent=True) returns None instead, letting us
    # answer with a clean, explicit 400.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({"error": "A valid 'messages' list is required."}), 400

    messages = data.get('messages', [])
    if not messages or not isinstance(messages, list):
        return jsonify({"error": "A valid 'messages' list is required."}), 400

    max_tokens = data.get('max_tokens', 560)
    temperature = data.get('temperature', 0.8)

    try:
        start_time = time.time()

        # Expecting plain role-content dicts (not Gradio tuples)
        result = generate_chat_completion(
            message_history=messages,
            max_tokens=max_tokens,
            temperature=temperature
        )

        # generate_chat_completion may return either the full history
        # (list) or a bare string; keep only the assistant's latest turn.
        assistant_msg = result[-1] if isinstance(result, list) else result
        elapsed = time.time() - start_time

        return jsonify({
            "model": "mistralai/Mistral-7B-Instruct-v0.2",
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": assistant_msg
                }
            }],
            "usage": {
                "generation_time": round(elapsed, 2)
            }
        })

    except Exception as e:
        # Boundary handler: surface the failure to the client as a 500.
        # NOTE(review): str(e) may leak internal details; consider logging
        # the traceback server-side and returning a generic message.
        return jsonify({"error": str(e)}), 500

@app.route('/')
def health_check():
    """Liveness probe: confirm the API server is up with a plain 200."""
    status_message = "LLM API is running"
    return status_message, 200

if __name__ == '__main__':
    # Bind to all interfaces on port 8081 so the API is reachable from
    # outside the container/host. Development server only — use a WSGI
    # server (gunicorn/uwsgi) in production.
    app.run(host='0.0.0.0', port=8081)