# Spaces: Runtime error (page status text captured with the source; not part of the program)
import time

from flask import Flask, request, jsonify

from app import generate_chat_completion

# Flask application object used by the __main__ guard at the bottom of the file.
# NOTE(review): the from-import above binds only `generate_chat_completion`, so
# assigning `app` here does not clobber anything imported. This file presumably
# must not itself be named app.py, or that import would be circular - confirm.
app = Flask(__name__)
def chat_completions():
    """Run a chat completion for the posted messages and return it as JSON.

    The request body is expected to be a JSON object with:
      * ``messages``    - non-empty list, forwarded as ``message_history`` (required)
      * ``max_tokens``  - forwarded as-is, defaults to 560
      * ``temperature`` - forwarded as-is, defaults to 0.8

    Returns:
        On success, a JSON response with ``model``, ``choices`` (one assistant
        message) and ``usage.generation_time`` (seconds, rounded to 2 places).
        A ``(response, 400)`` pair when ``messages`` is missing, empty, or not
        a list - including when the body is not a JSON object at all.
        A ``(response, 500)`` pair carrying ``str(exc)`` when generation fails.

    NOTE(review): no ``@app.route`` decorator is visible on this function in
    this view; confirm where (or whether) it is registered as an endpoint.
    """
    # silent=True yields None for a missing or malformed JSON body rather than
    # aborting the request, so every bad payload gets the same JSON 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # e.g. a JSON array or scalar: calling .get on it would raise.
        data = {}

    messages = data.get('messages', [])
    max_tokens = data.get('max_tokens', 560)
    temperature = data.get('temperature', 0.8)

    if not messages or not isinstance(messages, list):
        return jsonify({"error": "A valid 'messages' list is required."}), 400

    try:
        # Monotonic clock: the interval is unaffected by system clock changes.
        start_time = time.perf_counter()

        # Expecting plain role-content dicts (not Gradio tuples)
        result = generate_chat_completion(
            message_history=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )

        # Get only the assistant's latest message
        assistant_msg = result[-1] if isinstance(result, list) else result
        elapsed = time.perf_counter() - start_time

        return jsonify({
            "model": "mistralai/Mistral-7B-Instruct-v0.2",
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": assistant_msg
                }
            }],
            "usage": {
                "generation_time": round(elapsed, 2)
            }
        })
    except Exception as e:
        # Request boundary: report any generation/serialization failure as a JSON 500.
        return jsonify({"error": str(e)}), 500
def health_check():
    """Report that the service is up.

    Returns:
        A ``(body, status)`` pair: the plain-text message and HTTP status 200.

    NOTE(review): no ``@app.route`` decorator is visible on this function in
    this view; confirm where (or whether) it is registered as an endpoint.
    """
    return "LLM API is running", 200
if __name__ == '__main__':
    # Start Flask's built-in server only when this file is executed directly.
    # host='0.0.0.0' binds every network interface rather than just localhost.
    app.run(host='0.0.0.0', port=8081)