import random
import json
import aiohttp
import asyncio
from aiohttp import web
from datetime import datetime
# Debug mode switch
DEBUG_MODE = False
# Fixed upstream model names and request parameters
DEFAULT_MODEL = "llama3.1-8b"
ALTERNATE_MODEL = "llama3.1-70b"
FIXED_URL = "https://api.cerebras.ai/v1/chat/completions"
FIXED_TEMPERATURE = 0.2
FIXED_TOP_P = 1
FIXED_MAX_TOKENS = 4096
# Print a timestamped log message
def log_basic_info(message):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{timestamp}] {message}")
# Asynchronously forward the request to the upstream API and print debug information
async def send_request(auth_token, data):
    try:
        headers = {
            "accept": "application/json",
            "authorization": f"Bearer {auth_token}",
            "content-type": "application/json"
        }
        requested_model = data.get("model", DEFAULT_MODEL)
        # Only the two known models are allowed; anything else falls back to the default
        model_to_use = ALTERNATE_MODEL if requested_model == ALTERNATE_MODEL else DEFAULT_MODEL
        log_basic_info(f"Requested model: {requested_model}, Using model: {model_to_use}")
        payload = {
            "messages": data.get("messages", []),
            "model": model_to_use,
            "temperature": FIXED_TEMPERATURE,
            "top_p": FIXED_TOP_P,
            "max_tokens": FIXED_MAX_TOKENS
        }
        if DEBUG_MODE:
            print("Request Payload:", json.dumps(payload, indent=4))
            print("Request Headers:", headers)
        async with aiohttp.ClientSession() as session:
            async with session.post(FIXED_URL, headers=headers, json=payload) as resp:
                response_text = await resp.text()
                response_json = json.loads(response_text)
                total_tokens = response_json.get('usage', {}).get('total_tokens', 'N/A')
                total_time = response_json.get('time_info', {}).get('total_time')
                # total_time may be absent; only apply float formatting when it is numeric
                total_time_str = f"{total_time:.3f} seconds" if isinstance(total_time, (int, float)) else "N/A"
                log_basic_info(f"Path: {FIXED_URL}, Status Code: {resp.status}, Total Tokens Used: {total_tokens}, Total Time: {total_time_str}")
                return response_text
    except Exception as e:
        log_basic_info(f"Exception occurred: {str(e)}")
        # Re-raise so the caller returns a proper error response instead of parsing None
        raise
# Main handler: validate the request, pick an API key, and proxy the call upstream
async def handle_request(request):
    try:
        request_data = await request.json()
        headers = dict(request.headers)
        # Case-insensitive lookup; clients may send several comma-separated keys in one Bearer header
        authorization_header = request.headers.get('Authorization', '')
        auth_tokens = [auth.strip() for auth in authorization_header.replace('Bearer ', '').split(',') if auth.strip()]
        if not auth_tokens:
            return web.json_response({"error": "Missing Authorization token"}, status=400)
        # Pick one key at random to spread load across the supplied keys
        auth_token = random.choice(auth_tokens)
        headers['Authorization'] = f"Bearer {auth_token}"
        log_basic_info(f"Received request for path: {request.path}")
        if DEBUG_MODE:
            print("Received Request Data:", json.dumps(request_data, indent=4))
            print("Received Headers:", headers)
        response_text = await send_request(auth_token, request_data)
        return web.json_response(json.loads(response_text))
    except Exception as e:
        log_basic_info(f"Exception occurred in handling request: {str(e)}")
        return web.json_response({"error": str(e)}, status=500)
# Set up routes
app = web.Application()
app.router.add_post('/hf/v1/chat/completions', handle_request)
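# Illustrative only (not part of the proxy itself): a minimal client sketch showing how this
# endpoint could be called. It assumes the server is running locally on port 7860 (as
# configured below) and that one or more Cerebras API keys are passed comma-separated in a
# single Bearer header, from which handle_request picks one at random. The function name and
# the placeholder keys are hypothetical; run it with asyncio.run(example_client_call()) from
# a separate process while the server is up.
async def example_client_call():
    request_headers = {
        "Authorization": "Bearer key1,key2",  # placeholder keys
        "Content-Type": "application/json"
    }
    body = {
        "model": "llama3.1-8b",
        "messages": [{"role": "user", "content": "Hello"}]
    }
    async with aiohttp.ClientSession() as session:
        async with session.post("http://127.0.0.1:7860/hf/v1/chat/completions",
                                headers=request_headers, json=body) as resp:
            print(resp.status, await resp.text())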
# Run the server
if __name__ == '__main__':
    web.run_app(app, host='0.0.0.0', port=7860)