import random
import json
import aiohttp
import asyncio
from aiohttp import web
from datetime import datetime
# Debug mode switch
DEBUG_MODE = False
# Fixed upstream model names and request parameters
DEFAULT_MODEL = "llama3.1-8b"
ALTERNATE_MODEL = "llama3.1-70b"
FIXED_URL = "https://api.cerebras.ai/v1/chat/completions"
FIXED_TEMPERATURE = 0.2
FIXED_TOP_P = 1
FIXED_MAX_TOKENS = 4096
# Print a timestamped log message
def log_basic_info(message):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{timestamp}] {message}")
# Asynchronously forward the request to the upstream API and print debug information
async def send_request(auth_token, data):
    try:
        headers = {
            "accept": "application/json",
            "authorization": f"Bearer {auth_token}",
            "content-type": "application/json"
        }
        requested_model = data.get("model", DEFAULT_MODEL)
        # Only the two known models are allowed; anything else falls back to the default
        model_to_use = ALTERNATE_MODEL if requested_model == ALTERNATE_MODEL else DEFAULT_MODEL
        log_basic_info(f"Requested model: {requested_model}, Using model: {model_to_use}")
        payload = {
            "messages": data.get("messages", []),
            "model": model_to_use,
            "temperature": FIXED_TEMPERATURE,
            "top_p": FIXED_TOP_P,
            "max_tokens": FIXED_MAX_TOKENS
        }
        if DEBUG_MODE:
            print("Request Payload:", json.dumps(payload, indent=4))
            print("Request Headers:", headers)
        async with aiohttp.ClientSession() as session:
            async with session.post(FIXED_URL, headers=headers, json=payload) as resp:
                response_text = await resp.text()
                response_json = json.loads(response_text)
                total_tokens = response_json.get('usage', {}).get('total_tokens', 'N/A')
                total_time = response_json.get('time_info', {}).get('total_time')
                # total_time may be absent; only apply float formatting when it is numeric
                total_time_str = f"{total_time:.3f} seconds" if isinstance(total_time, (int, float)) else "N/A"
                log_basic_info(f"Path: {FIXED_URL}, Status Code: {resp.status}, Total Tokens Used: {total_tokens}, Total Time: {total_time_str}")
                return response_text
    except Exception as e:
        log_basic_info(f"Exception occurred: {str(e)}")
        # Re-raise so the caller returns a proper error response instead of parsing None
        raise
# Main handler: validate the request, pick an API key, and proxy the call upstream
async def handle_request(request):
    try:
        request_data = await request.json()
        headers = dict(request.headers)
        # Case-insensitive lookup; clients may send several comma-separated keys in one Bearer header
        authorization_header = request.headers.get('Authorization', '')
        auth_tokens = [auth.strip() for auth in authorization_header.replace('Bearer ', '').split(',') if auth.strip()]
        if not auth_tokens:
            return web.json_response({"error": "Missing Authorization token"}, status=400)
        # Pick one key at random to spread load across the supplied keys
        auth_token = random.choice(auth_tokens)
        headers['Authorization'] = f"Bearer {auth_token}"
        log_basic_info(f"Received request for path: {request.path}")
        if DEBUG_MODE:
            print("Received Request Data:", json.dumps(request_data, indent=4))
            print("Received Headers:", headers)
        response_text = await send_request(auth_token, request_data)
        return web.json_response(json.loads(response_text))
    except Exception as e:
        log_basic_info(f"Exception occurred in handling request: {str(e)}")
        return web.json_response({"error": str(e)}, status=500)
# Set up routes
app = web.Application()
app.router.add_post('/hf/v1/chat/completions', handle_request)
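# Illustrative only (not part of the proxy itself): a minimal client sketch showing how this
# endpoint could be called. It assumes the server is running locally on port 7860 (as
# configured below) and that one or more Cerebras API keys are passed comma-separated in a
# single Bearer header, from which handle_request picks one at random. The function name and
# the placeholder keys are hypothetical; run it with asyncio.run(example_client_call()) from
# a separate process while the server is up.
async def example_client_call():
    request_headers = {
        "Authorization": "Bearer key1,key2",  # placeholder keys
        "Content-Type": "application/json"
    }
    body = {
        "model": "llama3.1-8b",
        "messages": [{"role": "user", "content": "Hello"}]
    }
    async with aiohttp.ClientSession() as session:
        async with session.post("http://127.0.0.1:7860/hf/v1/chat/completions",
                                headers=request_headers, json=body) as resp:
            print(resp.status, await resp.text())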
# Run the server
if __name__ == '__main__':
    web.run_app(app, host='0.0.0.0', port=7860)