Spaces:

huggingchat
/

chat-ui

Running

coyotte508 HF Staff committed on Apr 25, 2023

Commit

9be5ab5

unverified ·

1 Parent(s): c2e468f

⚡️ Load balance endpoints (#106)

Files changed (4) hide show

.env CHANGED Viewed

@@ -3,15 +3,17 @@
 MONGODB_URL=#your mongodb URL here
 MONGODB_DB_NAME=chat-ui
-HF_TOKEN=#your huggingface token here
 COOKIE_NAME=hf-chat
 PUBLIC_MAX_INPUT_TOKENS=1024
 PUBLIC_ORIGIN=#https://hf.co
-PUBLIC_MODEL_ENDPOINT=https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-6-llama-30b
 PUBLIC_MODEL_NAME=OpenAssistant/oasst-sft-6-llama-30b # public facing link
 PUBLIC_MODEL_ID=OpenAssistant/oasst-sft-6-llama-30b-xor # used to link to model page
 PUBLIC_DISABLE_INTRO_TILES=false
 PUBLIC_USER_MESSAGE_TOKEN=<|prompter|>
 PUBLIC_ASSISTANT_MESSAGE_TOKEN=<|assistant|>
 PUBLIC_SEP_TOKEN=<|endoftext|>

 MONGODB_URL=#your mongodb URL here
 MONGODB_DB_NAME=chat-ui
 COOKIE_NAME=hf-chat
 PUBLIC_MAX_INPUT_TOKENS=1024
 PUBLIC_ORIGIN=#https://hf.co
 PUBLIC_MODEL_NAME=OpenAssistant/oasst-sft-6-llama-30b # public facing link
 PUBLIC_MODEL_ID=OpenAssistant/oasst-sft-6-llama-30b-xor # used to link to model page
 PUBLIC_DISABLE_INTRO_TILES=false
 PUBLIC_USER_MESSAGE_TOKEN=<|prompter|>
 PUBLIC_ASSISTANT_MESSAGE_TOKEN=<|assistant|>
 PUBLIC_SEP_TOKEN=<|endoftext|>
+# JSON-encoded Array<{endpoint: string, authorization: "Bearer XXX", weight: number}> of endpoints to load balance across.
+# E.g. if one endpoint has weight 2 and another has weight 1, the first endpoint will be called twice as often.
+MODEL_ENDPOINTS=`[]`

src/lib/server/modelEndpoint.ts ADDED Viewed

+import { MODEL_ENDPOINTS } from "$env/static/private";
+import { sum } from "$lib/utils/sum";
+// Weighted endpoint list, decoded once at module load from the MODEL_ENDPOINTS JSON string.
+const endpoints: Array<{ endpoint: string; authorization: string; weight: number }> = JSON.parse(
+	MODEL_ENDPOINTS
+);
+// Result of `sum` over every endpoint's weight; used to scale the random draw.
+const totalWeight = sum(endpoints.map(({ weight }) => weight));
+/**
+ * Find a random load-balanced endpoint.
+ *
+ * Each call draws a number in `[0, totalWeight)` and walks the configured
+ * `endpoints` in order, so an endpoint is chosen with probability
+ * proportional to its `weight`.
+ *
+ * @returns one entry of the configured `endpoints` list
+ * @throws Error when no endpoint with a positive weight is configured
+ */
+export function modelEndpoint(): { endpoint: string; authorization: string; weight: number } {
+	let random = Math.random() * totalWeight;
+	// Last endpoint that could legitimately be selected. Used only when
+	// floating-point rounding in the running subtraction below makes the loop
+	// fall through even though the configuration is valid.
+	let fallback: (typeof endpoints)[number] | undefined;
+	for (const endpoint of endpoints) {
+		if (random < endpoint.weight) {
+			return endpoint;
+		}
+		random -= endpoint.weight;
+		if (endpoint.weight > 0) {
+			fallback = endpoint;
+		}
+	}
+	// `totalWeight > 0` is false for 0 and NaN, so an empty or invalid
+	// configuration still reports an error instead of silently picking one.
+	if (fallback && totalWeight > 0) {
+		return fallback;
+	}
+	throw new Error("Invalid config, no endpoint found");
+}

src/routes/conversation/[id]/+server.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import { HF_TOKEN } from "$env/static/private";
-import { PUBLIC_MODEL_ENDPOINT, PUBLIC_SEP_TOKEN } from "$env/static/public";
 import { buildPrompt } from "$lib/buildPrompt.js";
 import { collections } from "$lib/server/database.js";
 import type { Message } from "$lib/types/Message.js";
 import { streamToAsyncIterable } from "$lib/utils/streamToAsyncIterable";
 import { sum } from "$lib/utils/sum";
@@ -29,10 +29,12 @@ export async function POST({ request, fetch, locals, params }) {
 	const messages = [...conv.messages, { from: "user", content: json.inputs }] satisfies Message[];
 	const prompt = buildPrompt(messages);
-	const resp = await fetch(PUBLIC_MODEL_ENDPOINT, {
 		headers: {
 			"Content-Type": request.headers.get("Content-Type") ?? "application/json",
-			Authorization: `Bearer ${HF_TOKEN}`,
 		},
 		method: "POST",
 		body: JSON.stringify({

+import { PUBLIC_SEP_TOKEN } from "$env/static/public";
 import { buildPrompt } from "$lib/buildPrompt.js";
 import { collections } from "$lib/server/database.js";
+import { modelEndpoint } from "$lib/server/modelEndpoint.js";
 import type { Message } from "$lib/types/Message.js";
 import { streamToAsyncIterable } from "$lib/utils/streamToAsyncIterable";
 import { sum } from "$lib/utils/sum";
 	const messages = [...conv.messages, { from: "user", content: json.inputs }] satisfies Message[];
 	const prompt = buildPrompt(messages);
+	const randomEndpoint = modelEndpoint();
+	const resp = await fetch(randomEndpoint.endpoint, {
 		headers: {
 			"Content-Type": request.headers.get("Content-Type") ?? "application/json",
+			Authorization: randomEndpoint.authorization,
 		},
 		method: "POST",
 		body: JSON.stringify({

src/routes/conversation/[id]/summarize/+server.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import { HF_TOKEN } from "$env/static/private";
-import { PUBLIC_MAX_INPUT_TOKENS, PUBLIC_MODEL_ENDPOINT } from "$env/static/public";
 import { buildPrompt } from "$lib/buildPrompt";
 import { collections } from "$lib/server/database.js";
 import { textGeneration } from "@huggingface/inference";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";
@@ -38,14 +38,20 @@ export async function POST({ params, locals, fetch }) {
 		return_full_text: false,
 	};
 	const { generated_text } = await textGeneration(
 		{
-			model: PUBLIC_MODEL_ENDPOINT,
 			inputs: prompt,
 			parameters,
-			accessToken: HF_TOKEN,
 		},
-		{ fetch }
 	);
 	if (generated_text) {

+import { PUBLIC_MAX_INPUT_TOKENS } from "$env/static/public";
 import { buildPrompt } from "$lib/buildPrompt";
 import { collections } from "$lib/server/database.js";
+import { modelEndpoint } from "$lib/server/modelEndpoint.js";
 import { textGeneration } from "@huggingface/inference";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";
 		return_full_text: false,
 	};
+	const endpoint = modelEndpoint();
 	const { generated_text } = await textGeneration(
 		{
+			model: endpoint.endpoint,
 			inputs: prompt,
 			parameters,
 		},
+		{
+			fetch: (url, options) =>
+				fetch(url, {
+					...options,
+					headers: { ...options?.headers, Authorization: endpoint.authorization },
+				}),
+		}
 	);
 	if (generated_text) {