// nn-ui-v2/build/server/chunks/LlamaCppService-fS5DnT85.js
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { d as private_env } from './shared-server-49TKSBDM.js';
import dns from 'node:dns';
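// Prefer IPv4 results when resolving hostnames. This is commonly needed when
// a local llama.cpp server listens on 127.0.0.1 but the hostname would
// otherwise resolve to ::1 first.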
dns.setDefaultResultOrder("ipv4first");
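/**
 * Thin client for a llama.cpp HTTP server. Wraps the /health, /tokenize,
 * /detokenize and /completion endpoints and exposes streaming generators
 * for single-shot prediction and conversation-style requests.
 */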
class LlamaCppService {
  url = "";
  llmParams;
  constructor(params) {
    this.url = params.url;
    this.llmParams = params;
  }
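  /**
   * Queries GET /health and returns the reported status ("ok", "error" or
   * "loading model"); any network failure or unknown status is reported as
   * "unavailable".
   */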
  async health() {
    try {
      const r = await fetch(`${this.url}/health`, {
        method: "GET",
        headers: {
          "Content-Type": "application/json"
        }
      });
      const data = await r.json();
      if (data.status === "ok" || data.status === "error" || data.status === "loading model") {
        return data.status;
      }
    } catch (error) {
      // Log the error object directly; a JSON round-trip reduces Error
      // instances to "{}" because their properties are non-enumerable.
      console.log(error);
    }
    return "unavailable";
  }
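  /**
   * POSTs the prompt to /tokenize and returns the resulting token id array,
   * or null if the endpoint is missing or the request fails.
   */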
  async tokenize(prompt, abortController) {
    const response = await fetch(`${this.url}/tokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        content: prompt
      }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.tokens) {
        return data.tokens;
      }
    } else if (response.status === 404) {
      console.log("Tokenization endpoint not found (404).");
    } else {
      console.log(`llama.cpp: Failed to tokenize: ${await response.text()}`);
    }
    return null;
  }
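  /**
   * POSTs a token id array to /detokenize and returns the decoded text
   * (trimmed), or null if the endpoint is missing or the request fails.
   */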
  async detokenize(tokens, abortController) {
    const response = await fetch(`${this.url}/detokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        tokens
      }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.content) {
        return data.content.trim();
      }
    } else if (response.status === 404) {
      console.log("Detokenization endpoint not found (404).");
    } else {
      console.log(`Failed to detokenize: ${await response.text()}`);
    }
    return null;
  }
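  /**
   * Builds the JSON payload for POST /completion: applies the prompt
   * template, enables streaming, sets stop sequences, and copies any
   * sampling parameters from predict_params, coercing them to the types
   * the llama.cpp server expects.
   */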
  createRequest(prompt, llmParams) {
    const actualPrompt = this.applyLlmTemplateToPrompt(prompt);
    const request = {
      stream: true,
      stop: llmParams.predict_params?.stop || ["</s>", "bot:", "user:"],
      prompt: actualPrompt
    };
    request.n_predict = llmParams.predict_params?.n_predict || -1;
    request.temperature = llmParams.predict_params?.temperature || 0;
    if (llmParams.predict_params?.top_k != null) {
      request.top_k = Number(llmParams.predict_params.top_k);
    }
    if (llmParams.predict_params?.top_p != null) {
      request.top_p = Number(llmParams.predict_params.top_p);
    }
    if (llmParams.predict_params?.min_p != null) {
      request.min_p = Number(llmParams.predict_params.min_p);
    }
    if (llmParams.predict_params?.seed != null) {
      request.seed = Number(llmParams.predict_params.seed);
    }
    if (llmParams.predict_params?.n_keep != null) {
      request.n_keep = Number(llmParams.predict_params.n_keep);
    }
    if (llmParams.predict_params?.cache_prompt != null) {
      request.cache_prompt = Boolean(llmParams.predict_params.cache_prompt);
    }
    // repeat_last_n is an integer window and the penalty values are floats
    // in the llama.cpp /completion API, so all four are coerced with Number.
    if (llmParams.predict_params?.repeat_penalty != null) {
      request.repeat_penalty = Number(llmParams.predict_params.repeat_penalty);
    }
    if (llmParams.predict_params?.repeat_last_n != null) {
      request.repeat_last_n = Number(llmParams.predict_params.repeat_last_n);
    }
    if (llmParams.predict_params?.frequency_penalty != null) {
      request.frequency_penalty = Number(llmParams.predict_params.frequency_penalty);
    }
    if (llmParams.predict_params?.presence_penalty != null) {
      request.presence_penalty = Number(llmParams.predict_params.presence_penalty);
    }
    return request;
  }
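  /**
   * Wraps the raw prompt in the configured template, if any, by replacing
   * the {{PROMPT}} placeholder.
   */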
  applyLlmTemplateToPrompt(prompt) {
    let actualPrompt = prompt;
    if (this.llmParams.template !== undefined) {
      actualPrompt = this.llmParams.template.replace("{{PROMPT}}", actualPrompt);
    }
    return actualPrompt;
  }
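  /**
   * Trims `text` to fit the model's context window, reserving room for
   * n_predict output tokens, the templated user request and the system
   * prompt. Falls back to a character-based cut (an approximation, since
   * the budget is counted in tokens) when tokenize/detokenize fail.
   */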
  async trimTokenizedText(text, userRequest, { abortController }) {
    let tokens = await this.tokenize(text, abortController);
    let systemPromptTokenCount = 0;
    if (this.llmParams.predict_params?.system_prompt) {
      systemPromptTokenCount = (await this.tokenize(this.llmParams.predict_params?.system_prompt, abortController))?.length || 0;
    }
    const originalTokenCount = tokens?.length || -1;
    const auxTokens = await this.tokenize(this.applyLlmTemplateToPrompt(userRequest), abortController);
    // Token budget: context size minus reserved output tokens, the templated
    // request and the system prompt. n_predict defaults to 0 here so that a
    // missing value does not poison the arithmetic with NaN.
    const maxLength = Number(this.llmParams.context) - Number(this.llmParams.predict_params?.n_predict ?? 0) - (auxTokens?.length ?? 0) - systemPromptTokenCount;
    if (tokens !== null) {
      tokens = tokens.slice(0, maxLength);
      const detokenizedPrompt = await this.detokenize(tokens, abortController);
      if (detokenizedPrompt !== null) {
        text = detokenizedPrompt;
      } else {
        text = text.substring(0, maxLength);
      }
    } else {
      text = text.substring(0, maxLength);
    }
    return { result: text, originalTokenCount, slicedTokenCount: tokens?.length || -1 };
  }
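  /**
   * Returns an async function that POSTs a prompt to /completion and yields
   * streamed tokens. On a 404 the request is retried every
   * LLM_API_404_RETRY_INTERVAL seconds (if configured), covering the window
   * where the server is up but the model is still loading. The full
   * request/response pair is logged via createLogFile when the stream ends.
   */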
  predict({ abortController }) {
    return async ({ prompt, llmParams }) => {
      const request = this.createRequest(prompt, llmParams);
      console.log(`Predict request. Url: ${this.url}`);
      console.log(`Prompt: ${request.prompt.substring(0, 30)}`);
      let r;
      while (true) {
        r = await fetch(`${this.url}/completion`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "Accept": "text/event-stream"
          },
          body: JSON.stringify(request),
          signal: abortController.signal
        });
        if (r.status === 404) {
          if (!private_env.LLM_API_404_RETRY_INTERVAL) {
            break;
          }
          console.log(`Received 404, retrying after ${private_env.LLM_API_404_RETRY_INTERVAL} seconds...`);
          await new Promise((res) => setTimeout(res, Number(private_env.LLM_API_404_RETRY_INTERVAL) * 1e3));
        } else {
          break;
        }
      }
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      const self = this;
      return async function* () {
        let generatedText = "";
        let tokenId = 0;
        while (true) {
          const out = await reader?.read() ?? { done: false, value: undefined };
          if (out.done) {
            reader?.cancel();
            self.createLogFile(JSON.stringify({
              request: prompt,
              response: generatedText
            }), "llm-service");
            return;
          }
          if (!out.value) {
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            // Server-sent events: strip the "data: " prefix and parse the
            // JSON payload; chunks that fail to parse are skipped. Default
            // to "" so a frame without a content field cannot crash below.
            try {
              const data = JSON.parse(out.value.slice(6));
              tokenValue = data.content ?? "";
            } catch (e) {
            }
          } else if (private_env.LLM_API_VERSION === "v1") {
            // v1 servers stream plain text rather than SSE frames.
            tokenValue = out.value;
          }
          const text = tokenValue.replace("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }
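  /**
   * Returns an async function that takes a chat history, trims the latest
   * user message to fit the context window (reserving n_predict output
   * tokens), and streams the completion for it. The streaming protocol
   * matches predict() above.
   */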
  conversation({ abortController }) {
    return async ({ history, llmParams }) => {
      let prompt = history[history.length - 1][0];
      // Reserve room for n_predict output tokens when trimming the prompt;
      // n_predict defaults to 0 so a missing value does not yield NaN.
      const promptBudget = Number(this.llmParams.context) - Number(this.llmParams.predict_params?.n_predict ?? 0);
      let tokens = await this.tokenize(prompt, abortController);
      if (tokens !== null) {
        tokens = tokens.slice(0, promptBudget);
        const detokenizedPrompt = await this.detokenize(tokens, abortController);
        if (detokenizedPrompt !== null) {
          prompt = detokenizedPrompt;
        } else {
          prompt = prompt.substring(0, promptBudget);
        }
      } else {
        prompt = prompt.substring(0, promptBudget);
      }
      // Build the request only after trimming so the /completion call is
      // sent the trimmed prompt rather than the original one.
      const request = this.createRequest(prompt, llmParams);
      const r = await fetch(`${this.url}/completion`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Accept": "text/event-stream"
        },
        body: JSON.stringify(request),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      return async function* () {
        let generatedText = "";
        let tokenId = 0;
        while (true) {
          const out = await reader?.read() ?? { done: false, value: undefined };
          if (out.done || !out.value) {
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              const data = JSON.parse(out.value.slice(6));
              tokenValue = data.content ?? "";
            } catch (e) {
            }
          } else if (private_env.LLM_API_VERSION === "v1") {
            tokenValue = out.value;
          }
          const text = tokenValue.replace("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }
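  /**
   * Flattens a [userPrompt, botResponse] history into a single
   * Llama-2-style prompt using [INST] ... [/INST] markers.
   */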
  formatPrompt(history) {
    let prompt = "";
    for (const [userPrompt, botResponse] of history) {
      prompt += ` [INST] ${userPrompt} [/INST]`;
      if (botResponse) {
        prompt += botResponse;
      }
    }
    return prompt;
  }
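  /**
   * Writes `text` to a timestamped JSON file under LOGS_ROOT_FOLDER/llama
   * (a no-op when LOGS_ROOT_FOLDER is unset).
   */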
  createLogFile(text, namePrefix = "") {
    if (!private_env.LOGS_ROOT_FOLDER) {
      return;
    }
    try {
      const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER, "llama");
      if (!existsSync(logsDirectory)) {
        mkdirSync(logsDirectory, {
          recursive: true
        });
      }
      const timestamp = new Date().toISOString().replace(/[:.]/g, "");
      const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
      writeFileSync(logFilePath, text);
      console.log(`Log file created: ${logFilePath}`);
    } catch (e) {
      console.log("Failed to create log file in llama service");
      console.log(e);
    }
  }
}
export { LlamaCppService as L };
//# sourceMappingURL=LlamaCppService-fS5DnT85.js.map