import { existsSync, mkdirSync, writeFileSync } from 'fs'; import { resolve } from 'path'; import { d as private_env } from './shared-server-49TKSBDM.js'; import dns from 'node:dns'; dns.setDefaultResultOrder("ipv4first"); class LlamaCppService { url = ""; llmParams; constructor(params) { this.url = params.url; this.llmParams = params; } async health() { try { const r = await fetch(`${this.url}/health`, { method: "GET", headers: { "Content-Type": "application/json" } }); const data = await r.json(); if (data.status === "ok" || data.status === "error" || data.status === "loading model") { return data.status; } } catch (error) { console.log(JSON.parse(JSON.stringify(error))); } return "unavailable"; } async tokenize(prompt, abortController) { const response = await fetch(`${this.url}/tokenize`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ "content": prompt }), signal: abortController.signal }); if (response.ok) { const data = await response.json(); if (data.tokens) { return data.tokens; } } else if (response.status === 404) { console.log("Tokenization endpoint not found (404)."); } else { console.log(`llama.cpp: Failed to tokenize: ${await response.text()}`); } return null; } async detokenize(tokens, abortController) { const response = await fetch(`${this.url}/detokenize`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ "tokens": tokens }), signal: abortController.signal }); if (response.ok) { const data = await response.json(); if (data.content) { return data.content.trim(); } } else if (response.status === 404) { console.log("Detokenization endpoint not found (404)."); } else { console.log(`Failed to detokenize: ${await response.text()}`); } return null; } createRequest(prompt, llmParams) { const actualPrompt = this.applyLlmTemplateToPrompt(prompt); const request = { "stream": true, "stop": llmParams.predict_params?.stop || ["", "bot:", "user:"], "prompt": actualPrompt }; request["n_predict"] = llmParams.predict_params?.n_predict || -1; request["temperature"] = llmParams.predict_params?.temperature || 0; if (llmParams.predict_params?.top_k != null) { request["top_k"] = Number(llmParams.predict_params.top_k); } if (llmParams.predict_params?.top_p != null) { request["top_p"] = Number(llmParams.predict_params.top_p); } if (llmParams.predict_params?.min_p != null) { request["min_p"] = Number(llmParams.predict_params.min_p); } if (llmParams.predict_params?.seed != null) { request["seed"] = Number(llmParams.predict_params.seed); } if (llmParams.predict_params?.n_keep != null) { request["n_keep"] = Number(llmParams.predict_params.n_keep); } if (llmParams.predict_params?.cache_prompt != null) { request["cache_prompt"] = Boolean(llmParams.predict_params.cache_prompt); } if (llmParams.predict_params?.repeat_penalty != null) { request["repeat_penalty"] = Boolean(llmParams.predict_params.repeat_penalty); } if (llmParams.predict_params?.repeat_last_n != null) { request["repeat_last_n"] = Boolean(llmParams.predict_params.repeat_last_n); } if (llmParams.predict_params?.frequency_penalty != null) { request["frequency_penalty"] = Boolean(llmParams.predict_params.frequency_penalty); } if (llmParams.predict_params?.presence_penalty != null) { request["presence_penalty"] = Boolean(llmParams.predict_params.presence_penalty); } return request; } applyLlmTemplateToPrompt(prompt) { let actualPrompt = prompt; if (this.llmParams.template != void 0) { actualPrompt = this.llmParams.template.replace("{{PROMPT}}", actualPrompt); } return actualPrompt; } async trimTokenizedText(text, userRequest, { abortController }) { let tokens = await this.tokenize(text, abortController); let systemPromptTokenCount = 0; if (this.llmParams.predict_params?.system_prompt) { systemPromptTokenCount = (await this.tokenize(this.llmParams.predict_params?.system_prompt, abortController))?.length || 0; } const originalTokenCount = tokens?.length || -1; const auxTokens = await this.tokenize(this.applyLlmTemplateToPrompt(userRequest), abortController); const maxLength = Number(this.llmParams.context) - Number(this.llmParams.predict_params?.n_predict) - (auxTokens?.length ?? 0) - systemPromptTokenCount; if (tokens !== null) { tokens = tokens.slice(0, maxLength); const detokenizedPrompt = await this.detokenize(tokens, abortController); if (detokenizedPrompt !== null) { text = detokenizedPrompt; } else { text = text.substring(0, maxLength); } } else { text = text.substring(0, maxLength); } return { result: text, originalTokenCount, slicedTokenCount: tokens?.length || -1 }; } predict({ abortController }) { return async ({ prompt, llmParams }) => { const request = this.createRequest(prompt, llmParams); console.log(`Predict request. Url: ${this.url}`); console.log(`Prompt: ${request.prompt.substring(0, 30)}`); let r; while (true) { r = await fetch(`${this.url}/completion`, { method: "POST", headers: { "Content-Type": "application/json", "Accept": "text/event-stream" }, body: JSON.stringify(request), signal: abortController.signal }); if (r.status === 404) { if (!private_env.LLM_API_404_RETRY_INTERVAL) { break; } console.log(`Received 404, retrying after ${private_env.LLM_API_404_RETRY_INTERVAL} seconds...`); await new Promise((resolve2) => setTimeout(resolve2, Number(private_env.LLM_API_404_RETRY_INTERVAL) * 1e3)); } else { break; } } if (!r.ok) { throw new Error(`Failed to generate text: ${await r.text()}`); } const encoder = new TextDecoderStream(); const reader = await r.body?.pipeThrough(encoder).getReader(); let t = this; return async function* () { let stop = false; let generatedText = ""; let tokenId = 0; while (!stop) { const out = await reader?.read() ?? { done: false, value: void 0 }; if (out.done) { stop = true; reader?.cancel(); t.createLogFile(JSON.stringify({ "request": prompt, "response": generatedText }), "llm-service"); return; } if (!out.value) { stop = true; reader?.cancel(); return; } let tokenValue = ""; if (out.value.startsWith("data: ")) { try { const data = JSON.parse(out.value.slice(6)); tokenValue = data.content; } catch (e) { } } else if (private_env.LLM_API_VERSION == "v1") { tokenValue = out.value; } generatedText += tokenValue.replace("", "") ?? ""; yield { token: { id: tokenId++, text: tokenValue.replace("", "") ?? "", logprob: 0, special: false }, generated_text: null, details: null }; } }(); }; } conversation({ abortController }) { return async ({ history, llmParams }) => { let prompt = history[history.length - 1][0]; const request = this.createRequest(prompt, llmParams); let tokens = await this.tokenize(prompt, abortController); if (tokens !== null) { tokens = tokens.slice(0, Number(this.llmParams.context) - Number(this.llmParams.predict_params?.n_predict)); const detokenizedPrompt = await this.detokenize(tokens, abortController); if (detokenizedPrompt !== null) { prompt = detokenizedPrompt; } else { prompt = prompt.substring(0, Number(this.llmParams.context) - Number(this.llmParams.predict_params?.n_predict)); } } else { prompt = prompt.substring(0, Number(this.llmParams.context) - Number(this.llmParams.predict_params?.n_predict)); } const r = await fetch(`${this.url}/completion`, { method: "POST", headers: { "Content-Type": "application/json", "Accept": "text/event-stream" }, body: JSON.stringify(request), signal: abortController.signal }); if (!r.ok) { throw new Error(`Failed to generate text: ${await r.text()}`); } const encoder = new TextDecoderStream(); const reader = r.body?.pipeThrough(encoder).getReader(); return async function* () { let stop = false; let generatedText = ""; let tokenId = 0; while (!stop) { const out = await reader?.read() ?? { done: false, value: void 0 }; if (out.done) { reader?.cancel(); return; } if (!out.value) { reader?.cancel(); return; } let tokenValue = ""; if (out.value.startsWith("data: ")) { try { let data2 = JSON.parse(out.value.slice(6)); tokenValue = data2.content; } catch (e) { } } else if (private_env.LLM_API_VERSION == "v1") { tokenValue = out.value; } generatedText += tokenValue.replace("", "") ?? ""; yield { token: { id: tokenId++, text: tokenValue.replace("", "") ?? "", logprob: 0, special: false }, generated_text: null, details: null }; } }(); }; } formatPrompt(history) { let prompt = ""; for (const [userPrompt, botResponse] of history) { prompt += ` [INST] ${userPrompt} [/INST]`; if (botResponse) { prompt += `${botResponse}`; } } return prompt; } createLogFile(text, namePrefix = "") { if (!private_env.LOGS_ROOT_FOLDER) { return; } try { const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama"); if (!existsSync(logsDirectory)) { mkdirSync(logsDirectory, { recursive: true }); } const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, ""); const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`); writeFileSync(logFilePath, text); console.log(`Log file created: ${logFilePath}`); } catch (e) { console.log(`Failed to create log file in llama service`); console.log(e); } } } export { LlamaCppService as L }; //# sourceMappingURL=LlamaCppService-fS5DnT85.js.map