import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { d as private_env } from './shared-server-49TKSBDM.js';
import dns from 'node:dns';

// Prefer IPv4 results so `localhost` resolves to 127.0.0.1 rather than ::1,
// where a local llama.cpp server is typically not listening.
dns.setDefaultResultOrder("ipv4first");

class LlamaCppService {
  url = "";
  llmParams;

  constructor(params) {
    this.url = params.url;
    this.llmParams = params;
  }

  async health() {
    try {
      const r = await fetch(`${this.url}/health`, {
        method: "GET",
        headers: {
          "Content-Type": "application/json"
        }
      });
      const data = await r.json();
      if (data.status === "ok" || data.status === "error" || data.status === "loading model") {
        return data.status;
      }
    } catch (error) {
      // Round-tripping an Error through JSON.stringify drops its message and
      // stack, so log the error object directly.
      console.log(error);
    }
    return "unavailable";
  }

  async tokenize(prompt, abortController) {
    const response = await fetch(`${this.url}/tokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ content: prompt }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.tokens) {
        return data.tokens;
      }
    } else if (response.status === 404) {
      console.log("Tokenization endpoint not found (404).");
    } else {
      console.log(`llama.cpp: Failed to tokenize: ${await response.text()}`);
    }
    return null;
  }

  async detokenize(tokens, abortController) {
    const response = await fetch(`${this.url}/detokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ tokens }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.content) {
        return data.content.trim();
      }
    } else if (response.status === 404) {
      console.log("Detokenization endpoint not found (404).");
    } else {
      console.log(`llama.cpp: Failed to detokenize: ${await response.text()}`);
    }
    return null;
  }

  createRequest(prompt, llmParams) {
    const actualPrompt = this.applyLlmTemplateToPrompt(prompt);
    const request = {
      stream: true,
      stop: llmParams.predict_params?.stop || ["</s>", "bot:", "user:"],
      prompt: actualPrompt
    };
    request["n_predict"] = llmParams.predict_params?.n_predict || -1;
    request["temperature"] = llmParams.predict_params?.temperature || 0;
    if (llmParams.predict_params?.top_k != null) {
      request["top_k"] = Number(llmParams.predict_params.top_k);
    }
    if (llmParams.predict_params?.top_p != null) {
      request["top_p"] = Number(llmParams.predict_params.top_p);
    }
    if (llmParams.predict_params?.min_p != null) {
      request["min_p"] = Number(llmParams.predict_params.min_p);
    }
    if (llmParams.predict_params?.seed != null) {
      request["seed"] = Number(llmParams.predict_params.seed);
    }
    if (llmParams.predict_params?.n_keep != null) {
      request["n_keep"] = Number(llmParams.predict_params.n_keep);
    }
    if (llmParams.predict_params?.cache_prompt != null) {
      request["cache_prompt"] = Boolean(llmParams.predict_params.cache_prompt);
    }
    // The remaining sampling parameters are numeric, so coerce them with
    // Number, not Boolean.
    if (llmParams.predict_params?.repeat_penalty != null) {
      request["repeat_penalty"] = Number(llmParams.predict_params.repeat_penalty);
    }
    if (llmParams.predict_params?.repeat_last_n != null) {
      request["repeat_last_n"] = Number(llmParams.predict_params.repeat_last_n);
    }
    if (llmParams.predict_params?.frequency_penalty != null) {
      request["frequency_penalty"] = Number(llmParams.predict_params.frequency_penalty);
    }
    if (llmParams.predict_params?.presence_penalty != null) {
      request["presence_penalty"] = Number(llmParams.predict_params.presence_penalty);
    }
    return request;
  }

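  // Illustrative shape of the payload built above, assuming hypothetical
  // predict_params = { n_predict: 512, temperature: 0.7, top_k: 40 }:
  //   {
  //     "stream": true,
  //     "stop": ["</s>", "bot:", "user:"],
  //     "prompt": "<prompt after template substitution>",
  //     "n_predict": 512,
  //     "temperature": 0.7,
  //     "top_k": 40
  //   }
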
  applyLlmTemplateToPrompt(prompt) {
    let actualPrompt = prompt;
    if (this.llmParams.template !== undefined) {
      actualPrompt = this.llmParams.template.replace("{{PROMPT}}", actualPrompt);
    }
    return actualPrompt;
  }

  async trimTokenizedText(text, userRequest, { abortController }) {
    let tokens = await this.tokenize(text, abortController);
    let systemPromptTokenCount = 0;
    if (this.llmParams.predict_params?.system_prompt) {
      systemPromptTokenCount = (await this.tokenize(this.llmParams.predict_params.system_prompt, abortController))?.length || 0;
    }
    const originalTokenCount = tokens?.length || -1;
    const auxTokens = await this.tokenize(this.applyLlmTemplateToPrompt(userRequest), abortController);
    // Token budget left for `text` after reserving room for generation
    // (n_predict), the templated user request, and the system prompt.
    const maxLength = Number(this.llmParams.context) - Number(this.llmParams.predict_params?.n_predict) - (auxTokens?.length ?? 0) - systemPromptTokenCount;
    if (tokens !== null) {
      tokens = tokens.slice(0, maxLength);
      const detokenizedPrompt = await this.detokenize(tokens, abortController);
      if (detokenizedPrompt !== null) {
        text = detokenizedPrompt;
      } else {
        // Rough fallback: treat the token budget as a character budget.
        text = text.substring(0, maxLength);
      }
    } else {
      text = text.substring(0, maxLength);
    }
    return { result: text, originalTokenCount, slicedTokenCount: tokens?.length || -1 };
  }

  predict({ abortController }) {
    return async ({ prompt, llmParams }) => {
      const request = this.createRequest(prompt, llmParams);
      console.log(`Predict request. Url: ${this.url}`);
      console.log(`Prompt: ${request.prompt.substring(0, 30)}`);
      let r;
      while (true) {
        r = await fetch(`${this.url}/completion`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "Accept": "text/event-stream"
          },
          body: JSON.stringify(request),
          signal: abortController.signal
        });
        if (r.status === 404) {
          if (!private_env.LLM_API_404_RETRY_INTERVAL) {
            break;
          }
          console.log(`Received 404, retrying after ${private_env.LLM_API_404_RETRY_INTERVAL} seconds...`);
          await new Promise((res) => setTimeout(res, Number(private_env.LLM_API_404_RETRY_INTERVAL) * 1000));
        } else {
          break;
        }
      }
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      const t = this; // capture `this` for use inside the generator function
      return async function* () {
        let stop = false;
        let generatedText = "";
        let tokenId = 0;
        while (!stop) {
          const out = await reader?.read() ?? { done: false, value: void 0 };
          if (out.done) {
            stop = true;
            reader?.cancel();
            t.createLogFile(JSON.stringify({
              request: prompt,
              response: generatedText
            }), "llm-service");
            return;
          }
          if (!out.value) {
            stop = true;
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              const data = JSON.parse(out.value.slice(6));
              // `content` may be absent on some frames; fall back to "".
              tokenValue = data.content ?? "";
            } catch (e) {
              // Ignore frames that are not valid JSON (e.g. partial chunks).
            }
          } else if (private_env.LLM_API_VERSION == "v1") {
            tokenValue = out.value;
          }
          const text = tokenValue.replace("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }

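  // For reference, the stream parsing above expects llama.cpp-style
  // Server-Sent Events; an illustrative frame (the exact field set varies
  // across server versions) looks like:
  //   data: {"content":" Hello","stop":false}
  // Chunks without the "data: " prefix are used verbatim only when
  // LLM_API_VERSION is set to "v1".
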
  conversation({ abortController }) {
    return async ({ history, llmParams }) => {
      let prompt = history[history.length - 1][0];
      // Trim the latest user message to the context budget first, so the
      // trimmed text is what actually goes into the request.
      const promptBudget = Number(this.llmParams.context) - Number(this.llmParams.predict_params?.n_predict);
      let tokens = await this.tokenize(prompt, abortController);
      if (tokens !== null) {
        tokens = tokens.slice(0, promptBudget);
        const detokenizedPrompt = await this.detokenize(tokens, abortController);
        if (detokenizedPrompt !== null) {
          prompt = detokenizedPrompt;
        } else {
          // Rough fallback: treat the token budget as a character budget.
          prompt = prompt.substring(0, promptBudget);
        }
      } else {
        prompt = prompt.substring(0, promptBudget);
      }
      const request = this.createRequest(prompt, llmParams);
      const r = await fetch(`${this.url}/completion`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Accept": "text/event-stream"
        },
        body: JSON.stringify(request),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      return async function* () {
        let generatedText = "";
        let tokenId = 0;
        while (true) {
          const out = await reader?.read() ?? { done: false, value: void 0 };
          if (out.done || !out.value) {
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              const data = JSON.parse(out.value.slice(6));
              tokenValue = data.content ?? "";
            } catch (e) {
              // Ignore frames that are not valid JSON (e.g. partial chunks).
            }
          } else if (private_env.LLM_API_VERSION == "v1") {
            tokenValue = out.value;
          }
          const text = tokenValue.replace("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }

  formatPrompt(history) {
    let prompt = "";
    for (const [userPrompt, botResponse] of history) {
      prompt += ` [INST] ${userPrompt} [/INST]`;
      if (botResponse) {
        prompt += botResponse;
      }
    }
    return prompt;
  }
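
  // Illustrative example of the Llama-2-chat-style prompt built above:
  //   formatPrompt([["Hi", "Hello!"], ["How are you?", null]])
  //   -> " [INST] Hi [/INST]Hello! [INST] How are you? [/INST]"
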
  createLogFile(text, namePrefix = "") {
    if (!private_env.LOGS_ROOT_FOLDER) {
      return;
    }
    try {
      const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama");
      if (!existsSync(logsDirectory)) {
        mkdirSync(logsDirectory, { recursive: true });
      }
      // Strip ":" and "." so the ISO timestamp is safe to use in a file name.
      const timestamp = new Date().toISOString().replace(/[:.]/g, "");
      const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
      writeFileSync(logFilePath, text);
      console.log(`Log file created: ${logFilePath}`);
    } catch (e) {
      console.log("Failed to create log file in llama service");
      console.log(e);
    }
  }
}

export { LlamaCppService as L };
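
// Minimal usage sketch (assumptions: a llama.cpp server listening at
// http://127.0.0.1:8080; the context, template, and predict_params values
// below are placeholders, not defaults from this module):
//
//   const llm = new LlamaCppService({
//     url: "http://127.0.0.1:8080",
//     context: 4096,
//     template: "<s>[INST] {{PROMPT}} [/INST]",
//     predict_params: { n_predict: 512, temperature: 0.7, stop: ["</s>"] }
//   });
//
//   const abortController = new AbortController();
//   if (await llm.health() === "ok") {
//     const generate = llm.predict({ abortController });
//     const stream = await generate({ prompt: "Hello!", llmParams: llm.llmParams });
//     for await (const chunk of stream) {
//       process.stdout.write(chunk.token.text);
//     }
//   }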