import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { d as private_env } from './shared-server-49TKSBDM.js';
import dns from 'node:dns';

// Prefer IPv4 results when resolving hostnames (e.g. localhost -> 127.0.0.1).
dns.setDefaultResultOrder("ipv4first");
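/**
 * Thin HTTP client for a llama.cpp-style completion server exposing
 * /health, /tokenize, /detokenize and /completion endpoints.
 */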
class LlamaCppService {
  url = "";
  constructor(url) {
    this.url = url;
  }
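  /**
   * Probe GET /health. Returns the server-reported status ("ok", "error"
   * or "loading model"), or "unavailable" when the request fails or the
   * status is unrecognised.
   */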
  async health() {
    try {
      const r = await fetch(`${this.url}/health`, {
        method: "GET",
        headers: {
          "Content-Type": "application/json"
        }
      });
      const data = await r.json();
      if (data.status === "ok" || data.status === "error" || data.status === "loading model") {
        return data.status;
      }
    } catch (error) {
      console.log(error);
    }
    return "unavailable";
  }
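  /**
   * POST the prompt to /tokenize and return the resulting token ids,
   * or null if the endpoint is missing or the request fails.
   */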
  async tokenize(prompt, abortController) {
    const response = await fetch(`${this.url}/tokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        "content": prompt
      }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.tokens) {
        return data.tokens;
      }
    } else if (response.status === 404) {
      console.log("Tokenization endpoint not found (404).");
    } else {
      console.log(`Failed to tokenize: ${await response.text()}`);
    }
    return null;
  }
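  /**
   * POST token ids to /detokenize and return the reconstructed (trimmed)
   * text, or null if the endpoint is missing or the request fails.
   */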
  async detokenize(tokens, abortController) {
    const response = await fetch(`${this.url}/detokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        "tokens": tokens
      }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.content) {
        return data.content.trim();
      }
    } else if (response.status === 404) {
      console.log("Detokenization endpoint not found (404).");
    } else {
      console.log(`Failed to detokenize: ${await response.text()}`);
    }
    return null;
  }
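  /**
   * Single-turn completion. The prompt is truncated to fit the context
   * window, wrapped in [INST] ... [/INST], and sent to /completion with
   * sampling parameters taken from LLM_API_* environment variables.
   * Returns a function that, given a prompt, resolves to an async
   * generator of streamed tokens.
   */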
  predict(prompt, { abortController }) {
    return async ({ prompt: prompt2 }) => {
      // Trim the prompt to fit the context window: keep the first ~30700 tokens,
      // falling back to a character-based cut when tokenization is unavailable.
      let tokens = await this.tokenize(prompt2, abortController);
      if (tokens !== null) {
        tokens = tokens.slice(0, 30700);
        const detokenizedPrompt = await this.detokenize(tokens, abortController);
        if (detokenizedPrompt !== null) {
          prompt2 = detokenizedPrompt;
        } else {
          prompt2 = prompt2.substring(0, 32768);
        }
      } else {
        prompt2 = prompt2.substring(0, 32768);
      }
      const request = {
        "stream": true,
        "stop": ["</s>", "bot:", "user:"],
        "prompt": `[INST] ${prompt2} [/INST]`
      };
      // Sampling parameters are optional and driven by environment variables.
      if (private_env.LLM_API_N_PREDICT) {
        request["n_predict"] = Number(private_env.LLM_API_N_PREDICT);
      }
      if (private_env.LLM_API_TEMPERATURE) {
        request["temperature"] = Number(private_env.LLM_API_TEMPERATURE);
      }
      if (private_env.LLM_API_TOP_K) {
        request["top_k"] = Number(private_env.LLM_API_TOP_K);
      }
      if (private_env.LLM_API_TOP_P) {
        request["top_p"] = Number(private_env.LLM_API_TOP_P);
      }
      if (private_env.LLM_API_MIN_P) {
        request["min_p"] = Number(private_env.LLM_API_MIN_P);
      }
      if (private_env.LLM_API_SEED) {
        request["seed"] = Number(private_env.LLM_API_SEED);
      }
      if (private_env.LLM_API_N_KEEP) {
        request["n_keep"] = Number(private_env.LLM_API_N_KEEP);
      }
      if (private_env.LLM_CACHE_PROMPT) {
        // Environment variables are strings, so compare explicitly ("false" would otherwise be truthy).
        request["cache_prompt"] = private_env.LLM_CACHE_PROMPT === "true";
      }
      let r;
      while (true) {
        r = await fetch(`${this.url}/completion`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "Accept": "text/event-stream"
          },
          body: JSON.stringify(request),
          signal: abortController.signal
        });
        if (r.status === 404) {
          if (!private_env.LLM_API_404_RETRY_INTERVAL) {
            break;
          }
          console.log(`Received 404, retrying after ${private_env.LLM_API_404_RETRY_INTERVAL} seconds...`);
          await new Promise((resolve2) => setTimeout(resolve2, Number(private_env.LLM_API_404_RETRY_INTERVAL) * 1000));
        } else {
          break;
        }
      }
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      const t = this;
      return async function* () {
        let stop = false;
        let generatedText = "";
        let tokenId = 0;
        while (!stop) {
          const out = await reader?.read() ?? { done: false, value: void 0 };
          if (out.done) {
            stop = true;
            reader?.cancel();
            t.createLogFile(JSON.stringify({
              "request": prompt2,
              "response": generatedText
            }), "llm-service");
            return;
          }
          if (!out.value) {
            stop = true;
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              const data = JSON.parse(out.value.slice(6));
              tokenValue = data.content ?? "";
            } catch (e) {
              // Ignore malformed server-sent event chunks.
            }
          } else if (private_env.LLM_API_VERSION == "v1") {
            tokenValue = out.value;
          }
          const text = tokenValue.replace("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }
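  /**
   * Multi-turn variant of predict(): formats the chat history with
   * formatPrompt(), keeps only the most recent ~30700 tokens and streams
   * the completion with a fixed set of sampling parameters.
   */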
  conversation(history, { abortController }) {
    return async ({ history: history2 }) => {
      let prompt = this.formatPrompt(history2);
      console.log(`Prompt length (chars): ${prompt.length}`);
      // Trim the prompt to fit the context window: keep the last ~30700 tokens,
      // falling back to a character-based cut when tokenization is unavailable.
      let tokens = await this.tokenize(prompt, abortController);
      if (tokens !== null) {
        tokens = tokens.slice(-30700);
        const detokenizedPrompt = await this.detokenize(tokens, abortController);
        if (detokenizedPrompt !== null) {
          prompt = detokenizedPrompt;
        } else {
          prompt = prompt.substring(Math.max(0, prompt.length - 30700));
        }
      } else {
        prompt = prompt.substring(Math.max(0, prompt.length - 30700));
      }
      const r = await fetch(`${this.url}/completion`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Accept": "text/event-stream"
        },
        body: JSON.stringify({
          "stream": true,
          "n_predict": 2000,
          "temperature": 0.2,
          "stop": ["</s>", "bot:", "user:"],
          "repeat_penalty": 1,
          "top_k": 40,
          "top_p": 0.95,
          "min_p": 0.05,
          "seed": 42,
          "n_keep": 0,
          "cache_prompt": false,
          "prompt": prompt
        }),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      return async function* () {
        let stop = false;
        let generatedText = "";
        let tokenId = 0;
        while (!stop) {
          const out = await reader?.read() ?? { done: false, value: void 0 };
          if (out.done) {
            reader?.cancel();
            return;
          }
          if (!out.value) {
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              const data2 = JSON.parse(out.value.slice(6));
              tokenValue = data2.content ?? "";
            } catch (e) {
              // Ignore malformed server-sent event chunks.
            }
          } else if (private_env.LLM_API_VERSION == "v1") {
            tokenValue = out.value;
          }
          const text = tokenValue.replace("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }
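  /**
   * Serialise the chat history into an [INST]-style instruct prompt,
   * e.g. "<s>[INST] question [/INST] answer</s> [INST] follow-up [/INST]".
   */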
  formatPrompt(history) {
    let prompt = "<s>";
    for (const [userPrompt, botResponse] of history) {
      prompt += `[INST] ${userPrompt} [/INST]`;
      if (botResponse) {
        prompt += ` ${botResponse}</s> `;
      }
    }
    console.log(prompt);
    return prompt;
  }
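  /**
   * Write a timestamped JSON log file under LOGS_ROOT_FOLDER/llama;
   * failures are logged and swallowed.
   */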
  createLogFile(text, namePrefix = "") {
    try {
      const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama");
      if (!existsSync(logsDirectory)) {
        mkdirSync(logsDirectory, {
          recursive: true
        });
      }
      const timestamp = new Date().toISOString().replace(/[:.]/g, "");
      const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
      writeFileSync(logFilePath, text);
      console.log(`Log file created: ${logFilePath}`);
    } catch (e) {
      console.log("Failed to create log file in llama service");
      console.log(e);
    }
  }
}
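// Usage sketch (illustrative only; assumes the server URL is supplied by an
// LLM_API_URL environment variable, which this module does not define):
//
//   const llm = new LlamaCppService(private_env.LLM_API_URL);
//   if (await llm.health() === "ok") {
//     const abortController = new AbortController();
//     const stream = await llm.predict("", { abortController })({ prompt: "Hello" });
//     for await (const chunk of stream) {
//       process.stdout.write(chunk.token.text);
//     }
//   }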
export { LlamaCppService as L };
//# sourceMappingURL=LlamaCppService-lwZ2ZT0u.js.map