| import { existsSync, mkdirSync, writeFileSync } from 'fs'; | |
| import { resolve } from 'path'; | |
| import { d as private_env } from './shared-server-49TKSBDM.js'; | |
class LlamaCppService {
  /** Token budget applied to prompts before calling /completion. */
  static MAX_PROMPT_TOKENS = 30700;
  /** Character-count fallback used when the /tokenize endpoint is unavailable. */
  static MAX_PROMPT_CHARS = 32768;

  url = "";

  /**
   * Client for a llama.cpp HTTP server.
   * @param {string} url - Base URL of the server (no trailing slash).
   */
  constructor(url) {
    this.url = url;
  }

  /**
   * Probe the server's /health endpoint.
   * @returns {Promise<"ok"|"error"|"loading model"|"unavailable">} the
   *   server-reported status, or "unavailable" when the server is
   *   unreachable, returns non-JSON, or reports an unknown status.
   */
  async health() {
    try {
      const r = await fetch(`${this.url}/health`, {
        method: "GET",
        headers: {
          "Content-Type": "application/json"
        }
      });
      const data = await r.json();
      if (["ok", "error", "loading model"].includes(data.status)) {
        return data.status;
      }
    } catch {
      // Network failure or unparseable body: treat like an unknown status
      // instead of letting the error escape a simple health probe.
    }
    return "unavailable";
  }

  /**
   * Tokenize `prompt` via the server's /tokenize endpoint.
   * @param {string} prompt
   * @param {AbortController} abortController - cancels the request.
   * @returns {Promise<number[]|null>} token ids, or null when the endpoint
   *   is missing (404), the request fails, or the payload has no `tokens`.
   */
  async tokenize(prompt, abortController) {
    const response = await fetch(`${this.url}/tokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ content: prompt }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.tokens) {
        return data.tokens;
      }
    } else if (response.status === 404) {
      console.log("Tokenization endpoint not found (404).");
    } else {
      console.log(`Failed to tokenize: ${await response.text()}`);
    }
    return null;
  }

  /**
   * Convert token ids back to text via the server's /detokenize endpoint.
   * @param {number[]} tokens
   * @param {AbortController} abortController - cancels the request.
   * @returns {Promise<string|null>} trimmed text, or null on 404/failure.
   *   NOTE: an empty-string result is also reported as null because of the
   *   truthiness check — kept as-is for compatibility with existing callers.
   */
  async detokenize(tokens, abortController) {
    const response = await fetch(`${this.url}/detokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ tokens }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.content) {
        return data.content.trim();
      }
    } else if (response.status === 404) {
      console.log("Detokenization endpoint not found (404).");
    } else {
      console.log(`Failed to detokenize: ${await response.text()}`);
    }
    return null;
  }

  /**
   * Clip `prompt` to the model's context window: token-accurate when the
   * tokenize/detokenize endpoints work, character-count fallback otherwise.
   * @param {string} prompt
   * @param {AbortController} abortController
   * @returns {Promise<string>} the (possibly truncated) prompt.
   */
  async #truncatePrompt(prompt, abortController) {
    const tokens = await this.tokenize(prompt, abortController);
    if (tokens !== null) {
      console.log("tokens: " + tokens.length);
      const detokenized = await this.detokenize(
        tokens.slice(0, LlamaCppService.MAX_PROMPT_TOKENS),
        abortController
      );
      if (detokenized !== null) {
        return detokenized;
      }
    }
    // Tokenizer round-trip unavailable: approximate with a character cap.
    return prompt.substring(0, LlamaCppService.MAX_PROMPT_CHARS);
  }

  /**
   * Build a streaming completion callable for the /completion API.
   *
   * Returns an async function that, given `{ prompt }`, resolves to an async
   * generator yielding `{ token, generated_text, details }` records (shaped
   * like the text-generation-inference streaming protocol). On normal stream
   * end the full request/response pair is written to a log file.
   *
   * @param {string} prompt - unused here; the real prompt arrives through the
   *   returned callable (parameter kept for interface compatibility).
   * @param {{abortController: AbortController}} options - cancels the request.
   */
  predict(prompt, { abortController }) {
    return async ({ prompt: prompt2 }) => {
      console.log(prompt2.length);
      prompt2 = await this.#truncatePrompt(prompt2, abortController);
      console.log(prompt2.length);
      const r = await fetch(`${this.url}/completion`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Accept": "text/event-stream"
        },
        body: JSON.stringify({
          stream: true,
          n_predict: 2000,
          temperature: 0.2,
          stop: ["</s>", "bot:", "user:"],
          repeat_penalty: 1,
          top_k: 40,
          top_p: 0.95,
          min_p: 0.05,
          seed: 42,
          n_keep: 0,
          cache_prompt: false,
          prompt: "[INST]" + prompt2 + "[/INST]"
        }),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const reader = r.body?.pipeThrough(new TextDecoderStream()).getReader();
      const service = this; // generator below needs `this` for logging
      return (async function* () {
        let generatedText = "";
        let tokenId = 0;
        for (;;) {
          const out = (await reader?.read()) ?? { done: false, value: void 0 };
          if (out.done) {
            reader?.cancel();
            service.createLogFile(
              JSON.stringify({
                request: prompt2,
                response: generatedText
              }),
              "llm-service"
            );
            return;
          }
          if (!out.value) {
            reader?.cancel();
            return;
          }
          // NOTE(review): assumes each decoded chunk carries at most one SSE
          // event; chunks that split or merge events fall into the parse
          // failure below and are dropped — TODO buffer on newlines if that
          // shows up in practice.
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              const event = JSON.parse(out.value.slice(6));
              tokenValue = event.content ?? ""; // payload may omit `content`
            } catch {
              // Partial/invalid JSON chunk: nothing usable this read.
            }
          } else if (private_env.LLM_API_VERSION === "v1") {
            tokenValue = out.value;
          }
          // replaceAll: strip every occurrence of the stop marker, not just
          // the first (the old `.replace(...) ?? ""` nullish guard was dead —
          // String.replace never returns nullish).
          const text = tokenValue.replaceAll("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      })();
    };
  }

  /**
   * Persist `text` to a timestamped JSON file under LOGS_ROOT_FOLDER/llama.
   * @param {string} text - serialized payload to write.
   * @param {string} [namePrefix=""] - prefix for the log file name.
   */
  createLogFile(text, namePrefix = "") {
    const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER, "llama");
    // recursive: true creates missing parents and is a no-op when the
    // directory exists (the old existsSync+mkdirSync pair crashed when
    // LOGS_ROOT_FOLDER itself was absent).
    mkdirSync(logsDirectory, { recursive: true });
    const timestamp = new Date().toISOString().replace(/[:.]/g, "");
    const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
    writeFileSync(logFilePath, text);
    console.log(`Log file created: ${logFilePath}`);
  }
}
| export { LlamaCppService as L }; | |
| //# sourceMappingURL=LlamaCppService-VUAR80b2.js.map | |