// NOTE(review): removed non-code scrape residue ("Spaces:" / "Sleeping" /
// "Sleeping") — it parsed as a label plus bare identifier statements and
// threw ReferenceError when the module was evaluated.
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { d as private_env } from './shared-server-49TKSBDM.js';
/**
 * Thin HTTP client for a llama.cpp server: health checks, (de)tokenization,
 * and streamed text completion, plus request/response logging to disk.
 */
class LlamaCppService {
  url = "";

  /**
   * @param {string} url - Base URL of the llama.cpp HTTP server (no trailing slash).
   */
  constructor(url) {
    this.url = url;
  }

  /**
   * Probe the server's /health endpoint.
   * @returns {Promise<string>} One of "ok", "error", "loading model", or
   *   "unavailable" (non-OK response, unknown status, network failure, or
   *   invalid JSON — previously these rejections escaped to the caller).
   */
  async health() {
    try {
      const r = await fetch(`${this.url}/health`, {
        method: "GET",
        headers: {
          "Content-Type": "application/json"
        }
      });
      if (!r.ok) {
        return "unavailable";
      }
      const data = await r.json();
      if (data.status === "ok" || data.status === "error" || data.status === "loading model") {
        return data.status;
      }
    } catch (e) {
      // Treat an unreachable server / malformed body as "unavailable"
      // rather than letting the rejection propagate.
    }
    return "unavailable";
  }

  /**
   * Tokenize a prompt via the server's /tokenize endpoint.
   * @param {string} prompt - Text to tokenize.
   * @param {AbortController} abortController - Used to cancel the request.
   * @returns {Promise<number[]|null>} Token ids, or null when the endpoint
   *   is missing (404), the request failed, or no tokens were returned.
   */
  async tokenize(prompt, abortController) {
    const response = await fetch(`${this.url}/tokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        "content": prompt
      }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.tokens) {
        return data.tokens;
      }
    } else if (response.status === 404) {
      console.log("Tokenization endpoint not found (404).");
    } else {
      console.log(`Failed to tokenize: ${await response.text()}`);
    }
    return null;
  }

  /**
   * Convert token ids back to text via the server's /detokenize endpoint.
   * @param {number[]} tokens - Token ids to detokenize.
   * @param {AbortController} abortController - Used to cancel the request.
   * @returns {Promise<string|null>} Trimmed text, or null on failure.
   */
  async detokenize(tokens, abortController) {
    const response = await fetch(`${this.url}/detokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        "tokens": tokens
      }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.content) {
        return data.content.trim();
      }
    } else if (response.status === 404) {
      console.log("Detokenization endpoint not found (404).");
    } else {
      console.log(`Failed to detokenize: ${await response.text()}`);
    }
    return null;
  }

  /**
   * Build a completion runner. Calling the returned function truncates the
   * prompt to the model context (via tokenize/detokenize, falling back to a
   * 32768-char substring when tokenization is unavailable), POSTs a streaming
   * /completion request, and returns an async generator of token events.
   * @param {string} prompt - Unused seed prompt (the inner call's prompt wins);
   *   kept for interface compatibility.
   * @param {{abortController: AbortController}} options
   * @returns {({prompt: string}) => Promise<AsyncGenerator>}
   */
  predict(prompt, { abortController }) {
    return async ({ prompt: prompt2 }) => {
      console.log(prompt2.length);
      let tokens = await this.tokenize(prompt2, abortController);
      if (tokens !== null) {
        console.log("tokens: " + tokens.length);
        // Leave headroom below the 32k context for the generated tokens.
        tokens = tokens.slice(0, 30700);
        const detokenizedPrompt = await this.detokenize(tokens, abortController);
        if (detokenizedPrompt !== null) {
          prompt2 = detokenizedPrompt;
        } else {
          prompt2 = prompt2.substring(0, 32768);
        }
      } else {
        // Tokenizer unavailable: crude character-based truncation.
        prompt2 = prompt2.substring(0, 32768);
      }
      console.log(prompt2.length);
      const r = await fetch(`${this.url}/completion`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Accept": "text/event-stream"
        },
        body: JSON.stringify({
          "stream": true,
          "n_predict": 2e3,
          "temperature": 0.2,
          "stop": ["</s>", "bot:", "user:"],
          // "repeat_last_n": 256,
          "repeat_penalty": 1,
          "top_k": 40,
          "top_p": 0.95,
          "min_p": 0.05,
          "seed": 42,
          "n_keep": 0,
          // "tfs_z": 1,
          // "typical_p": 1,
          // "presence_penalty": 0,
          // "frequency_penalty": 0,
          // "mirostat": 0,
          // "mirostat_tau": 5,
          // "mirostat_eta": 0.1,
          // // "grammar": "",
          // "n_probs": 0,
          // "image_data": [],
          "cache_prompt": false,
          // "api_key": "",
          // "slot_id": 0,
          "prompt": "[INST]" + prompt2 + "[/INST]"
        }),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const encoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(encoder).getReader();
      // Capture the instance: the generator below is a `function*`, so its
      // `this` is not the service.
      let t = this;
      return async function* () {
        let stop = false;
        let generatedText = "";
        let tokenId = 0;
        while (!stop) {
          const out = await reader?.read() ?? { done: false, value: void 0 };
          if (out.done) {
            reader?.cancel();
            t.createLogFile(JSON.stringify({
              "request": prompt2,
              "response": generatedText
            }), "llm-service");
            return;
          }
          if (!out.value) {
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              let data2 = JSON.parse(out.value.slice(6));
              // The final SSE event (generation stats) carries no `content`;
              // the old `tokenValue = data2.content` then threw TypeError on
              // the later `.replace` call. Default to "" instead.
              tokenValue = data2.content ?? "";
            } catch (e) {
              // Partial or malformed SSE chunk — skip it.
            }
          } else if (private_env.LLM_API_VERSION == "v1") {
            tokenValue = out.value;
          }
          // Strip every stop-marker occurrence (the old single `.replace`
          // only removed the first one), and compute it once.
          const cleaned = tokenValue.replaceAll("</s>", "");
          generatedText += cleaned;
          yield {
            token: {
              id: tokenId++,
              text: cleaned,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }

  /**
   * Write `text` to a timestamped JSON file under LOGS_ROOT_FOLDER/llama.
   * @param {string} text - File contents (already serialized).
   * @param {string} [namePrefix=""] - Prefix for the log file name.
   */
  createLogFile(text, namePrefix = "") {
    const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama");
    if (!existsSync(logsDirectory)) {
      // recursive:true also creates missing parent directories — without it
      // this threw ENOENT when LOGS_ROOT_FOLDER itself did not exist yet.
      mkdirSync(logsDirectory, { recursive: true });
    }
    const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "");
    const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
    writeFileSync(logFilePath, text);
    console.log(`Log file created: ${logFilePath}`);
  }
}
// Bundler alias: downstream chunks import this class under the short name `L`.
export { LlamaCppService as L };
//# sourceMappingURL=LlamaCppService-VUAR80b2.js.map