import { O as OpenAiService, U as UserToLlmRequestTypeEnum } from './OpenAiService-05Srl9E-.js';
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { d as private_env } from './shared-server-49TKSBDM.js';
import dns from 'node:dns';

dns.setDefaultResultOrder("ipv4first");
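
/**
 * Client for DeepInfra's OpenAI-compatible chat-completions API.
 * Mirrors the streaming interface of OpenAiService so the factory below
 * can hand out either implementation interchangeably.
 */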
class DeepinfraService {
  url = "";
  llmParams;
  apiKey;

  constructor(params) {
    this.url = params.url;
    this.llmParams = params;
    this.apiKey = private_env.DEEPINFRA_API_KEY;
  }

  _getHeaders() {
    const headers = { "Content-Type": "application/json" };
    if (this.apiKey) {
      // DeepInfra's OpenAI-compatible API expects a bearer token.
      headers["Authorization"] = `Bearer ${this.apiKey}`;
    }
    return headers;
  }
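
  // Lists the model ids exposed by the DeepInfra OpenAI-compatible endpoint;
  // returns an empty array if the request fails.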
  async getModels() {
    try {
      const response = await fetch(`${this.url}/v1/openai/models`, {
        method: "GET",
        headers: this._getHeaders()
      });
      if (response.ok) {
        const json = await response.json();
        return json["data"].map((o) => o["id"]);
      }
    } catch (error) {
      console.error("DeepinfraService.getModels error:");
      console.error(error);
    }
    return [];
  }
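
  // The methods below are stubs kept for interface parity with the other
  // LLM API services; tokenization is not implemented for this backend.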
  async health() {
    return "ok";
  }

  async tokenize(prompt, abortController) {
    return null;
  }

  async detokenize(tokens, abortController) {
    return null;
  }
  // Builds the body for a streaming /chat/completions request from the
  // configured llmParams; optional sampling parameters are copied over only
  // when they are set.
  async createRequest(prompt, requestType, systemPrompt) {
    const llmParams = this.llmParams;
    const request = {
      "stream": true,
      "model": llmParams.model
    };
    if (llmParams.predict_params?.stop != void 0 && llmParams.predict_params.stop.length > 0) {
      const nonEmptyStop = llmParams.predict_params.stop.filter((o) => o != "");
      if (nonEmptyStop.length > 0) {
        // Send only the filtered list so empty strings never reach the API.
        request["stop"] = nonEmptyStop;
      }
    }
    if (llmParams.predict_params?.n_predict != null) {
      request["max_tokens"] = Number(llmParams.predict_params.n_predict);
    }
    request["temperature"] = llmParams.predict_params?.temperature || 0;
    if (llmParams.predict_params?.top_k != null) {
      request["top_k"] = Number(llmParams.predict_params.top_k);
    }
    if (llmParams.predict_params?.top_p != null) {
      request["top_p"] = Number(llmParams.predict_params.top_p);
    }
    if (llmParams.predict_params?.min_p != null) {
      request["min_p"] = Number(llmParams.predict_params.min_p);
    }
    if (llmParams.predict_params?.seed != null) {
      request["seed"] = Number(llmParams.predict_params.seed);
    }
    if (llmParams.predict_params?.n_keep != null) {
      request["n_keep"] = Number(llmParams.predict_params.n_keep);
    }
    if (llmParams.predict_params?.cache_prompt != null) {
      request["cache_prompt"] = Boolean(llmParams.predict_params.cache_prompt);
    }
    if (llmParams.predict_params?.repeat_penalty != null) {
      request["repetition_penalty"] = Number(llmParams.predict_params.repeat_penalty);
    }
    if (llmParams.predict_params?.repeat_last_n != null) {
      request["repeat_last_n"] = Number(llmParams.predict_params.repeat_last_n);
    }
    if (llmParams.predict_params?.presence_penalty != null) {
      request["presence_penalty"] = Number(llmParams.predict_params.presence_penalty);
    }
    if (llmParams.predict_params?.frequency_penalty != null) {
      request["frequency_penalty"] = Number(llmParams.predict_params.frequency_penalty);
    }
    request["messages"] = this.createMessages(prompt, requestType, systemPrompt);
    return request;
  }
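
  // Assembles the chat messages: an optional system message chosen by request
  // type, followed by the (optionally templated) user prompt.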
  createMessages(prompt, requestType, systemPrompt) {
    const actualPrompt = this.applyLlmTemplateToPrompt(prompt);
    const messages = [];
    const finalSystemPrompt = this.selectSystemPrompt(requestType, systemPrompt);
    if (finalSystemPrompt) {
      messages.push({ role: "system", content: finalSystemPrompt });
    }
    messages.push({ role: "user", content: actualPrompt });
    return messages;
  }
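
  // Maps the request type to the matching system prompt from predict_params;
  // Raw requests use the caller-supplied systemPrompt instead.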
  selectSystemPrompt(requestType, systemPrompt) {
    let prompt = "";
    switch (requestType) {
      case UserToLlmRequestTypeEnum.Regular:
        prompt = this.llmParams.predict_params?.system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.Clarification:
        prompt = this.llmParams.predict_params?.clarification_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.ClarificationWithUserSelectedSearchResults:
        prompt = this.llmParams.predict_params?.user_selected_sources_clarification_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.UserSelectedSearchResults:
        prompt = this.llmParams.predict_params?.user_selected_sources_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.Raw:
        prompt = systemPrompt || "";
        break;
    }
    return prompt;
  }
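
  // Substitutes the user prompt into the optional {{PROMPT}} template.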
  applyLlmTemplateToPrompt(prompt) {
    let actualPrompt = prompt;
    if (this.llmParams.template != void 0) {
      actualPrompt = this.llmParams.template.replace("{{PROMPT}}", actualPrompt);
    }
    return actualPrompt;
  }

  // No-op trimming: sources are returned unchanged and token counts are not
  // computed for this backend.
  async trimTokenizedText(sources, userRequest, { abortController }) {
    return { result: sources, originalTokenCount: 0, slicedTokenCount: 0 };
  }
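
  // Sends the streaming chat-completions request and returns an async
  // generator that yields one token-shaped object per SSE chunk, matching
  // what the other LLM API services yield.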
  predict({ requestType, abortController }) {
    return async ({ prompt, systemPrompt }) => {
      const request = await this.createRequest(prompt, requestType, systemPrompt);
      console.log(`Predict request. Url: ${this.url}`);
      console.log(`Messages: ${JSON.stringify(request["messages"])}`);
      const r = await fetch(`${this.url}/v1/openai/chat/completions`, {
        method: "POST",
        headers: this._getHeaders(),
        body: JSON.stringify(request),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      // Decode the SSE byte stream into text chunks.
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      return async function* () {
        let tokenId = 0;
        while (true) {
          const out = await reader?.read() ?? { done: true, value: void 0 };
          if (out.done || !out.value) {
            reader?.cancel();
            break;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              let isDone = false;
              // A chunk can carry several "data: {...}" lines; collect the
              // delta content from every payload in this chunk.
              for (const line of out.value.trim().split(/\n/)) {
                if (line.includes("data: [DONE]")) {
                  isDone = true;
                  continue;
                }
                try {
                  const parsedData = JSON.parse(line.replace(/^data: /, ""));
                  if (parsedData.choices && parsedData.choices.length > 0 && parsedData.choices[0]?.delta?.content) {
                    tokenValue += parsedData.choices[0].delta.content;
                  }
                } catch {
                  console.warn(`Invalid JSON string skipped: ${line}`);
                }
              }
              if (isDone) {
                reader?.cancel();
                break;
              }
            } catch (e) {
              console.log("Invalid LLM response");
              console.log(e);
            }
          }
          yield {
            token: {
              id: tokenId++,
              text: tokenValue,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }
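
  // Writes the given text to a timestamped JSON file under
  // LOGS_ROOT_FOLDER/llama when that environment variable is set.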
  createLogFile(text, namePrefix = "") {
    if (!private_env.LOGS_ROOT_FOLDER) {
      return;
    }
    try {
      const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama");
      if (!existsSync(logsDirectory)) {
        mkdirSync(logsDirectory, {
          recursive: true
        });
      }
      const timestamp = new Date().toISOString().replace(/[:.]/g, "");
      const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
      writeFileSync(logFilePath, text);
      console.log(`Log file created: ${logFilePath}`);
    } catch (e) {
      console.log(`Failed to create log file in DeepinfraService`);
      console.log(e);
    }
  }
}

class LlmApiServiceFactory {
  static createVllm(params) {
    return new OpenAiService(params);
  }

  static createDeepinfra(params) {
    return new DeepinfraService(params);
  }

  static create(params) {
    switch (params.type) {
      case "vllm-openai":
        return this.createVllm(params);
      case "deepinfra":
        return this.createDeepinfra(params);
      default:
        throw new Error("Unknown LLM API type");
    }
  }
}
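
// Usage sketch (illustrative only): the exact shape of `params` is defined by
// the callers of this factory; `type`, `url`, `model` and `predict_params`
// are the fields read above, everything else shown here is an assumption.
//
//   const llm = LlmApiServiceFactory.create({
//     type: "deepinfra",
//     url: "https://api.deepinfra.com",
//     model: "meta-llama/Meta-Llama-3-8B-Instruct",
//     predict_params: { temperature: 0, n_predict: 512 }
//   });
//   const generate = llm.predict({
//     requestType: UserToLlmRequestTypeEnum.Regular,
//     abortController: new AbortController()
//   });
//   for await (const chunk of await generate({ prompt: "Hello" })) {
//     process.stdout.write(chunk.token.text);
//   }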
export { LlmApiServiceFactory as L };