import { O as OpenAiService, U as UserToLlmRequestTypeEnum } from './OpenAiService-05Srl9E-.js';
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { d as private_env } from './shared-server-49TKSBDM.js';
import dns from 'node:dns';
dns.setDefaultResultOrder("ipv4first");
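/**
* LLM API client for the Deepinfra OpenAI-compatible endpoints.
* Sends requests to `${url}/v1/openai/...` and authenticates with the
* DEEPINFRA_API_KEY environment variable.
*/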
class DeepinfraService {
url = "";
llmParams;
apiKey;
constructor(params) {
this.url = params.url;
this.llmParams = params;
this.apiKey = private_env.DEEPINFRA_API_KEY;
}
_getHeaders() {
let headers = { "Content-Type": "application/json" };
if (this.apiKey) {
headers["Authorization"] = this.apiKey;
}
return headers;
}
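/**
* Fetches the list of available model ids from the models endpoint.
* @returns Array of model ids, or an empty array on error.
*/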
async getModels() {
try {
const response = await fetch(`${this.url}/v1/openai/models`, {
method: "GET",
headers: this._getHeaders()
});
if (response.ok) {
let json = await response.json();
let result = json["data"].map((o) => o["id"]);
return result;
}
} catch (error) {
console.error("OpenAiService.getModels error:");
console.error(JSON.parse(JSON.stringify(error)));
}
return [];
}
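/**
* No real health check is performed for this backend; always reports "ok".
*/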
async health() {
return "ok";
}
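/**
* Tokenization is not supported for this backend; always resolves to null.
* @param prompt
* @param abortController
* @returns
*/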
async tokenize(prompt, abortController) {
return null;
}
/**
* Do not use this function for now, since there is no way to strip the chat template.
* @param tokens
* @param abortController
* @returns
*/
async detokenize(tokens, abortController) {
return null;
}
/**
* Builds an LLM request from the configured parameters and an array of messages.
* @param prompt The prompt that will be sent to the LLM in a message with the "user" role.
* @param requestType Request type used to select a predefined system prompt.
* @param systemPrompt Custom system prompt for non-standard cases; for example, the clarifying-question generator (InvestigatorService) uses this parameter. It only takes effect when requestType = UserToLlmRequestTypeEnum.Raw.
* @returns
*/
async createRequest(prompt, requestType, systemPrompt) {
const llmParams = this.llmParams;
const request = {
"stream": true,
"model": llmParams.model
};
if (llmParams.predict_params?.stop != void 0 && llmParams.predict_params.stop.length > 0) {
const nonEmptyStop = llmParams.predict_params.stop.filter((o) => o != "");
if (nonEmptyStop.length > 0) {
request["stop"] = llmParams.predict_params.stop;
}
}
if (llmParams.predict_params?.n_predict != null) {
request["max_tokens"] = Number(llmParams.predict_params?.n_predict);
}
request["temperature"] = llmParams.predict_params?.temperature || 0;
if (llmParams.predict_params?.top_k != null) {
request["top_k"] = Number(llmParams.predict_params.top_k);
}
if (llmParams.predict_params?.top_p != null) {
request["top_p"] = Number(llmParams.predict_params.top_p);
}
if (llmParams.predict_params?.min_p != null) {
request["min_p"] = Number(llmParams.predict_params.min_p);
}
if (llmParams.predict_params?.seed != null) {
request["seed"] = Number(llmParams.predict_params.seed);
}
if (llmParams.predict_params?.n_keep != null) {
request["n_keep"] = Number(llmParams.predict_params.n_keep);
}
if (llmParams.predict_params?.cache_prompt != null) {
request["cache_prompt"] = Boolean(llmParams.predict_params.cache_prompt);
}
if (llmParams.predict_params?.repeat_penalty != null) {
request["repetition_penalty"] = Number(llmParams.predict_params.repeat_penalty);
}
if (llmParams.predict_params?.repeat_last_n != null) {
request["repeat_last_n"] = Number(llmParams.predict_params.repeat_last_n);
}
if (llmParams.predict_params?.presence_penalty != null) {
request["presence_penalty"] = Number(llmParams.predict_params.presence_penalty);
}
if (llmParams.predict_params?.frequency_penalty != null) {
request["frequency_penalty"] = Number(llmParams.predict_params.frequency_penalty);
}
request["messages"] = this.createMessages(prompt, requestType, systemPrompt);
return request;
}
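/**
* Builds the chat message array: applies the optional prompt template and
* prepends a system message when a system prompt is resolved for the request type.
*/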
createMessages(prompt, requestType, systemPrompt) {
const actualPrompt = this.applyLlmTemplateToPrompt(prompt);
let messages = [];
const finalSystemPrompt = this.selectSystemPrompt(requestType, systemPrompt);
if (finalSystemPrompt) {
messages.push({ role: "system", content: finalSystemPrompt });
}
messages.push({ role: "user", content: actualPrompt });
return messages;
}
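/**
* Picks the system prompt configured for the given request type; for Raw requests
* the caller-supplied systemPrompt is used instead.
*/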
selectSystemPrompt(requestType, systemPrompt) {
let prompt = "";
switch (requestType) {
case UserToLlmRequestTypeEnum.Regular:
prompt = this.llmParams.predict_params?.system_prompt || "";
break;
case UserToLlmRequestTypeEnum.Clarification:
prompt = this.llmParams.predict_params?.clarification_system_prompt || "";
break;
case UserToLlmRequestTypeEnum.ClarificationWithUserSelectedSearchResults:
prompt = this.llmParams.predict_params?.user_selected_sources_clarification_system_prompt || "";
break;
case UserToLlmRequestTypeEnum.UserSelectedSearchResults:
prompt = this.llmParams.predict_params?.user_selected_sources_system_prompt || "";
break;
case UserToLlmRequestTypeEnum.Raw:
prompt = systemPrompt || "";
break;
}
return prompt;
}
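/**
* Substitutes the prompt into the optional template via the {{PROMPT}} placeholder.
*/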
applyLlmTemplateToPrompt(prompt) {
let actualPrompt = prompt;
if (this.llmParams.template != void 0) {
actualPrompt = this.llmParams.template.replace("{{PROMPT}}", actualPrompt);
}
return actualPrompt;
}
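/**
* Trimming is not implemented for this backend; returns the sources unchanged
* with zero token counts.
*/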
async trimTokenizedText(sources, userRequest, { abortController }) {
return { result: sources, originalTokenCount: 0, slicedTokenCount: 0 };
}
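/**
* Returns an async function that sends the chat completion request and yields
* the streamed SSE response token by token as an async generator.
*/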
predict({ requestType, abortController }) {
return async ({ prompt, systemPrompt }) => {
const request = await this.createRequest(prompt, requestType, systemPrompt);
console.log(`Predict request. Url: ${this.url}`);
console.log(`Messages: ${JSON.stringify(request["messages"])}`);
let r = await fetch(`${this.url}/v1/openai/chat/completions`, {
method: "POST",
headers: this._getHeaders(),
body: JSON.stringify(request),
signal: abortController.signal
});
if (!r.ok) {
throw new Error(`Failed to generate text: ${await r.text()}`);
}
const decoder = new TextDecoderStream();
const reader = r.body?.pipeThrough(decoder).getReader();
return async function* () {
let tokenId = 0;
while (true) {
const out = await reader?.read() ?? { done: false, value: void 0 };
if (out.done) {
reader?.cancel();
break;
}
if (!out.value) {
reader?.cancel();
break;
}
let tokenValue = "";
if (out.value.startsWith("data: ")) {
try {
let isDone = false;
out.value.trim().split(/\n/).forEach((line) => {
if (line.includes("data: [DONE]")) {
isDone = true;
return;
}
try {
const parsedData = JSON.parse(line.replace(/^data: /, ""));
if (parsedData.choices && parsedData.choices.length > 0 && parsedData.choices[0]?.delta?.content) {
tokenValue += parsedData.choices[0]?.delta?.content;
}
} catch {
console.warn(`Invalid JSON string skipped: ${line}`);
}
});
if (isDone) {
reader?.cancel();
break;
}
} catch (e) {
console.log("Invalid llm response");
console.log(e);
}
}
yield {
token: {
id: tokenId++,
text: tokenValue ?? "",
logprob: 0,
special: false
},
generated_text: null,
details: null
};
}
}();
};
}
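/**
* Writes the given text to a timestamped .json file under LOGS_ROOT_FOLDER/llama.
* Does nothing when LOGS_ROOT_FOLDER is not configured.
*/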
createLogFile(text, namePrefix = "") {
if (!private_env.LOGS_ROOT_FOLDER) {
return;
}
try {
const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama");
if (!existsSync(logsDirectory)) {
mkdirSync(logsDirectory, {
recursive: true
});
}
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "");
const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
writeFileSync(logFilePath, text);
console.log(`Log file created: ${logFilePath}`);
} catch (e) {
console.log(`Failed to create log file in llama service`);
console.log(e);
}
}
}
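/**
* Factory that selects the LLM API client implementation based on params.type.
*/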
class LlmApiServiceFactory {
static createVllm(params) {
return new OpenAiService(params);
}
static createDeepinfra(params) {
return new DeepinfraService(params);
}
static create(params) {
switch (params.type) {
case "vllm-openai":
return this.createVllm(params);
case "deepinfra":
return this.createDeepinfra(params);
default:
throw "Неизвестный тип LLM API";
}
}
}
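/*
* Illustrative usage sketch (not part of the bundle). The parameter shape below is
* assumed from the constructors and createRequest above; the URL, model name and
* predict_params values are placeholders, not real configuration.
*
* const llm = LlmApiServiceFactory.create({
*   type: "deepinfra",
*   url: "https://api.deepinfra.com",
*   model: "example-model",
*   predict_params: { temperature: 0, n_predict: 1024, system_prompt: "You are a helpful assistant." }
* });
* const generate = llm.predict({
*   requestType: UserToLlmRequestTypeEnum.Regular,
*   abortController: new AbortController()
* });
* for await (const chunk of await generate({ prompt: "Hello" })) {
*   process.stdout.write(chunk.token.text);
* }
*/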
export { LlmApiServiceFactory as L };
//# sourceMappingURL=LlmApiServiceFactory-9cJF8P5t.js.map