import { O as OpenAiService, U as UserToLlmRequestTypeEnum } from './OpenAiService-05Srl9E-.js';
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { d as private_env } from './shared-server-49TKSBDM.js';
import dns from 'node:dns';

// Prefer IPv4 lookups so endpoints without IPv6 connectivity resolve correctly.
dns.setDefaultResultOrder("ipv4first");

class DeepinfraService {
  url = "";
  llmParams;
  apiKey;
  constructor(params) {
    this.url = params.url;
    this.llmParams = params;
    this.apiKey = private_env.DEEPINFRA_API_KEY;
  }
  _getHeaders() {
    let headers = { "Content-Type": "application/json" };
    if (this.apiKey) {
      headers["Authorization"] = this.apiKey;
    }
    return headers;
  }
  async getModels() {
    try {
      const response = await fetch(`${this.url}/v1/openai/models`, {
        method: "GET",
        headers: this._getHeaders()
      });
      if (response.ok) {
        let json = await response.json();
        let result = json["data"].map((o) => o["id"]);
        return result;
      }
    } catch (error) {
      console.error("DeepinfraService.getModels error:");
      console.error(JSON.parse(JSON.stringify(error)));
    }
    return [];
  }
  async health() {
    return "ok";
  }
  async tokenize(prompt, abortController) {
    return null;
  }
  /**
   * Do not use this function for now: there is no way to strip the chat template.
   * @param tokens
   * @param abortController
   * @returns
   */
  async detokenize(tokens, abortController) {
    return null;
  }
  /**
   * Builds an LLM request from the generation parameters and the messages array.
   * @param prompt Prompt sent to the LLM in a message with the `user` role.
   * @param requestType Request type used to select a predefined system prompt.
   * @param systemPrompt Custom system prompt for non-standard cases; for example, the "why questions" flow (InvestigatorService) uses this parameter. Applied only when requestType = UserToLlmRequestTypeEnum.Raw.
   * @returns
   */
  async createRequest(prompt, requestType, systemPrompt) {
    const llmParams = this.llmParams;
    const request = {
      "stream": true,
      "model": llmParams.model
    };
    if (llmParams.predict_params?.stop != void 0 && llmParams.predict_params.stop.length > 0) {
      const nonEmptyStop = llmParams.predict_params.stop.filter((o) => o != "");
      if (nonEmptyStop.length > 0) {
        // Send only the non-empty stop sequences.
        request["stop"] = nonEmptyStop;
      }
    }
    if (llmParams.predict_params?.n_predict != null) {
      request["max_tokens"] = Number(llmParams.predict_params?.n_predict);
    }
    request["temperature"] = llmParams.predict_params?.temperature || 0;
    if (llmParams.predict_params?.top_k != null) {
      request["top_k"] = Number(llmParams.predict_params.top_k);
    }
    if (llmParams.predict_params?.top_p != null) {
      request["top_p"] = Number(llmParams.predict_params.top_p);
    }
    if (llmParams.predict_params?.min_p != null) {
      request["min_p"] = Number(llmParams.predict_params.min_p);
    }
    if (llmParams.predict_params?.seed != null) {
      request["seed"] = Number(llmParams.predict_params.seed);
    }
    if (llmParams.predict_params?.n_keep != null) {
      request["n_keep"] = Number(llmParams.predict_params.n_keep);
    }
    if (llmParams.predict_params?.cache_prompt != null) {
      request["cache_prompt"] = Boolean(llmParams.predict_params.cache_prompt);
    }
    if (llmParams.predict_params?.repeat_penalty != null) {
      // llama.cpp-style repeat_penalty maps to the OpenAI-compatible repetition_penalty field.
      request["repetition_penalty"] = Number(llmParams.predict_params.repeat_penalty);
    }
    if (llmParams.predict_params?.repeat_last_n != null) {
      request["repeat_last_n"] = Number(llmParams.predict_params.repeat_last_n);
    }
    if (llmParams.predict_params?.presence_penalty != null) {
      request["presence_penalty"] = Number(llmParams.predict_params.presence_penalty);
    }
    if (llmParams.predict_params?.frequency_penalty != null) {
      request["frequency_penalty"] = Number(llmParams.predict_params.frequency_penalty);
    }
    request["messages"] = this.createMessages(prompt, requestType, systemPrompt);
    return request;
  }
  createMessages(prompt, requestType, systemPrompt) {
    const actualPrompt = this.applyLlmTemplateToPrompt(prompt);
    let messages = [];
    const finalSystemPrompt = this.selectSystemPrompt(requestType, systemPrompt);
    if (finalSystemPrompt) {
      messages.push({ role: "system", content: finalSystemPrompt });
    }
    messages.push({ role: "user", content: actualPrompt });
    return messages;
  }
  selectSystemPrompt(requestType, systemPrompt) {
    let prompt = "";
    switch (requestType) {
      case UserToLlmRequestTypeEnum.Regular:
        prompt = this.llmParams.predict_params?.system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.Clarification:
        prompt = this.llmParams.predict_params?.clarification_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.ClarificationWithUserSelectedSearchResults:
        prompt = this.llmParams.predict_params?.user_selected_sources_clarification_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.UserSelectedSearchResults:
        prompt = this.llmParams.predict_params?.user_selected_sources_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.Raw:
        prompt = systemPrompt || "";
        break;
    }
    return prompt;
  }
  applyLlmTemplateToPrompt(prompt) {
    let actualPrompt = prompt;
    if (this.llmParams.template != void 0) {
      actualPrompt = this.llmParams.template.replace("{{PROMPT}}", actualPrompt);
    }
    return actualPrompt;
  }
  async trimTokenizedText(sources, userRequest, { abortController }) {
    // No trimming for this backend: return the sources unchanged.
    return { result: sources, originalTokenCount: 0, slicedTokenCount: 0 };
  }
  predict({ requestType, abortController }) {
    return async ({ prompt, systemPrompt }) => {
      const request = await this.createRequest(prompt, requestType, systemPrompt);
      console.log(`Predict request. Url: ${this.url}`);
      console.log(`Messages: ${JSON.stringify(request["messages"])}`);
      let r = await fetch(`${this.url}/v1/openai/chat/completions`, {
        method: "POST",
        headers: this._getHeaders(),
        body: JSON.stringify(request),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      // Parse the OpenAI-compatible SSE stream and yield one token object per chunk.
      return async function* () {
        let tokenId = 0;
        while (true) {
          const out = await reader?.read() ?? { done: false, value: void 0 };
          if (out.done) {
            reader?.cancel();
            break;
          }
          if (!out.value) {
            reader?.cancel();
            break;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              let isDone = false;
              // A chunk may contain several "data: ..." lines; concatenate their delta contents.
              out.value.trim().split(/\n/).forEach((line) => {
                if (line.includes("data: [DONE]")) {
                  isDone = true;
                  return;
                }
                try {
                  const parsedData = JSON.parse(line.replace(/^data: /, ""));
                  if (parsedData.choices && parsedData.choices.length > 0 && parsedData.choices[0]?.delta?.content) {
                    tokenValue += parsedData.choices[0]?.delta?.content;
                  }
                } catch {
                  console.warn(`Invalid JSON string skipped: ${line}`);
                }
              });
              if (isDone) {
                reader?.cancel();
                break;
              }
            } catch (e) {
              console.log("Invalid llm response");
              console.log(e);
            }
          }
          yield {
            token: { id: tokenId++, text: tokenValue ?? "", logprob: 0, special: false },
            generated_text: null,
            details: null
          };
"", logprob: 0, special: false }, generated_text: null, details: null }; } }(); }; } createLogFile(text, namePrefix = "") { if (!private_env.LOGS_ROOT_FOLDER) { return; } try { const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama"); if (!existsSync(logsDirectory)) { mkdirSync(logsDirectory, { recursive: true }); } const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, ""); const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`); writeFileSync(logFilePath, text); console.log(`Log file created: ${logFilePath}`); } catch (e) { console.log(`Failed to create log file in llama service`); console.log(e); } } } class LlmApiServiceFactory { static createVllm(params) { return new OpenAiService(params); } static createDeepinfra(params) { return new DeepinfraService(params); } static create(params) { switch (params.type) { case "vllm-openai": return this.createVllm(params); case "deepinfra": return this.createDeepinfra(params); default: throw "Неизвестный тип LLM API"; } } } export { LlmApiServiceFactory as L }; //# sourceMappingURL=LlmApiServiceFactory-9cJF8P5t.js.map