import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { d as private_env } from './shared-server-49TKSBDM.js';
import dns from 'node:dns';

// Request types used to pick a predefined system prompt (see selectSystemPrompt()).
var UserToLlmRequestTypeEnum = /* @__PURE__ */ ((UserToLlmRequestTypeEnum2) => {
  UserToLlmRequestTypeEnum2[UserToLlmRequestTypeEnum2["Regular"] = 0] = "Regular";
  UserToLlmRequestTypeEnum2[UserToLlmRequestTypeEnum2["Clarification"] = 10] = "Clarification";
  UserToLlmRequestTypeEnum2[UserToLlmRequestTypeEnum2["UserSelectedSearchResults"] = 20] = "UserSelectedSearchResults";
  UserToLlmRequestTypeEnum2[UserToLlmRequestTypeEnum2["ClarificationWithUserSelectedSearchResults"] = 30] = "ClarificationWithUserSelectedSearchResults";
  UserToLlmRequestTypeEnum2[UserToLlmRequestTypeEnum2["Raw"] = 40] = "Raw";
  return UserToLlmRequestTypeEnum2;
})(UserToLlmRequestTypeEnum || {});

// Resolve DNS with IPv4 results first.
dns.setDefaultResultOrder("ipv4first");

class OpenAiService {
  url = "";
  llmParams;

  constructor(params) {
    this.url = params.url;
    this.llmParams = params;
  }

  // Returns the model ids exposed by the OpenAI-compatible /v1/models endpoint, or [] on failure.
  async getModels() {
    try {
      const response = await fetch(`${this.url}/v1/models`, {
        method: "GET",
        headers: { "Content-Type": "application/json" }
      });
      if (response.ok) {
        let json = await response.json();
        let result = json["data"].map((o) => o["id"]);
        return result;
      }
    } catch (error) {
      console.error("OpenAiService.getModels error:");
      console.error(JSON.parse(JSON.stringify(error)));
    }
    return [];
  }

  // Returns "ok" if the server's /health endpoint responds, "unavailable" otherwise.
  async health() {
    try {
      const response = await fetch(`${this.url}/health`, {
        method: "GET",
        headers: { "Content-Type": "application/json" }
      });
      if (response.ok) {
        return "ok";
      }
    } catch (error) {
      console.error("OpenAiService.health error:");
      console.error(JSON.parse(JSON.stringify(error)));
    }
    return "unavailable";
  }

  // Tokenizes the prompt (after applying the LLM template) via the /tokenize endpoint.
  // Returns { tokens, maxLength } where maxLength is the model's context size, or null on failure.
  async tokenize(prompt, abortController) {
    const model = (await this.getModels())[0];
    const actualPrompt = this.applyLlmTemplateToPrompt(prompt);
    const requestData = { model, prompt: actualPrompt, add_special_tokens: false };
    const response = await fetch(`${this.url}/tokenize`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(requestData),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.tokens) {
        return { tokens: data.tokens, maxLength: data.max_model_len };
      }
    } else if (response.status === 404) {
      console.log("Tokenization endpoint not found (404).");
    } else {
      console.log(`Failed to tokenize: ${await response.text()}`);
    }
    return null;
  }

  /**
   * Do not use this function for now: there is no way to strip the chat template from its output.
   * @param tokens
   * @param abortController
   * @returns
   */
  async detokenize(tokens, abortController) {
    const model = (await this.getModels())[0];
    tokens = tokens || [];
    const requestData = { model, tokens };
    const response = await fetch(`${this.url}/detokenize`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(requestData),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.prompt !== void 0) {
        return data.prompt.trim();
      }
    } else if (response.status === 404) {
      console.log("Detokenization endpoint not found (404).");
    } else {
      console.log(`Failed to detokenize`);
      console.log(await response.json());
    }
    return null;
  }
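  /*
   * Usage sketch for the two endpoints above (illustrative only; the url and prompt values
   * here are assumed, not taken from configuration):
   *
   *   const service = new OpenAiService({ url: "http://localhost:8000" });
   *   const abortController = new AbortController();
   *   const tokenized = await service.tokenize("Some source text", abortController);
   *   if (tokenized) {
   *     // tokenized.tokens: token ids; tokenized.maxLength: the model's context size.
   *     const head = tokenized.tokens.slice(0, 100);
   *     const text = await service.detokenize(head, abortController);
   *   }
   *
   * As noted above, detokenize() is not ready for general use because the chat template
   * cannot be stripped from its output.
   */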
  /**
   * Builds an LLM request from the configured sampling parameters and the messages array.
   * @param prompt The prompt sent to the LLM in a message with the user role.
   * @param requestType Request type used to pick a predefined system prompt.
   * @param systemPrompt Custom system prompt for non-standard cases.
   * For example, the "why-asker" flow (InvestigatorService) uses this parameter.
   * It only takes effect when requestType = UserToLlmRequestTypeEnum.Raw.
   * @returns
   */
  async createRequest(prompt, requestType, systemPrompt) {
    const llmParams = this.llmParams;
    const model = (await this.getModels())[0];
    const request = { "stream": true, "model": model };
    if (llmParams.predict_params?.stop != void 0 && llmParams.predict_params.stop.length > 0) {
      const nonEmptyStop = llmParams.predict_params.stop.filter((o) => o != "");
      if (nonEmptyStop.length > 0) {
        // Use the filtered list so empty stop strings are not sent to the server.
        request["stop"] = nonEmptyStop;
      }
    }
    if (llmParams.predict_params?.n_predict != null) {
      request["max_tokens"] = Number(llmParams.predict_params?.n_predict);
    }
    request["temperature"] = llmParams.predict_params?.temperature || 0;
    if (llmParams.predict_params?.top_k != null) {
      request["top_k"] = Number(llmParams.predict_params.top_k);
    }
    if (llmParams.predict_params?.top_p != null) {
      request["top_p"] = Number(llmParams.predict_params.top_p);
    }
    if (llmParams.predict_params?.min_p != null) {
      request["min_p"] = Number(llmParams.predict_params.min_p);
    }
    if (llmParams.predict_params?.seed != null) {
      request["seed"] = Number(llmParams.predict_params.seed);
    }
    if (llmParams.predict_params?.n_keep != null) {
      request["n_keep"] = Number(llmParams.predict_params.n_keep);
    }
    if (llmParams.predict_params?.cache_prompt != null) {
      request["cache_prompt"] = Boolean(llmParams.predict_params.cache_prompt);
    }
    if (llmParams.predict_params?.repeat_penalty != null) {
      request["repetition_penalty"] = Number(llmParams.predict_params.repeat_penalty);
    }
    if (llmParams.predict_params?.repeat_last_n != null) {
      request["repeat_last_n"] = Number(llmParams.predict_params.repeat_last_n);
    }
    if (llmParams.predict_params?.presence_penalty != null) {
      request["presence_penalty"] = Number(llmParams.predict_params.presence_penalty);
    }
    if (llmParams.predict_params?.frequency_penalty != null) {
      request["frequency_penalty"] = Number(llmParams.predict_params.frequency_penalty);
    }
    request["messages"] = this.createMessages(prompt, requestType, systemPrompt);
    return request;
  }

  // Builds the messages array: an optional system message followed by the user message.
  createMessages(prompt, requestType, systemPrompt) {
    const actualPrompt = this.applyLlmTemplateToPrompt(prompt);
    let messages = [];
    const finalSystemPrompt = this.selectSystemPrompt(requestType, systemPrompt);
    if (finalSystemPrompt) {
      messages.push({ role: "system", content: finalSystemPrompt });
    }
    messages.push({ role: "user", content: actualPrompt });
    return messages;
  }

  // Picks the configured system prompt that matches the request type.
  selectSystemPrompt(requestType, systemPrompt) {
    let prompt = "";
    switch (requestType) {
      case UserToLlmRequestTypeEnum.Regular:
        prompt = this.llmParams.predict_params?.system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.Clarification:
        prompt = this.llmParams.predict_params?.clarification_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.ClarificationWithUserSelectedSearchResults:
        prompt = this.llmParams.predict_params?.user_selected_sources_clarification_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.UserSelectedSearchResults:
        prompt = this.llmParams.predict_params?.user_selected_sources_system_prompt || "";
        break;
      case UserToLlmRequestTypeEnum.Raw:
        prompt = systemPrompt || "";
        break;
    }
    return prompt;
  }

  // Substitutes the prompt into the configured template, if one is set.
  applyLlmTemplateToPrompt(prompt) {
    let actualPrompt = prompt;
    if (this.llmParams.template != void 0) {
      actualPrompt = this.llmParams.template.replace("{{PROMPT}}", actualPrompt);
    }
    return actualPrompt;
  }
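  /*
   * Shape sketch of the request body that createRequest() builds for /v1/chat/completions
   * (concrete values below are assumed examples; only parameters actually present in
   * predict_params are copied into the request):
   *
   *   {
   *     "stream": true,
   *     "model": "<first id returned by /v1/models>",
   *     "temperature": 0,
   *     "max_tokens": 512,
   *     "messages": [
   *       { "role": "system", "content": "<prompt chosen by selectSystemPrompt()>" },
   *       { "role": "user", "content": "<prompt after applyLlmTemplateToPrompt()>" }
   *     ]
   *   }
   *
   * The request is sent by predict() further below, which streams the answer back.
   * Usage sketch (url and predict_params values are assumed):
   *
   *   const service = new OpenAiService({
   *     url: "http://localhost:8000",
   *     predict_params: { n_predict: 512, temperature: 0 }
   *   });
   *   const abortController = new AbortController();
   *   const generate = service.predict({ requestType: UserToLlmRequestTypeEnum.Regular, abortController });
   *   let answer = "";
   *   for await (const chunk of await generate({ prompt: "Hello" })) {
   *     answer += chunk.token.text;
   *   }
   */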
  // Trims `sources` so that it, the user request, the system prompt and the generation budget
  // (n_predict) all fit into the model's context window. Returns the trimmed text together with
  // the original and trimmed token counts.
  async trimTokenizedText(sources, userRequest, { abortController }) {
    let sourcesTokensData = await this.tokenize(sources, abortController);
    const maxTokenCount = sourcesTokensData?.maxLength;
    let systemPromptTokenCount = 0;
    if (this.llmParams.predict_params?.system_prompt) {
      // Count the system prompt's tokens (tokenize() returns { tokens, maxLength }).
      systemPromptTokenCount = (await this.tokenize(this.llmParams.predict_params?.system_prompt, abortController))?.tokens.length || 0;
    }
    const originalTokenCount = sourcesTokensData?.tokens.length || -1;
    const auxTokensData = await this.tokenize(this.applyLlmTemplateToPrompt(this.llmParams.predict_params?.user_prompt || "") + userRequest, abortController);
    let maxLength = Number(maxTokenCount) - Number(this.llmParams.predict_params?.n_predict) - (auxTokensData?.tokens.length ?? 0) - systemPromptTokenCount;
    maxLength = maxLength < 0 ? 0 : maxLength;
    if (sourcesTokensData !== null && sourcesTokensData.tokens) {
      sourcesTokensData.tokens = sourcesTokensData.tokens.slice(0, maxLength);
      const detokenizedPrompt = await this.detokenize(sourcesTokensData.tokens, abortController);
      if (detokenizedPrompt !== null) {
        sources = detokenizedPrompt;
      } else {
        // Fall back to a character-based cut if detokenization is unavailable.
        sources = sources.substring(0, maxLength);
      }
    } else {
      sources = sources.substring(0, maxLength);
    }
    return { result: sources, originalTokenCount, slicedTokenCount: sourcesTokensData?.tokens.length };
  }

  // Returns a generate function bound to the given request type and abort controller. The returned
  // function POSTs to /v1/chat/completions and yields tokens parsed from the SSE response stream.
  predict({ requestType, abortController }) {
    return async ({ prompt, systemPrompt }) => {
      const request = await this.createRequest(prompt, requestType, systemPrompt);
      console.log(`Predict request. Url: ${this.url}`);
      console.log(`Messages: ${JSON.stringify(request["messages"])}`);
      let r;
      while (true) {
        r = await fetch(`${this.url}/v1/chat/completions`, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify(request),
          signal: abortController.signal
        });
        if (r.status === 404) {
          // Retry on 404 only if a retry interval (in seconds) is configured.
          if (!private_env.LLM_API_404_RETRY_INTERVAL) {
            break;
          }
          console.log(`Received 404, retrying after ${private_env.LLM_API_404_RETRY_INTERVAL} seconds...`);
          await new Promise((resolve2) => setTimeout(resolve2, Number(private_env.LLM_API_404_RETRY_INTERVAL) * 1e3));
        } else {
          break;
        }
      }
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      return async function* () {
        let tokenId = 0;
        while (true) {
          const out = await reader?.read() ?? { done: false, value: void 0 };
          if (out.done) {
            reader?.cancel();
            break;
          }
          if (!out.value) {
            reader?.cancel();
            break;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              let isDone = false;
              // A single chunk may carry several SSE "data:" lines; concatenate their delta contents.
              out.value.trim().split(/\n/).forEach((line) => {
                if (line.includes("data: [DONE]")) {
                  isDone = true;
                }
                try {
                  const parsedData = JSON.parse(line.replace(/^data: /, ""));
                  if (parsedData.choices && parsedData.choices.length > 0 && parsedData.choices[0]?.delta?.content) {
                    tokenValue += parsedData.choices[0]?.delta?.content;
                  }
                } catch {
                }
              });
              if (isDone) {
                reader?.cancel();
                break;
              }
            } catch (e) {
              console.log("Invalid llm response");
              console.log(e);
            }
          }
          console.log(tokenValue);
          yield {
            token: { id: tokenId++, text: tokenValue ?? "", logprob: 0, special: false },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }

  // Writes `text` to a timestamped log file under LOGS_ROOT_FOLDER/llama, if logging is configured.
  createLogFile(text, namePrefix = "") {
    if (!private_env.LOGS_ROOT_FOLDER) {
      return;
    }
    try {
      const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama");
      if (!existsSync(logsDirectory)) {
        mkdirSync(logsDirectory, { recursive: true });
      }
      const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "");
      const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
      writeFileSync(logFilePath, text);
      console.log(`Log file created: ${logFilePath}`);
    } catch (e) {
      console.log(`Failed to create log file in llama service`);
      console.log(e);
    }
  }
}

export { OpenAiService as O, UserToLlmRequestTypeEnum as U };
//# sourceMappingURL=OpenAiService-05Srl9E-.js.map