Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import { MetricsServer } from "$lib/server/metrics"; | |
| import type { WebSearchScrapedSource, WebSearchUsedSource } from "$lib/types/WebSearch"; | |
| import type { EmbeddingBackendModel } from "../../embeddingModels"; | |
| import { getSentenceSimilarity, innerProduct } from "../../sentenceSimilarity"; | |
| import { MarkdownElementType, type MarkdownElement } from "../markdown/types"; | |
| import { stringifyMarkdownElement } from "../markdown/utils/stringify"; | |
| import { getCombinedSentenceSimilarity } from "./combine"; | |
| import { flattenTree } from "./tree"; | |
| const MIN_CHARS = 3_000; | |
| const SOFT_MAX_CHARS = 8_000; | |
| export async function findContextSources( | |
| sources: WebSearchScrapedSource[], | |
| prompt: string, | |
| embeddingModel: EmbeddingBackendModel | |
| ) { | |
| const startTime = Date.now(); | |
| const sourcesMarkdownElems = sources.map((source) => flattenTree(source.page.markdownTree)); | |
| const markdownElems = sourcesMarkdownElems.flat(); | |
| // When using CPU embedding (transformersjs), join sentences together to the max character limit | |
| // to reduce inference time | |
| const embeddingFunc = | |
| embeddingModel.endpoints[0].type === "transformersjs" | |
| ? getCombinedSentenceSimilarity | |
| : getSentenceSimilarity; | |
| const embeddings = await embeddingFunc( | |
| embeddingModel, | |
| prompt, | |
| markdownElems | |
| .map(stringifyMarkdownElement) | |
| // Safety in case the stringified markdown elements are too long | |
| // but chunking should have happened earlier | |
| .map((elem) => elem.slice(0, embeddingModel.chunkCharLength)) | |
| ); | |
| const topEmbeddings = embeddings | |
| .sort((a, b) => a.distance - b.distance) | |
| .filter((embedding) => markdownElems[embedding.idx].type !== MarkdownElementType.Header); | |
| let totalChars = 0; | |
| const selectedMarkdownElems = new Set<MarkdownElement>(); | |
| const selectedEmbeddings: number[][] = []; | |
| for (const embedding of topEmbeddings) { | |
| const elem = markdownElems[embedding.idx]; | |
| // Ignore elements that are too similar to already selected elements | |
| const tooSimilar = selectedEmbeddings.some( | |
| (selectedEmbedding) => innerProduct(selectedEmbedding, embedding.embedding) < 0.01 | |
| ); | |
| if (tooSimilar) continue; | |
| // Add element | |
| if (!selectedMarkdownElems.has(elem)) { | |
| selectedMarkdownElems.add(elem); | |
| selectedEmbeddings.push(embedding.embedding); | |
| totalChars += elem.content.length; | |
| } | |
| // Add element's parent (header) | |
| if (elem.parent && !selectedMarkdownElems.has(elem.parent)) { | |
| selectedMarkdownElems.add(elem.parent); | |
| totalChars += elem.parent.content.length; | |
| } | |
| if (totalChars > SOFT_MAX_CHARS) break; | |
| if (totalChars > MIN_CHARS && embedding.distance > 0.25) break; | |
| } | |
| const contextSources = sourcesMarkdownElems | |
| .map<WebSearchUsedSource>((elems, idx) => { | |
| const sourceSelectedElems = elems.filter((elem) => selectedMarkdownElems.has(elem)); | |
| const context = sourceSelectedElems.map(stringifyMarkdownElement).join("\n"); | |
| const source = sources[idx]; | |
| return { ...source, context }; | |
| }) | |
| .filter((contextSource) => contextSource.context.length > 0); | |
| MetricsServer.getMetrics().webSearch.embeddingDuration.observe(Date.now() - startTime); | |
| return contextSources; | |
| } | |