Spaces:
Paused
Paused
File size: 3,359 Bytes
b17a5c8 9264459 2c00ea8 9264459 2c00ea8 e5f4e9a 2c00ea8 e5f4e9a 2c00ea8 c504a2c e5f4e9a 2a808d7 9264459 2c00ea8 2a808d7 2c00ea8 e5f4e9a b17a5c8 2c00ea8 b17a5c8 2c00ea8 e5f4e9a 2c00ea8 e5f4e9a 2c00ea8 3148b2c 2c00ea8 3148b2c 2c00ea8 e5f4e9a 2c00ea8 e5f4e9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import { defaultEmbeddingModel, embeddingModels } from "$lib/server/embeddingModels";
import type { Conversation } from "$lib/types/Conversation";
import type { MessageUpdate } from "$lib/types/MessageUpdate";
import type { Message } from "$lib/types/Message";
import type { WebSearch, WebSearchScrapedSource } from "$lib/types/WebSearch";
import type { Assistant } from "$lib/types/Assistant";
import { search } from "./search/search";
import { scrape } from "./scrape/scrape";
import { findContextSources } from "./embed/embed";
import { removeParents } from "./markdown/tree";
/** Upper bound on how many search result pages are handed to the scraper. */
const MAX_N_PAGES_TO_SCRAPE = 8;

/** Upper bound on how many non-empty scraped pages are kept for embedding. */
const MAX_N_PAGES_TO_EMBED = 5;
/**
 * Callback used by the web-search steps to report progress.
 *
 * @param message - Text of the update.
 * @param args - Optional extra strings attached to the update.
 * @param type - Optional kind of update; the callback built by
 *   `makeAppendUpdate` reports `"update"` when this is omitted.
 */
export type AppendUpdate = (message: string, args?: string[], type?: "error" | "update") => void;
/**
 * Builds an {@link AppendUpdate} callback that forwards every call to
 * `updatePad` as a `"webSearch"` message update.
 *
 * @param updatePad - Sink that receives the constructed update object.
 * @returns A callback that wraps its arguments into a `"webSearch"` update,
 *   using `"update"` as the message type when none is given.
 */
function makeAppendUpdate(updatePad: (upd: MessageUpdate) => void): AppendUpdate {
  return function appendUpdate(message, args, type) {
    const messageType = type ?? "update";
    return updatePad({ type: "webSearch", messageType, message, args });
  };
}
/**
 * Runs the web-search pipeline for the last message of a conversation:
 * search, scrape the top results, then pick the chunks most relevant to the
 * prompt.
 *
 * Progress and the final sources are reported through `updatePad`. Any error
 * raised inside the pipeline is caught, logged, reported as an `"error"`
 * update, and turned into a result with empty `searchQuery`, `results` and
 * `contextSources`.
 *
 * Note: the prompt is read from the last message before the `try` block, so
 * an empty `messages` array throws instead of producing the fallback result.
 *
 * @param conv - Conversation whose `embeddingModel` id selects the embedding model.
 * @param messages - Conversation messages; the last one's content is the prompt.
 * @param updatePad - Sink for progress, error and sources updates.
 * @param ragSettings - Optional assistant RAG settings forwarded to `search`.
 * @returns The web search record, including scraped results and context sources.
 */
export async function runWebSearch(
  conv: Conversation,
  messages: Message[],
  updatePad: (upd: MessageUpdate) => void,
  ragSettings?: Assistant["rag"]
): Promise<WebSearch> {
  const lastMessage = messages[messages.length - 1];
  const prompt = lastMessage.content;
  const createdAt = new Date();
  const updatedAt = new Date();
  const appendUpdate = makeAppendUpdate(updatePad);

  try {
    // Use the conversation's embedding model, or the default one if its id is unknown.
    const matchingModel = embeddingModels.find((model) => model.id === conv.embeddingModel);
    const embeddingModel = matchingModel ?? defaultEmbeddingModel;
    if (!embeddingModel) {
      throw new Error(`Embedding model ${conv.embeddingModel} not available anymore`);
    }

    // Search the web
    const { searchQuery, pages } = await search(messages, ragSettings, appendUpdate);
    if (pages.length === 0) {
      throw new Error("No results found for this search query");
    }

    // Scrape the leading results, then keep only pages that produced content
    appendUpdate("Browsing search results");
    const candidatePages = pages.slice(0, MAX_N_PAGES_TO_SCRAPE);
    const scrapePage = scrape(appendUpdate, embeddingModel.chunkCharLength);
    const scrapeOutcomes = await Promise.all(candidatePages.map(scrapePage));
    const pagesWithContent = scrapeOutcomes
      .filter((outcome): outcome is WebSearchScrapedSource => Boolean(outcome))
      .filter((outcome) => outcome.page.markdownTree.children.length > 0);
    const scrapedPages = pagesWithContent.slice(0, MAX_N_PAGES_TO_EMBED);
    if (scrapedPages.length === 0) {
      throw new Error(`No text found in the first ${MAX_N_PAGES_TO_SCRAPE} results`);
    }

    // Select the sources most similar to the prompt
    appendUpdate("Extracting relevant information");
    const relevantSources = await findContextSources(scrapedPages, prompt, embeddingModel);
    const contextSources = relevantSources.map((source) => ({
      ...source,
      page: { ...source.page, markdownTree: removeParents(source.page.markdownTree) },
    }));

    updatePad({
      type: "webSearch",
      messageType: "sources",
      message: "sources",
      sources: contextSources,
    });

    // Built after the sources update so removeParents runs in the original order.
    const results = scrapedPages.map(({ page, ...rest }) => ({
      ...rest,
      page: { ...page, markdownTree: removeParents(page.markdownTree) },
    }));

    return { prompt, searchQuery, results, contextSources, createdAt, updatedAt };
  } catch (err) {
    let message: string;
    if (err instanceof Error) {
      message = err.message;
    } else {
      message = String(err);
    }
    console.error(message);
    appendUpdate("An error occurred", [JSON.stringify(message)], "error");

    // Fall back to an empty result so callers always receive a WebSearch.
    return {
      prompt,
      searchQuery: "",
      results: [],
      contextSources: [],
      createdAt,
      updatedAt,
    };
  }
}
|