Spaces:

huggingfacejs
/

inference-widgets

Running on CPU Upgrade

App Files Files Community

machineuser committed on Mar 6, 2024

Commit

5a13705

1 Parent(s): 7a24123

Sync widgets demo

Browse files

Files changed (2) hide show

packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte +18 -12
packages/widgets/src/routes/+page.svelte +10 -15

packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte CHANGED Viewed

@@ -1,6 +1,7 @@
 <script lang="ts">
 	import { onMount, tick } from "svelte";
 	import type { WidgetProps, ExampleRunOpts, InferenceRunOpts } from "../../shared/types.js";
 	import { Template } from "@huggingface/jinja";
 	import type {
 		SpecialTokensMap,
@@ -22,7 +23,7 @@
 	import { addInferenceParameters, updateUrl } from "../../shared/helpers.js";
 	import { widgetStates, getTgiSupportedModels } from "../../stores.js";
 	import type { Writable } from "svelte/store";
-	import { isChatInput, isObject, isTextInput } from "../../shared/inputValidation.js";
 	import { isValidOutputText } from "../../shared/outputValidation.js";
 	export let apiToken: WidgetProps["apiToken"];
@@ -41,7 +42,7 @@
 	let messages: ChatMessage[] = [];
 	let error: string = "";
-	let isLoading = false;
 	let outputJson: string;
 	let text = "";
@@ -52,9 +53,6 @@
 	// Check config and compile template
 	onMount(() => {
-		(async () => {
-			tgiSupportedModels = await getTgiSupportedModels(apiUrl);
-		})();
 		const config = model.config;
 		if (config === undefined) {
 			error = "Model config not found";
@@ -79,7 +77,7 @@
 			return;
 		}
-		inferenceClient = new HfInference();
 	});
 	async function handleNewMessage(): Promise<void> {
@@ -108,6 +106,8 @@
 	async function getOutput({
 		withModelLoading = false,
 		exampleOutput = undefined,
 	}: InferenceRunOpts<WidgetExampleOutputText> = {}) {
 		if (exampleOutput) {
@@ -151,6 +151,15 @@
 		text = "";
 		error = "";
 		try {
 			if ($tgiSupportedModels?.has(model.id)) {
 				console.debug("Starting text generation using the TGI streaming API");
 				let newMessage = {
@@ -164,7 +173,7 @@
 						model: model.id,
 						accessToken: apiToken,
 					},
-					{ signal: abort?.signal }
 				);
 				for await (const newToken of tokenStream) {
 					if (newToken.token.special) continue;
@@ -175,10 +184,7 @@
 			} else {
 				console.debug("Starting text generation using the synchronous API");
 				input.parameters.max_new_tokens = 100;
-				const output = await inferenceClient.textGeneration(
-					{ ...input, model: model.id, accessToken: apiToken },
-					{ includeCredentials, dont_load_model: !withModelLoading, signal: abort?.signal }
-				);
 				messages = [...messages, { role: "assistant", content: output.generated_text }];
 				await tick();
 			}
@@ -222,7 +228,7 @@
 				return;
 			}
 			const exampleOutput = example.output;
-			getOutput({ ...opts.inferenceOpts, exampleOutput });
 		} finally {
 			isLoading = false;
 		}

 <script lang="ts">
 	import { onMount, tick } from "svelte";
 	import type { WidgetProps, ExampleRunOpts, InferenceRunOpts } from "../../shared/types.js";
+	import type { Options } from "@huggingface/inference";
 	import { Template } from "@huggingface/jinja";
 	import type {
 		SpecialTokensMap,
 	import { addInferenceParameters, updateUrl } from "../../shared/helpers.js";
 	import { widgetStates, getTgiSupportedModels } from "../../stores.js";
 	import type { Writable } from "svelte/store";
+	import { isChatInput, isTextInput } from "../../shared/inputValidation.js";
 	import { isValidOutputText } from "../../shared/outputValidation.js";
 	export let apiToken: WidgetProps["apiToken"];
 	let messages: ChatMessage[] = [];
 	let error: string = "";
+	let isLoading: boolean = false;
 	let outputJson: string;
 	let text = "";
 	// Check config and compile template
 	onMount(() => {
 		const config = model.config;
 		if (config === undefined) {
 			error = "Model config not found";
 			return;
 		}
+		inferenceClient = new HfInference(apiToken);
 	});
 	async function handleNewMessage(): Promise<void> {
 	async function getOutput({
 		withModelLoading = false,
+		isOnLoadCall = false,
+		useCache = true,
 		exampleOutput = undefined,
 	}: InferenceRunOpts<WidgetExampleOutputText> = {}) {
 		if (exampleOutput) {
 		text = "";
 		error = "";
 		try {
+			const opts = {
+				dont_load_model: isOnLoadCall,
+				includeCredentials,
+				signal: abort?.signal,
+				use_cache: useCache,
+				wait_for_model: withModelLoading,
+			} satisfies Options;
+			tgiSupportedModels = await getTgiSupportedModels(apiUrl);
 			if ($tgiSupportedModels?.has(model.id)) {
 				console.debug("Starting text generation using the TGI streaming API");
 				let newMessage = {
 						model: model.id,
 						accessToken: apiToken,
 					},
+					opts
 				);
 				for await (const newToken of tokenStream) {
 					if (newToken.token.special) continue;
 			} else {
 				console.debug("Starting text generation using the synchronous API");
 				input.parameters.max_new_tokens = 100;
+				const output = await inferenceClient.textGeneration({ ...input, model: model.id, accessToken: apiToken }, opts);
 				messages = [...messages, { role: "assistant", content: output.generated_text }];
 				await tick();
 			}
 				return;
 			}
 			const exampleOutput = example.output;
+			await getOutput({ ...opts.inferenceOpts, exampleOutput });
 		} finally {
 			isLoading = false;
 		}

packages/widgets/src/routes/+page.svelte CHANGED Viewed

@@ -45,29 +45,17 @@
 					bos_token: "<s>",
 					eos_token: "</s>",
 					unk_token: "<unk>",
-					pad_token: undefined,
 				},
 			},
 			widgetData: [
 				{ text: "This is a text-only example", example_title: "Text only" },
 				{
-					messages: [
-						{
-							content: "You are a helpful assistant replying in very brief and straight-to-the-point answers.",
-							role: "system",
-						},
-						{ content: "Please exlain QCD in very few words", role: "user" },
-					],
 					example_title: "Chat messages",
 				},
 				{
-					messages: [
-						{
-							content: "You are a helpful assistant replying in very brief and straight-to-the-point answers.",
-							role: "system",
-						},
-						{ content: "Please exlain QCD in very few words", role: "user" },
-					],
 					output: {
 						text: "QCD is the physics of strong force and small particles.",
 					},
@@ -80,6 +68,13 @@
 					},
 					example_title: "Text only with Output",
 				},
 			],
 		},
 		{

 					bos_token: "<s>",
 					eos_token: "</s>",
 					unk_token: "<unk>",
+					pad_token: null,
 				},
 			},
 			widgetData: [
 				{ text: "This is a text-only example", example_title: "Text only" },
 				{
+					messages: [{ content: "Please exlain QCD in very few words", role: "user" }],
 					example_title: "Chat messages",
 				},
 				{
+					messages: [{ content: "Please exlain QCD in very few words", role: "user" }],
 					output: {
 						text: "QCD is the physics of strong force and small particles.",
 					},
 					},
 					example_title: "Text only with Output",
 				},
+				{
+					example_title: "Invalid example - unsupported role",
+					messages: [
+						{ role: "system", content: "This will fail because of the chat template" },
+						{ role: "user", content: "What's your favorite condiment?" },
+					],
+				},
 			],
 		},
 		{