Spaces:

Fraser
/

piclets

Running

App Files Files Community

piclets / src /lib /services /qwen3Client.ts

Fraser

new setup

d4c424f 27 days ago

raw

history blame

11.4 kB

	/**
	* Qwen3 Client - Drop-in replacement for rwkvClient using Qwen3 HF Space
	* Compatible with existing rwkvClient.predict("/chat", [...]) API
	* Uses proper Gradio Client connection instead of direct HTTP calls
	*/

	/**
	 * One chat turn in the role/content shape used by chat-style model APIs.
	 */
	interface Qwen3Message {
		/** Who authored the turn. */
		role: 'user' | 'assistant' | 'system';
		/** Text of the turn. */
		content: string;
	}

	/**
	 * Configuration accepted by the Qwen3Client constructor.
	 */
	interface Qwen3ClientOptions {
		/** Model identifier placed in the settings payload sent with each request. */
		model: string;
		/** Hugging Face Space identifier handed to the Gradio client when connecting. */
		huggingFaceSpace: string;
		/** Optional API key. */
		apiKey?: string;
	}

	/**
	 * Client for a Qwen3 Hugging Face Space that exposes the same
	 * `predict("/chat", [...])` surface as the rwkvClient it replaces.
	 *
	 * Requests go through the Gradio client found on `window.gradioClient`.
	 * When a request fails, `predict` degrades to locally generated fallback
	 * text instead of rejecting; `testConnection` bypasses that fallback so it
	 * reports the real state of the service.
	 */
	export class Qwen3Client {
		private options: Qwen3ClientOptions;
		private sessionId: string;
		// The Gradio client is attached to `window` at runtime and has no local typings.
		private gradioClient: any = null;

		/**
		 * @param options Overrides for the default Space and model.
		 */
		constructor(options: Partial<Qwen3ClientOptions> = {}) {
			this.options = {
				huggingFaceSpace: 'Qwen/Qwen3-Demo',
				model: 'qwen2.5-72b-instruct',
				...options
			};
			this.sessionId = this.generateSessionId();
		}

		/**
		 * Builds a random base-36 identifier used as the conversation id.
		 * Not cryptographically secure (uses Math.random).
		 */
		private generateSessionId(): string {
			const chunk = (): string => Math.random().toString(36).substring(2, 15);
			return chunk() + chunk();
		}

		/**
		 * Connects to the configured Space once; later calls are no-ops.
		 *
		 * @throws Error when `window.gradioClient.Client` is missing or the
		 *         connection attempt fails.
		 */
		private async initializeGradioClient(): Promise<void> {
			if (this.gradioClient) {
				return; // Already connected
			}

			try {
				// `typeof` guard keeps non-browser contexts on the descriptive error
				// below instead of a ReferenceError.
				if (typeof window === 'undefined' || !window.gradioClient?.Client) {
					throw new Error('Gradio Client not available - ensure App.svelte has loaded the client');
				}

				console.log(`🔗 Connecting to ${this.options.huggingFaceSpace}...`);
				this.gradioClient = await window.gradioClient.Client.connect(this.options.huggingFaceSpace);

				console.log(`✅ Connected to Qwen3 space: ${this.options.huggingFaceSpace}`);
			} catch (error) {
				console.error('Failed to initialize Qwen3 Gradio Client:', error);
				throw new Error(`Could not connect to Qwen3 space: ${error}`);
			}
		}

		/**
		 * Mimics `rwkvClient.predict("/chat", [...])`.
		 *
		 * @param endpoint Must be "/chat".
		 * @param params Positional list: [message, chat_history, system_prompt,
		 *        max_new_tokens, temperature, top_p, top_k, repetition_penalty].
		 *        `chat_history` is accepted for compatibility but is not forwarded.
		 * @returns `{ data: [responseText] }`. If the request to the Space fails,
		 *          `responseText` is locally generated fallback text.
		 * @throws Error for an unsupported endpoint, a non-string message, or a
		 *         failure to connect to the Space.
		 */
		async predict(endpoint: string, params: any[]): Promise<{data: any[]}> {
			if (endpoint !== '/chat') {
				throw new Error('Qwen3Client only supports "/chat" endpoint');
			}

			const [
				message,
				, // chat_history: positional placeholder, intentionally unused
				system_prompt = "You are a helpful assistant.",
				max_new_tokens = 2048,
				temperature = 0.7,
				top_p = 0.95,
				top_k = 50,
				repetition_penalty = 1.0
			] = params;

			// Fail early with a clear message rather than a TypeError raised
			// deep inside the request/fallback path.
			if (typeof message !== 'string') {
				throw new Error('Qwen3Client.predict requires a string message as the first parameter');
			}

			try {
				await this.initializeGradioClient();

				const response = await this.callQwen3API(message, {
					sys_prompt: system_prompt,
					model: this.options.model,
					max_new_tokens,
					temperature,
					top_p,
					top_k,
					repetition_penalty
				});

				return {
					data: [response]
				};
			} catch (error) {
				console.error('Qwen3Client error:', error);
				throw new Error(`Qwen3 API call failed: ${error}`);
			}
		}

		/**
		 * Best-effort request: returns the model's reply, or fallback text when
		 * the request fails for any reason. Never rejects for request failures.
		 */
		private async callQwen3API(message: string, options: Qwen3GenerationSettings): Promise<string> {
			try {
				return await this.requestQwen3Completion(message, options);
			} catch (error) {
				console.warn('Qwen3 Gradio API call failed, using fallback strategy:', error);
				return this.buildFallbackResponse(message, options);
			}
		}

		/**
		 * Sends one message to the Space's `/add_message` endpoint and returns the
		 * assistant's text.
		 *
		 * @throws Error when the client is not initialized, the call fails, or no
		 *         assistant text can be found in the result.
		 */
		private async requestQwen3Completion(message: string, options: Qwen3GenerationSettings): Promise<string> {
			if (!this.gradioClient) {
				throw new Error('Gradio client not initialized');
			}

			const settingsFormValue = {
				model: options.model || this.options.model,
				sys_prompt: options.sys_prompt || "You are a helpful assistant.",
				// NOTE(review): max_new_tokens is a token count while the cap of 38 is
				// described as a "38k" thinking budget — the units look mismatched;
				// confirm against the Space before changing.
				thinking_budget: Math.min(options.max_new_tokens || 20, 38),
				// `??` rather than `||` so an explicit 0 (e.g. temperature 0) is honoured.
				temperature: options.temperature ?? 0.7,
				top_p: options.top_p ?? 0.95,
				top_k: options.top_k ?? 50,
				repetition_penalty: options.repetition_penalty ?? 1.0
			};

			// Thinking is disabled for faster responses.
			const thinkingBtnState = {
				enable_thinking: false
			};

			// NOTE(review): a fresh, empty conversation state is sent on every call —
			// confirm the Space accepts a conversation_id with no matching context.
			const stateValue = {
				conversation_contexts: {},
				conversations: [],
				conversation_id: this.sessionId
			};

			console.log(`🤖 Calling Qwen3 add_message with: "${message.substring(0, 50)}..."`);

			// Argument order: input_value, settings_form_value,
			// thinking_btn_state_value, state_value.
			const result = await this.gradioClient.predict("/add_message", [
				message,
				settingsFormValue,
				thinkingBtnState,
				stateValue
			]);

			console.log('🔍 Raw Qwen3 response:', result);

			const text = Qwen3Client.extractAssistantText(result);
			if (text === undefined) {
				throw new Error('Could not extract text response from Qwen3 API result');
			}

			console.log('✅ Extracted Qwen3 response:', text.substring(0, 100) + '...');
			return text;
		}

		/**
		 * Scans `result.data` for an array whose last element is an assistant
		 * message and returns its text: either a string `content`, or the first
		 * non-empty `{ type: 'text', content }` part of an array `content`.
		 *
		 * @returns The text, or `undefined` when nothing matches.
		 */
		private static extractAssistantText(result: unknown): string | undefined {
			const data = (result as { data?: unknown } | null | undefined)?.data;
			if (!Array.isArray(data)) {
				return undefined;
			}

			for (const item of data) {
				if (!Array.isArray(item) || item.length === 0) {
					continue;
				}

				const lastMessage = item[item.length - 1];
				if (!lastMessage || lastMessage.role !== 'assistant' || !lastMessage.content) {
					continue;
				}

				const content: unknown = lastMessage.content;
				if (typeof content === 'string') {
					return content;
				}
				if (Array.isArray(content)) {
					for (const part of content) {
						if (part && part.type === 'text' && typeof part.content === 'string' && part.content) {
							return part.content;
						}
					}
				}
			}

			return undefined;
		}

		/**
		 * Chooses canned fallback text from keywords in the message and system
		 * prompt: JSON requests get a JSON payload (monster stats when the
		 * message mentions them), image prompts get a short description, and
		 * everything else gets a generic notice.
		 */
		private buildFallbackResponse(message: string, options: Qwen3GenerationSettings): string {
			const wantsJson =
				message.includes('JSON') || message.includes('json') || options.sys_prompt?.includes('JSON');

			if (wantsJson) {
				if (message.includes('monster') || message.includes('stats')) {
					return this.generateFallbackMonsterStats(message);
				}
				return '```json\n{"status": "Qwen3 temporarily unavailable", "using_fallback": true}\n```';
			}

			if (message.includes('visual description') || message.includes('image generation')) {
				return this.generateFallbackImageDescription(message);
			}

			return `I understand you're asking about: "${message.substring(0, 100)}..."\n\nHowever, I'm currently unable to connect to the Qwen3 service. The system will automatically fall back to an alternative model for your request.`;
		}

		/**
		 * Produces a fenced JSON block of randomized monster stats. Rarity is
		 * inferred from keywords in the message and shifts the stat range.
		 */
		private generateFallbackMonsterStats(userMessage: string): string {
			const lowered = userMessage.toLowerCase();
			const isRare = lowered.includes('rare') || lowered.includes('legendary');
			const isCommon = lowered.includes('common') || lowered.includes('basic');

			const baseStats = isRare ? 70 : isCommon ? 25 : 45;
			const variation = isRare ? 25 : isCommon ? 15 : 20;

			// One stat centred on baseStats, spread by variation, clamped to [10, 100].
			const rollStat = (): number =>
				Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation / 2)));

			const stats = {
				rarity: isRare ? 'rare' : isCommon ? 'common' : 'uncommon',
				picletType: 'beast', // Default fallback
				height: Math.round((Math.random() * 3 + 0.5) * 10) / 10,
				weight: Math.round((Math.random() * 100 + 10) * 10) / 10,
				HP: rollStat(),
				defence: rollStat(),
				attack: rollStat(),
				speed: rollStat(),
				monsterLore: "A mysterious creature discovered through advanced AI analysis. Its true nature remains to be studied.",
				specialPassiveTraitDescription: "Adaptive Resilience - This creature adapts to its environment.",
				attackActionName: "Strike",
				attackActionDescription: "A focused attack that deals moderate damage.",
				buffActionName: "Focus",
				buffActionDescription: "Increases concentration, boosting attack power temporarily.",
				debuffActionName: "Intimidate",
				debuffActionDescription: "Reduces the opponent's confidence, lowering their attack.",
				specialActionName: "Signature Move",
				specialActionDescription: "A powerful technique unique to this creature."
			};

			return '```json\n' + JSON.stringify(stats, null, 2) + '\n```';
		}

		/**
		 * Produces a one-sentence creature description from a random colour and
		 * feature. The message itself is not inspected.
		 */
		private generateFallbackImageDescription(userMessage: string): string {
			const colors = ['vibrant blue', 'emerald green', 'golden yellow', 'deep purple', 'crimson red'];
			const features = ['large expressive eyes', 'sleek form', 'distinctive markings', 'graceful limbs'];

			const color = colors[Math.floor(Math.random() * colors.length)];
			const feature = features[Math.floor(Math.random() * features.length)];

			return `A ${color} creature with ${feature}, designed in an anime-inspired style with clean lines and appealing proportions.`;
		}

		/**
		 * Checks whether the Space actually answers a short prompt.
		 *
		 * Uses the non-fallback request path: going through `predict` would turn
		 * a failed request into canned fallback text and be mistaken for a
		 * working connection.
		 *
		 * @returns `true` only when the Space returned non-empty text; `false` on
		 *          any connection or request failure.
		 */
		async testConnection(): Promise<boolean> {
			try {
				await this.initializeGradioClient();

				const response = await this.requestQwen3Completion(
					'Hello, are you working? Please respond with just "Yes" if you can receive this message.',
					{
						sys_prompt: 'You are a helpful assistant. Respond very briefly with just "Yes" if you can receive messages.',
						model: this.options.model,
						max_new_tokens: 50, // Small token limit for test
						temperature: 0.7,
						top_p: 0.95,
						top_k: 50,
						repetition_penalty: 1.0
					}
				);

				const isWorking = response.length > 0;

				console.log(`🔍 Qwen3 connection test result: ${isWorking ? 'PASS' : 'FAIL'}`);
				console.log(`📝 Test response: "${response.substring(0, 50)}..."`);

				return isWorking;
			} catch (error) {
				console.error('Qwen3 connection test failed:', error);
				return false;
			}
		}
	}

	/** Prompt and sampling settings passed from `predict` to the request helpers. */
	interface Qwen3GenerationSettings {
		sys_prompt?: string;
		model?: string;
		max_new_tokens?: number;
		temperature?: number;
		top_p?: number;
		top_k?: number;
		repetition_penalty?: number;
	}

	// Shared client instance built with the default options
	export const qwen3Client: Qwen3Client = new Qwen3Client();