Spaces:
Sleeping
Sleeping
fix: improve randomization and constrain prompts further
Browse files
- Add random offset (0-5000) for preloaded books instead of always starting at 0
- Increase random range from 1000 to 10000 for streaming books
- Add session-based duplicate tracking to prevent same book twice
- Tighten prompt constraints (10-20 word limits instead of 20-25)
- Add stricter format enforcement for all question types
- Clear used book cache when all books exhausted
- src/bookDataService.js +48 -11
- src/conversationManager.js +4 -4
src/bookDataService.js
CHANGED
@@ -9,6 +9,7 @@ class HuggingFaceDatasetService {
|
|
9 |
this.streamingEnabled = false;
|
10 |
this.cache = new Map();
|
11 |
this.preloadedBooks = [];
|
|
|
12 |
}
|
13 |
|
14 |
// Local fallback books for when HF streaming is unavailable
|
@@ -129,8 +130,9 @@ class HuggingFaceDatasetService {
|
|
129 |
if (!this.streamingEnabled) return;
|
130 |
|
131 |
try {
|
132 |
-
//
|
133 |
-
const
|
|
|
134 |
const response = await fetch(url);
|
135 |
|
136 |
if (response.ok) {
|
@@ -354,16 +356,51 @@ class HuggingFaceDatasetService {
|
|
354 |
throw new Error('Dataset not loaded');
|
355 |
}
|
356 |
|
357 |
-
//
|
358 |
-
|
359 |
-
|
360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
}
|
362 |
|
363 |
-
//
|
364 |
-
|
365 |
-
|
366 |
-
return
|
|
|
|
|
|
|
|
|
|
|
367 |
}
|
368 |
|
369 |
async getStreamingBook() {
|
@@ -375,7 +412,7 @@ class HuggingFaceDatasetService {
|
|
375 |
|
376 |
// If no preloaded books, try to fetch directly
|
377 |
try {
|
378 |
-
const offset = Math.floor(Math.random() * 1000);
|
379 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
|
380 |
const response = await fetch(url);
|
381 |
|
|
|
9 |
this.streamingEnabled = false;
|
10 |
this.cache = new Map();
|
11 |
this.preloadedBooks = [];
|
12 |
+
this.usedBooks = new Set(); // Track books used this session
|
13 |
}
|
14 |
|
15 |
// Local fallback books for when HF streaming is unavailable
|
|
|
130 |
if (!this.streamingEnabled) return;
|
131 |
|
132 |
try {
|
133 |
+
// Use random offset to avoid always getting the same books
|
134 |
+
const randomOffset = Math.floor(Math.random() * 5000); // Random start point in dataset
|
135 |
+
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
|
136 |
const response = await fetch(url);
|
137 |
|
138 |
if (response.ok) {
|
|
|
356 |
throw new Error('Dataset not loaded');
|
357 |
}
|
358 |
|
359 |
+
// Try multiple times to get an unused book
|
360 |
+
for (let attempt = 0; attempt < 10; attempt++) {
|
361 |
+
let book = null;
|
362 |
+
|
363 |
+
// Prioritize preloaded books for fast access (90% chance)
|
364 |
+
if (this.streamingEnabled && this.preloadedBooks.length > 0 && Math.random() > 0.1) {
|
365 |
+
const availableBooks = this.preloadedBooks.filter(book =>
|
366 |
+
!this.usedBooks.has(this.getBookId(book))
|
367 |
+
);
|
368 |
+
|
369 |
+
if (availableBooks.length > 0) {
|
370 |
+
const randomIndex = Math.floor(Math.random() * availableBooks.length);
|
371 |
+
book = availableBooks[randomIndex];
|
372 |
+
} else {
|
373 |
+
// All preloaded books used, try streaming
|
374 |
+
book = await this.getStreamingBook();
|
375 |
+
}
|
376 |
+
} else {
|
377 |
+
// Use local samples for remaining 10% + fallback
|
378 |
+
const fallbackBooks = this.books.length > 0 ? this.books : this.getSampleBooks();
|
379 |
+
const availableBooks = fallbackBooks.filter(book =>
|
380 |
+
!this.usedBooks.has(this.getBookId(book))
|
381 |
+
);
|
382 |
+
|
383 |
+
if (availableBooks.length > 0) {
|
384 |
+
const randomIndex = Math.floor(Math.random() * availableBooks.length);
|
385 |
+
book = availableBooks[randomIndex];
|
386 |
+
}
|
387 |
+
}
|
388 |
+
|
389 |
+
if (book && !this.usedBooks.has(this.getBookId(book))) {
|
390 |
+
this.usedBooks.add(this.getBookId(book));
|
391 |
+
return book;
|
392 |
+
}
|
393 |
}
|
394 |
|
395 |
+
// If all attempts failed, clear used books and start over
|
396 |
+
this.usedBooks.clear();
|
397 |
+
console.log('All books used, cleared used book cache');
|
398 |
+
return this.getRandomBook();
|
399 |
+
}
|
400 |
+
|
401 |
+
getBookId(book) {
|
402 |
+
// Create unique ID from title and author to track duplicates
|
403 |
+
return `${book.title}_${book.author}`.replace(/\s+/g, '_').toLowerCase();
|
404 |
}
|
405 |
|
406 |
async getStreamingBook() {
|
|
|
412 |
|
413 |
// If no preloaded books, try to fetch directly
|
414 |
try {
|
415 |
+
const offset = Math.floor(Math.random() * 10000); // Much larger random range
|
416 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
|
417 |
const response = await fetch(url);
|
418 |
|
src/conversationManager.js
CHANGED
@@ -104,13 +104,13 @@ class ChatService {
|
|
104 |
const baseContext = `From "${bookTitle}" by ${author}: "${sentence}"`;
|
105 |
|
106 |
const prompts = {
|
107 |
-
part_of_speech: `${baseContext}\n\nRespond with exactly
|
108 |
|
109 |
-
sentence_role: `${baseContext}\n\nPoint to specific words around the blank.
|
110 |
|
111 |
-
word_category: `${baseContext}\n\nStart with exactly
|
112 |
|
113 |
-
synonym: `${baseContext}\n\
|
114 |
};
|
115 |
|
116 |
return prompts[questionType] || `${baseContext}\n\nProvide a helpful hint about the missing word without revealing it.`;
|
|
|
104 |
const baseContext = `From "${bookTitle}" by ${author}: "${sentence}"`;
|
105 |
|
106 |
const prompts = {
|
107 |
+
part_of_speech: `${baseContext}\n\nRespond with exactly: "This is a [noun/verb/adjective/adverb]" then add ONE simple clue about what type (e.g., "a thing", "an action", "describes something"). Maximum 15 words total. Do not reveal the word.`,
|
108 |
|
109 |
+
sentence_role: `${baseContext}\n\nPoint to specific words around the blank. Format: "Look at 'the [word before] ____ [word after]' - what could [verb/function]?" Use only immediate neighboring words. Maximum 20 words.`,
|
110 |
|
111 |
+
word_category: `${baseContext}\n\nStart with exactly "This is abstract" or "This is concrete." Then add ONE example: "Like [feelings/objects]" or "Think [size/type]". Maximum 12 words total.`,
|
112 |
|
113 |
+
synonym: `${baseContext}\n\nFormat: "Try a word similar to [related word]" or "Another word for [concept]". Give ONE direct synonym or related concept only. Maximum 10 words.`
|
114 |
};
|
115 |
|
116 |
return prompts[questionType] || `${baseContext}\n\nProvide a helpful hint about the missing word without revealing it.`;
|