milwright committed on
Commit
575efeb
·
1 Parent(s): 18f0d59

fix: improve randomization and constrain prompts further

Browse files

- Add random offset (0-5000) for preloaded books instead of always starting at 0
- Increase random range from 1000 to 10000 for streaming books
- Add session-based duplicate tracking to prevent the same book from being returned twice
- Tighten prompt constraints (10-20 word limits instead of 15-25)
- Add stricter format enforcement for all question types
- Clear the used-book cache and retry when no unused book is found after 10 attempts

src/bookDataService.js CHANGED
@@ -9,6 +9,7 @@ class HuggingFaceDatasetService {
9
  this.streamingEnabled = false;
10
  this.cache = new Map();
11
  this.preloadedBooks = [];
 
12
  }
13
 
14
  // Local fallback books for when HF streaming is unavailable
@@ -129,8 +130,9 @@ class HuggingFaceDatasetService {
129
  if (!this.streamingEnabled) return;
130
 
131
  try {
132
- // Fetch a batch of books from HF Datasets (correct API format)
133
- const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=0&length=${count}`;
 
134
  const response = await fetch(url);
135
 
136
  if (response.ok) {
@@ -354,16 +356,51 @@ class HuggingFaceDatasetService {
354
  throw new Error('Dataset not loaded');
355
  }
356
 
357
- // Prioritize preloaded books for fast access (90% chance)
358
- if (this.streamingEnabled && this.preloadedBooks.length > 0 && Math.random() > 0.1) {
359
- const randomIndex = Math.floor(Math.random() * this.preloadedBooks.length);
360
- return this.preloadedBooks[randomIndex];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  }
362
 
363
- // Use local samples for remaining 10% + fallback
364
- const fallbackBooks = this.books.length > 0 ? this.books : this.getSampleBooks();
365
- const randomIndex = Math.floor(Math.random() * fallbackBooks.length);
366
- return fallbackBooks[randomIndex];
 
 
 
 
 
367
  }
368
 
369
  async getStreamingBook() {
@@ -375,7 +412,7 @@ class HuggingFaceDatasetService {
375
 
376
  // If no preloaded books, try to fetch directly
377
  try {
378
- const offset = Math.floor(Math.random() * 1000); // Random offset
379
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
380
  const response = await fetch(url);
381
 
 
9
  this.streamingEnabled = false;
10
  this.cache = new Map();
11
  this.preloadedBooks = [];
12
+ this.usedBooks = new Set(); // Track books used this session
13
  }
14
 
15
  // Local fallback books for when HF streaming is unavailable
 
130
  if (!this.streamingEnabled) return;
131
 
132
  try {
133
+ // Use random offset to avoid always getting the same books
134
+ const randomOffset = Math.floor(Math.random() * 5000); // Random start point in dataset
135
+ const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
136
  const response = await fetch(url);
137
 
138
  if (response.ok) {
 
356
  throw new Error('Dataset not loaded');
357
  }
358
 
359
+ // Try multiple times to get an unused book
360
+ for (let attempt = 0; attempt < 10; attempt++) {
361
+ let book = null;
362
+
363
+ // Prioritize preloaded books for fast access (90% chance)
364
+ if (this.streamingEnabled && this.preloadedBooks.length > 0 && Math.random() > 0.1) {
365
+ const availableBooks = this.preloadedBooks.filter(book =>
366
+ !this.usedBooks.has(this.getBookId(book))
367
+ );
368
+
369
+ if (availableBooks.length > 0) {
370
+ const randomIndex = Math.floor(Math.random() * availableBooks.length);
371
+ book = availableBooks[randomIndex];
372
+ } else {
373
+ // All preloaded books used, try streaming
374
+ book = await this.getStreamingBook();
375
+ }
376
+ } else {
377
+ // Use local samples for remaining 10% + fallback
378
+ const fallbackBooks = this.books.length > 0 ? this.books : this.getSampleBooks();
379
+ const availableBooks = fallbackBooks.filter(book =>
380
+ !this.usedBooks.has(this.getBookId(book))
381
+ );
382
+
383
+ if (availableBooks.length > 0) {
384
+ const randomIndex = Math.floor(Math.random() * availableBooks.length);
385
+ book = availableBooks[randomIndex];
386
+ }
387
+ }
388
+
389
+ if (book && !this.usedBooks.has(this.getBookId(book))) {
390
+ this.usedBooks.add(this.getBookId(book));
391
+ return book;
392
+ }
393
  }
394
 
395
+ // If all attempts failed, clear used books and start over
396
+ this.usedBooks.clear();
397
+ console.log('All books used, cleared used book cache');
398
+ return this.getRandomBook();
399
+ }
400
+
401
+ getBookId(book) {
402
+ // Create unique ID from title and author to track duplicates
403
+ return `${book.title}_${book.author}`.replace(/\s+/g, '_').toLowerCase();
404
  }
405
 
406
  async getStreamingBook() {
 
412
 
413
  // If no preloaded books, try to fetch directly
414
  try {
415
+ const offset = Math.floor(Math.random() * 10000); // Much larger random range
416
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
417
  const response = await fetch(url);
418
 
src/conversationManager.js CHANGED
@@ -104,13 +104,13 @@ class ChatService {
104
  const baseContext = `From "${bookTitle}" by ${author}: "${sentence}"`;
105
 
106
  const prompts = {
107
- part_of_speech: `${baseContext}\n\nRespond with exactly this format: "This is a [noun/verb/adjective/adverb]" then add one simple, concrete clue about what type (e.g., "a thing", "an action", "describes something"). Keep it under 20 words total.`,
108
 
109
- sentence_role: `${baseContext}\n\nPoint to specific words around the blank. Example format: "Look at 'the [words before] ____ [words after]' - what could [function]?" Focus only on the immediate context. Keep under 25 words.`,
110
 
111
- word_category: `${baseContext}\n\nStart with exactly: "This is abstract" or "This is concrete." Then give one relatable example or size clue: "Think about something very [big/small]" or "Like [feelings/objects]". Keep under 20 words total.`,
112
 
113
- synonym: `${baseContext}\n\nUse this format: "Try a word similar to [related word]" or "Think of another word for [meaning]". Give direct synonyms or word families only. Keep under 15 words.`
114
  };
115
 
116
  return prompts[questionType] || `${baseContext}\n\nProvide a helpful hint about the missing word without revealing it.`;
 
104
  const baseContext = `From "${bookTitle}" by ${author}: "${sentence}"`;
105
 
106
  const prompts = {
107
+ part_of_speech: `${baseContext}\n\nRespond with exactly: "This is a [noun/verb/adjective/adverb]" then add ONE simple clue about what type (e.g., "a thing", "an action", "describes something"). Maximum 15 words total. Do not reveal the word.`,
108
 
109
+ sentence_role: `${baseContext}\n\nPoint to specific words around the blank. Format: "Look at 'the [word before] ____ [word after]' - what could [verb/function]?" Use only immediate neighboring words. Maximum 20 words.`,
110
 
111
+ word_category: `${baseContext}\n\nStart with exactly "This is abstract" or "This is concrete." Then add ONE example: "Like [feelings/objects]" or "Think [size/type]". Maximum 12 words total.`,
112
 
113
+ synonym: `${baseContext}\n\nFormat: "Try a word similar to [related word]" or "Another word for [concept]". Give ONE direct synonym or related concept only. Maximum 10 words.`
114
  };
115
 
116
  return prompts[questionType] || `${baseContext}\n\nProvide a helpful hint about the missing word without revealing it.`;