milwright committed on
Commit
6839f0d
·
1 Parent(s): 13ed840

fix: resolve HF dataset loading errors and improve word selection

Browse files

- Reduce API request size from 100 to 20 to avoid TooBigContentError
- Lower random offset range to stay within dataset bounds
- Improve AI word selection prompt to avoid function words
- Explicitly exclude articles, prepositions, conjunctions from selection
- Require 4+ letter content words (nouns, verbs, adjectives)

Files changed (2) hide show
  1. src/aiService.js +1 -1
  2. src/bookDataService.js +5 -5
src/aiService.js CHANGED
@@ -112,7 +112,7 @@ class OpenRouterService {
112
  content: 'Select words for cloze reading exercises. Choose common, everyday words that students know. Avoid proper nouns (names, places), technical terms, archaic words, and words over 8 letters. Pick words students can guess from surrounding context.'
113
  }, {
114
  role: 'user',
115
- content: `Select exactly ${count} appropriate words for a cloze exercise. Choose common words students can guess from context. Avoid: proper nouns, technical terms, rare/archaic words, words over 8 letters. Return ONLY a JSON array of words.
116
 
117
  Passage: "${passage}"`
118
  }],
 
112
  content: 'Select words for cloze reading exercises. Choose common, everyday words that students know. Avoid proper nouns (names, places), technical terms, archaic words, and words over 8 letters. Pick words students can guess from surrounding context.'
113
  }, {
114
  role: 'user',
115
+ content: `Select exactly ${count} meaningful content words for a cloze exercise. Choose nouns, verbs, adjectives that are 4+ letters long and important to meaning. NEVER select: articles (a, an, the), prepositions (in, on, at, to, for, of, with, by, from), conjunctions (and, or, but), pronouns (I, you, he, she, it, they), or auxiliary verbs (is, are, was, were, have, has, had). Return ONLY a JSON array of words.
116
 
117
  Passage: "${passage}"`
118
  }],
src/bookDataService.js CHANGED
@@ -85,7 +85,7 @@ class HuggingFaceDatasetService {
85
 
86
  if (this.streamingEnabled) {
87
  // Preload some books for immediate access
88
- await this.preloadBooks(100);
89
  console.log(`✅ HF Streaming enabled: ${this.preloadedBooks.length} books preloaded`);
90
  } else {
91
  // Fall back to local samples
@@ -126,12 +126,12 @@ class HuggingFaceDatasetService {
126
  }
127
  }
128
 
129
- async preloadBooks(count = 100) {
130
  if (!this.streamingEnabled) return;
131
 
132
  try {
133
  // Use random offset to avoid always getting the same books
134
- const randomOffset = Math.floor(Math.random() * 5000); // Random start point in dataset
135
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
136
  const response = await fetch(url);
137
 
@@ -412,7 +412,7 @@ class HuggingFaceDatasetService {
412
 
413
  // If no preloaded books, try to fetch directly
414
  try {
415
- const offset = Math.floor(Math.random() * 10000); // Much larger random range
416
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
417
  const response = await fetch(url);
418
 
@@ -499,7 +499,7 @@ class HuggingFaceDatasetService {
499
  // Refresh preloaded books cache
500
  async refreshCache() {
501
  if (this.streamingEnabled) {
502
- await this.preloadBooks(100);
503
  console.log(`🔄 Cache refreshed: ${this.preloadedBooks.length} books`);
504
  }
505
  }
 
85
 
86
  if (this.streamingEnabled) {
87
  // Preload some books for immediate access
88
+ await this.preloadBooks(20);
89
  console.log(`✅ HF Streaming enabled: ${this.preloadedBooks.length} books preloaded`);
90
  } else {
91
  // Fall back to local samples
 
126
  }
127
  }
128
 
129
+ async preloadBooks(count = 20) {
130
  if (!this.streamingEnabled) return;
131
 
132
  try {
133
  // Use random offset to avoid always getting the same books
134
+ const randomOffset = Math.floor(Math.random() * 1000); // Smaller random range
135
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
136
  const response = await fetch(url);
137
 
 
412
 
413
  // If no preloaded books, try to fetch directly
414
  try {
415
+ const offset = Math.floor(Math.random() * 1000); // Smaller random range
416
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
417
  const response = await fetch(url);
418
 
 
499
  // Refresh preloaded books cache
500
  async refreshCache() {
501
  if (this.streamingEnabled) {
502
+ await this.preloadBooks(20);
503
  console.log(`🔄 Cache refreshed: ${this.preloadedBooks.length} books`);
504
  }
505
  }