milwright committed on
Commit
9e0745a
·
1 Parent(s): 870251f

Implement level-aware word selection and performance optimizations

Browse files

- Add level-based word length constraints (1-2: 4-7 letters, 3-4: 4-10 letters, 5+: 5-14 letters)
- Implement lazy loading for faster startup (preload 5 books instead of 20, with on-demand processing)
- Add period-based book selection (levels 1-2: 1850-1925, 3-4: 1800s, 5+: any)
- Strengthen AI word validation with client-side length filtering
- Remove difficulty labels from UI (Easy/Medium/Hard)
- Clean up unused methods and debug logging

Files changed (4) hide show
  1. src/aiService.js +74 -7
  2. src/app.js +1 -2
  3. src/bookDataService.js +158 -51
  4. src/clozeGameEngine.js +3 -3
src/aiService.js CHANGED
@@ -110,8 +110,8 @@ ${prompt}`
110
  }
111
 
112
 
113
- async selectSignificantWords(passage, count) {
114
- console.log('selectSignificantWords called with count:', count);
115
 
116
  // Check for API key at runtime in case it was loaded after initialization
117
  const currentKey = this.getApiKey();
@@ -126,6 +126,19 @@ ${prompt}`
126
  throw new Error('API key required for word selection');
127
  }
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  try {
130
  return await this.retryRequest(async () => {
131
  const response = await fetch(this.apiUrl, {
@@ -142,6 +155,9 @@ ${prompt}`
142
  role: 'user',
143
  content: `You are a cluemaster vocabulary selector for educational cloze exercises. Select exactly ${count} words from this passage for a cloze exercise.
144
 
 
 
 
145
  CLOZE DELETION PRINCIPLES:
146
  - Select words that require understanding context and vocabulary to identify
147
  - Choose words essential for comprehension that test language ability
@@ -149,7 +165,7 @@ CLOZE DELETION PRINCIPLES:
149
 
150
  REQUIREMENTS:
151
  - Choose clear, properly-spelled words (no OCR errors like "andsatires")
152
- - Select meaningful nouns, verbs, or adjectives (4-12 letters)
153
  - Words must appear EXACTLY as written in the passage
154
  - Avoid: capitalized words, ALL-CAPS words, function words, archaic terms, proper nouns, technical jargon
155
  - Skip any words that look malformed or concatenated
@@ -189,13 +205,48 @@ Passage: "${passage}"`
189
  try {
190
  const words = JSON.parse(content);
191
  if (Array.isArray(words)) {
192
- return words.slice(0, count);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  }
194
  } catch (e) {
195
  // If not valid JSON, try to extract words from the response
196
  const matches = content.match(/"([^"]+)"/g);
197
  if (matches) {
198
- return matches.map(m => m.replace(/"/g, '')).slice(0, count);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  }
200
  }
201
 
@@ -207,7 +258,7 @@ Passage: "${passage}"`
207
  }
208
  }
209
 
210
- async processBothPassages(passage1, book1, passage2, book2, blanksPerPassage) {
211
  // Process both passages in a single API call to avoid rate limits
212
  const currentKey = this.getApiKey();
213
  if (currentKey && !this.apiKey) {
@@ -218,6 +269,19 @@ Passage: "${passage}"`
218
  throw new Error('API key required for passage processing');
219
  }
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  try {
222
  const response = await fetch(this.apiUrl, {
223
  method: 'POST',
@@ -233,6 +297,9 @@ Passage: "${passage}"`
233
  role: 'user',
234
  content: `You process passages for cloze reading exercises. For each passage: 1) Select words for blanks, 2) Generate a contextual introduction. Return a JSON object with both passages' data.
235
 
 
 
 
236
  Process these two passages for cloze exercises:
237
 
238
  PASSAGE 1:
@@ -247,7 +314,7 @@ Select ${blanksPerPassage} words for blanks.
247
 
248
  SELECTION RULES:
249
  - Select EXACTLY ${blanksPerPassage} word${blanksPerPassage > 1 ? 's' : ''} per passage, no more, no less
250
- - Choose meaningful nouns, verbs, or adjectives (4-12 letters)
251
  - Avoid capitalized words, ALL-CAPS words, and table of contents entries
252
  - NEVER select words from the first or last sentence/clause of each passage
253
  - Choose words from the middle portions for better context dependency
 
110
  }
111
 
112
 
113
+ async selectSignificantWords(passage, count, level = 1) {
114
+ console.log('selectSignificantWords called with count:', count, 'level:', level);
115
 
116
  // Check for API key at runtime in case it was loaded after initialization
117
  const currentKey = this.getApiKey();
 
126
  throw new Error('API key required for word selection');
127
  }
128
 
129
+ // Define level-based constraints
130
+ let wordLengthConstraint, difficultyGuidance;
131
+ if (level <= 2) {
132
+ wordLengthConstraint = "EXACTLY 4-7 letters (no words longer than 7 letters)";
133
+ difficultyGuidance = "Select EASY vocabulary words - common, everyday words that most readers know. NEVER select words longer than 7 letters.";
134
+ } else if (level <= 4) {
135
+ wordLengthConstraint = "EXACTLY 4-10 letters (no words longer than 10 letters)";
136
+ difficultyGuidance = "Select MEDIUM difficulty words - mix of common and moderately challenging vocabulary. NEVER select words longer than 10 letters.";
137
+ } else {
138
+ wordLengthConstraint = "5-14 letters";
139
+ difficultyGuidance = "Select CHALLENGING words - sophisticated vocabulary that requires strong reading skills";
140
+ }
141
+
142
  try {
143
  return await this.retryRequest(async () => {
144
  const response = await fetch(this.apiUrl, {
 
155
  role: 'user',
156
  content: `You are a cluemaster vocabulary selector for educational cloze exercises. Select exactly ${count} words from this passage for a cloze exercise.
157
 
158
+ DIFFICULTY LEVEL ${level}:
159
+ ${difficultyGuidance}
160
+
161
  CLOZE DELETION PRINCIPLES:
162
  - Select words that require understanding context and vocabulary to identify
163
  - Choose words essential for comprehension that test language ability
 
165
 
166
  REQUIREMENTS:
167
  - Choose clear, properly-spelled words (no OCR errors like "andsatires")
168
+ - Select meaningful nouns, verbs, or adjectives (${wordLengthConstraint})
169
  - Words must appear EXACTLY as written in the passage
170
  - Avoid: capitalized words, ALL-CAPS words, function words, archaic terms, proper nouns, technical jargon
171
  - Skip any words that look malformed or concatenated
 
205
  try {
206
  const words = JSON.parse(content);
207
  if (Array.isArray(words)) {
208
+ // Validate word lengths based on level
209
+ const validWords = words.filter(word => {
210
+ const cleanWord = word.replace(/[^a-zA-Z]/g, '');
211
+ if (level <= 2) {
212
+ return cleanWord.length >= 4 && cleanWord.length <= 7;
213
+ } else if (level <= 4) {
214
+ return cleanWord.length >= 4 && cleanWord.length <= 10;
215
+ } else {
216
+ return cleanWord.length >= 5 && cleanWord.length <= 14;
217
+ }
218
+ });
219
+
220
+ if (validWords.length > 0) {
221
+ console.log(`✅ Level ${level} word validation: ${validWords.length}/${words.length} words passed`);
222
+ return validWords.slice(0, count);
223
+ } else {
224
+ console.warn(`❌ Level ${level}: No words met length requirements, rejecting all`);
225
+ throw new Error(`No valid words for level ${level}`);
226
+ }
227
  }
228
  } catch (e) {
229
  // If not valid JSON, try to extract words from the response
230
  const matches = content.match(/"([^"]+)"/g);
231
  if (matches) {
232
+ const words = matches.map(m => m.replace(/"/g, ''));
233
+ // Validate word lengths
234
+ const validWords = words.filter(word => {
235
+ const cleanWord = word.replace(/[^a-zA-Z]/g, '');
236
+ if (level <= 2) {
237
+ return cleanWord.length >= 4 && cleanWord.length <= 7;
238
+ } else if (level <= 4) {
239
+ return cleanWord.length >= 4 && cleanWord.length <= 10;
240
+ } else {
241
+ return cleanWord.length >= 5 && cleanWord.length <= 14;
242
+ }
243
+ });
244
+
245
+ if (validWords.length > 0) {
246
+ return validWords.slice(0, count);
247
+ } else {
248
+ throw new Error(`No valid words for level ${level}`);
249
+ }
250
  }
251
  }
252
 
 
258
  }
259
  }
260
 
261
+ async processBothPassages(passage1, book1, passage2, book2, blanksPerPassage, level = 1) {
262
  // Process both passages in a single API call to avoid rate limits
263
  const currentKey = this.getApiKey();
264
  if (currentKey && !this.apiKey) {
 
269
  throw new Error('API key required for passage processing');
270
  }
271
 
272
+ // Define level-based constraints
273
+ let wordLengthConstraint, difficultyGuidance;
274
+ if (level <= 2) {
275
+ wordLengthConstraint = "EXACTLY 4-7 letters (no words longer than 7 letters)";
276
+ difficultyGuidance = "Select EASY vocabulary words - common, everyday words that most readers know. NEVER select words longer than 7 letters.";
277
+ } else if (level <= 4) {
278
+ wordLengthConstraint = "EXACTLY 4-10 letters (no words longer than 10 letters)";
279
+ difficultyGuidance = "Select MEDIUM difficulty words - mix of common and moderately challenging vocabulary. NEVER select words longer than 10 letters.";
280
+ } else {
281
+ wordLengthConstraint = "5-14 letters";
282
+ difficultyGuidance = "Select CHALLENGING words - sophisticated vocabulary that requires strong reading skills";
283
+ }
284
+
285
  try {
286
  const response = await fetch(this.apiUrl, {
287
  method: 'POST',
 
297
  role: 'user',
298
  content: `You process passages for cloze reading exercises. For each passage: 1) Select words for blanks, 2) Generate a contextual introduction. Return a JSON object with both passages' data.
299
 
300
+ DIFFICULTY LEVEL ${level}:
301
+ ${difficultyGuidance}
302
+
303
  Process these two passages for cloze exercises:
304
 
305
  PASSAGE 1:
 
314
 
315
  SELECTION RULES:
316
  - Select EXACTLY ${blanksPerPassage} word${blanksPerPassage > 1 ? 's' : ''} per passage, no more, no less
317
+ - Choose meaningful nouns, verbs, or adjectives (${wordLengthConstraint})
318
  - Avoid capitalized words, ALL-CAPS words, and table of contents entries
319
  - NEVER select words from the first or last sentence/clause of each passage
320
  - Choose words from the middle portions for better context dependency
src/app.js CHANGED
@@ -71,8 +71,7 @@ class App {
71
 
72
  // Show level information without passage number
73
  const blanksCount = roundData.blanks.length;
74
- const difficultyText = blanksCount === 1 ? 'Easy' : blanksCount === 2 ? 'Medium' : 'Hard';
75
- this.elements.roundInfo.innerHTML = `Level ${this.game.currentLevel} • ${blanksCount} blank${blanksCount > 1 ? 's' : ''} • ${difficultyText}`;
76
 
77
  // Show contextualization from AI agent
78
  this.elements.contextualization.innerHTML = `
 
71
 
72
  // Show level information without passage number
73
  const blanksCount = roundData.blanks.length;
74
+ this.elements.roundInfo.innerHTML = `Level ${this.game.currentLevel} ${blanksCount} blank${blanksCount > 1 ? 's' : ''}`;
 
75
 
76
  // Show contextualization from AI agent
77
  this.elements.contextualization.innerHTML = `
src/bookDataService.js CHANGED
@@ -19,60 +19,70 @@ class HuggingFaceDatasetService {
19
  id: 1,
20
  title: "Pride and Prejudice",
21
  author: "Jane Austen",
 
22
  text: "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife. However little known the feelings or views of such a man may be on his first entering a neighbourhood, this truth is so well fixed in the minds of the surrounding families, that he is considered the rightful property of some one or other of their daughters. \"My dear Mr. Bennet,\" said his lady to him one day, \"have you heard that Netherfield Park is let at last?\" Mr. Bennet replied that he had not. \"But it is,\" returned she; \"for Mrs. Long has just been here, and she told me all about it.\" Mr. Bennet made no answer. \"Do you not want to know who has taken it?\" cried his wife impatiently. \"You want to tell me, and I have no objection to hearing it.\" This was invitation enough."
23
  },
24
  {
25
  id: 2,
26
  title: "The Adventures of Tom Sawyer",
27
  author: "Mark Twain",
 
28
  text: "\"Tom!\" No answer. \"Tom!\" No answer. \"What's gone with that boy, I wonder? You TOM!\" No answer. The old lady pulled her spectacles down and looked over them about the room; then she put them up and looked out under them. She seldom or never looked through them for so small a thing as a boy; they were her state pair, the pride of her heart, and were built for \"style,\" not service--she could have seen through a pair of stove-lids just as well. She looked perplexed for a moment, and then said, not fiercely, but still loud enough for the furniture to hear: \"Well, I lay if I get hold of you I'll--\""
29
  },
30
  {
31
  id: 3,
32
  title: "Great Expectations",
33
  author: "Charles Dickens",
 
34
  text: "My father's family name being Pirrip, and my Christian name Philip, my infant tongue could make of both names nothing longer or more explicit than Pip. So, I called myself Pip, and came to be called Pip. I give Pirrip as my father's family name, on the authority of his tombstone and my sister,--Mrs. Joe Gargery, who married the blacksmith. As I never saw my father or my mother, and never saw any likeness of them (for their days were long before the days of photographs), my first fancies regarding what they were like were unreasonably derived from their tombstones."
35
  },
36
  {
37
  id: 4,
38
  title: "Alice's Adventures in Wonderland",
39
  author: "Lewis Carroll",
 
40
  text: "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversation?' So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her."
41
  },
42
  {
43
  id: 5,
44
  title: "The Picture of Dorian Gray",
45
  author: "Oscar Wilde",
 
46
  text: "The studio was filled with the rich odour of roses, and when the strong summer wind stirred, amidst the trees of the garden, there came through the open door the heavy scent of the lilac, or the more delicate perfume of the pink-flowering thorn. From the corner of the divan of Persian saddle-bags on which he was lying, smoking, as was his custom, innumerable cigarettes, Lord Henry Wotton could just catch the gleam of the honey-sweet and honey-coloured blossoms of a laburnum, whose tremulous branches seemed hardly able to bear the burden of a beauty so flamelike as theirs."
47
  },
48
  {
49
  id: 6,
50
  title: "Moby Dick",
51
  author: "Herman Melville",
 
52
  text: "Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me from deliberately stepping into the street, and methodically knocking people's hats off—then, I account it high time to get to sea as soon as possible."
53
  },
54
  {
55
  id: 7,
56
  title: "Jane Eyre",
57
  author: "Charlotte Bronte",
 
58
  text: "There was no possibility of taking a walk that day. We had been wandering, indeed, in the leafless shrubbery an hour in the morning; but since dinner (Mrs. Reed, when there was no company, dined early) the cold winter wind had brought with it clouds so sombre, and a rain so penetrating, that further out-door exercise was now out of the question. I was glad of it: I never liked long walks, especially on chilly afternoons: dreadful to me was the coming home in the raw twilight, with nipped fingers and toes, and a heart saddened by the chidings of Bessie, the nurse, and humbled by the consciousness of my physical inferiority to Eliza, John, and Georgiana Reed."
59
  },
60
  {
61
  id: 8,
62
  title: "The Count of Monte Cristo",
63
  author: "Alexandre Dumas",
 
64
  text: "On the first Monday of February, 1815, the watchtower at Marseilles signaled the arrival of the three-master Pharaon from Smyrna, Trieste, and Naples. As was customary, the pilot immediately left the port and steered toward the château d'If to conduct the ship through the narrow passage that leads to the harbor. However, a young sailor of about nineteen or twenty years, standing on the ship's bow, had signaled the pilot even before he had time to ask the traditional questions that are exchanged between the pilot and the captain. The young man had already assumed command, being the ship's owner and captain."
65
  },
66
  {
67
  id: 9,
68
  title: "Wuthering Heights",
69
  author: "Emily Bronte",
 
70
  text: "I have just returned from a visit to my landlord—the solitary neighbour that I shall be troubled with. This is certainly a beautiful country! In all England, I do not believe that I could have fixed on a situation so completely removed from the stir of society. A perfect misanthropist's Heaven: and Mr. Heathcliff and I are such a suitable pair to divide the desolation between us. A capital fellow! He little imagined how my heart warmed towards him when I beheld his black eyes withdraw so suspiciously under their brows, as I rode up, and when his fingers sheltered themselves, with a jealous resolution, still further in his waistcoat, as I announced my name."
71
  },
72
  {
73
  id: 10,
74
  title: "Frankenstein",
75
  author: "Mary Shelley",
 
76
  text: "It was on a dreary night of November that I beheld the accomplishment of my toils. With an anxiety that almost amounted to agony, I collected the instruments of life around me, that I might infuse a spark of being into the lifeless thing that lay at my feet. It was already one in the morning; the rain pattered dismally against the panes, and my candle was nearly burnt out, when, by the glimmer of the half-extinguished light, I saw the dull yellow eye of the creature open; it breathed hard, and a convulsive motion agitated its limbs. How can I describe my emotions at this catastrophe, or how delineate the wretch whom with such infinite pains and care I had endeavoured to form?"
77
  }
78
  ];
@@ -85,7 +95,7 @@ class HuggingFaceDatasetService {
85
 
86
  if (this.streamingEnabled) {
87
  // Preload some books for immediate access
88
- await this.preloadBooks(20);
89
  console.log(`✅ HF Streaming enabled: ${this.preloadedBooks.length} books preloaded`);
90
  } else {
91
  // Fall back to local samples
@@ -126,12 +136,12 @@ class HuggingFaceDatasetService {
126
  }
127
  }
128
 
129
- async preloadBooks(count = 20) {
130
  if (!this.streamingEnabled) return;
131
 
132
  try {
133
  // Use random offset to avoid always getting the same books
134
- const randomOffset = Math.floor(Math.random() * 1000); // Smaller random range
135
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
136
  const response = await fetch(url);
137
 
@@ -146,19 +156,18 @@ class HuggingFaceDatasetService {
146
 
147
  console.log(`📥 Received ${data.rows.length} books from HF API`);
148
 
149
- // Process and filter books
150
  this.preloadedBooks = data.rows
151
  .map(row => {
152
  try {
153
- return this.processHFBook(row.row);
154
  } catch (e) {
155
  console.warn('Error processing book:', e);
156
  return null;
157
  }
158
  })
159
- .filter(book => book && this.isValidForCloze(book));
160
 
161
- console.log(`📚 Preloaded ${this.preloadedBooks.length} suitable books`);
162
  } else {
163
  console.error(`HF API request failed: ${response.status} ${response.statusText}`);
164
  }
@@ -167,15 +176,12 @@ class HuggingFaceDatasetService {
167
  }
168
  }
169
 
170
- processHFBook(rowData) {
171
- // Extract and clean book data from HF format
172
- const originalText = rowData.text || '';
173
- const cleanedText = this.cleanProjectGutenbergText(originalText);
174
 
175
- // Try multiple metadata extraction approaches
176
- const extractedMetadata = this.extractMetadata(originalText);
177
-
178
- // Prioritize extracted metadata over potentially incorrect HF dataset fields
179
  const title = extractedMetadata.title || rowData.title || 'Classic Literature';
180
  const author = extractedMetadata.author || rowData.author || 'Unknown Author';
181
 
@@ -183,12 +189,40 @@ class HuggingFaceDatasetService {
183
  id: rowData.id || Math.random().toString(36),
184
  title: title,
185
  author: author,
186
- text: cleanedText,
 
 
187
  language: rowData.language || 'en',
188
- source: 'project_gutenberg'
 
189
  };
190
  }
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  cleanProjectGutenbergText(text) {
193
  if (!text) return '';
194
 
@@ -277,7 +311,7 @@ class HuggingFaceDatasetService {
277
  }
278
 
279
  extractMetadata(text) {
280
- const metadata = { title: 'Classic Literature', author: 'Unknown Author' };
281
 
282
  if (!text) return metadata;
283
 
@@ -316,6 +350,12 @@ class HuggingFaceDatasetService {
316
  if (author && author.length > 1) {
317
  metadata.author = this.cleanMetadataField(author);
318
  }
 
 
 
 
 
 
319
  }
320
  }
321
 
@@ -329,6 +369,38 @@ class HuggingFaceDatasetService {
329
  .trim();
330
  }
331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  isValidTitle(title) {
333
  if (!title || title.length < 3 || title.length > 100) return false;
334
  // Avoid fragments that are clearly not titles
@@ -387,6 +459,10 @@ class HuggingFaceDatasetService {
387
  if (availableBooks.length > 0) {
388
  const randomIndex = Math.floor(Math.random() * availableBooks.length);
389
  book = availableBooks[randomIndex];
 
 
 
 
390
  } else {
391
  // All preloaded books used, try streaming
392
  book = await this.getStreamingBook();
@@ -425,22 +501,27 @@ class HuggingFaceDatasetService {
425
  // Use preloaded books for immediate access
426
  if (this.preloadedBooks.length > 0) {
427
  const randomIndex = Math.floor(Math.random() * this.preloadedBooks.length);
428
- return this.preloadedBooks[randomIndex];
 
 
 
 
 
 
 
429
  }
430
 
431
  // If no preloaded books, try to fetch directly
432
  try {
433
- const offset = Math.floor(Math.random() * 1000); // Smaller random range
434
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
435
  const response = await fetch(url);
436
 
437
  if (response.ok) {
438
  const data = await response.json();
439
  if (data.rows && data.rows.length > 0) {
440
- const book = this.processHFBook(data.rows[0].row);
441
- if (this.isValidForCloze(book)) {
442
- return book;
443
- }
444
  }
445
  }
446
  } catch (error) {
@@ -450,42 +531,68 @@ class HuggingFaceDatasetService {
450
  return null;
451
  }
452
 
453
- async getBooksByDifficulty(level) {
454
- const difficultyRanges = {
455
- 1: { min: 5000, max: 30000 }, // Short stories/novellas
456
- 2: { min: 25000, max: 80000 }, // Medium novels
457
- 3: { min: 60000, max: 200000 } // Long novels
458
- };
459
-
460
- const range = difficultyRanges[Math.min(level, 3)];
461
-
462
- if (this.streamingEnabled && this.preloadedBooks.length > 0) {
463
- // Filter preloaded books by difficulty
464
- const suitable = this.preloadedBooks.filter(book =>
465
- book.text.length >= range.min && book.text.length <= range.max
466
- );
467
 
468
- if (suitable.length > 0) {
469
- const randomIndex = Math.floor(Math.random() * suitable.length);
470
- return suitable[randomIndex];
 
 
 
 
 
 
 
 
 
471
  }
472
  }
 
 
 
473
 
474
- // Fallback to local filtering
475
- const fallbackBooks = this.books.length > 0 ? this.books : this.getSampleBooks();
476
- const filtered = fallbackBooks.filter(book =>
477
- book.text.length >= range.min && book.text.length <= range.max
478
- );
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
- if (filtered.length > 0) {
481
- const randomIndex = Math.floor(Math.random() * filtered.length);
482
- return filtered[randomIndex];
 
 
 
 
 
483
  }
484
 
485
- // If no books match difficulty, return any available book
486
- return await this.getRandomBook();
487
  }
488
 
 
489
  getBookById(id) {
490
  // Search in both preloaded and local books
491
  const allBooks = [...this.preloadedBooks, ...this.books];
 
19
  id: 1,
20
  title: "Pride and Prejudice",
21
  author: "Jane Austen",
22
+ year: 1813,
23
  text: "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife. However little known the feelings or views of such a man may be on his first entering a neighbourhood, this truth is so well fixed in the minds of the surrounding families, that he is considered the rightful property of some one or other of their daughters. \"My dear Mr. Bennet,\" said his lady to him one day, \"have you heard that Netherfield Park is let at last?\" Mr. Bennet replied that he had not. \"But it is,\" returned she; \"for Mrs. Long has just been here, and she told me all about it.\" Mr. Bennet made no answer. \"Do you not want to know who has taken it?\" cried his wife impatiently. \"You want to tell me, and I have no objection to hearing it.\" This was invitation enough."
24
  },
25
  {
26
  id: 2,
27
  title: "The Adventures of Tom Sawyer",
28
  author: "Mark Twain",
29
+ year: 1876,
30
  text: "\"Tom!\" No answer. \"Tom!\" No answer. \"What's gone with that boy, I wonder? You TOM!\" No answer. The old lady pulled her spectacles down and looked over them about the room; then she put them up and looked out under them. She seldom or never looked through them for so small a thing as a boy; they were her state pair, the pride of her heart, and were built for \"style,\" not service--she could have seen through a pair of stove-lids just as well. She looked perplexed for a moment, and then said, not fiercely, but still loud enough for the furniture to hear: \"Well, I lay if I get hold of you I'll--\""
31
  },
32
  {
33
  id: 3,
34
  title: "Great Expectations",
35
  author: "Charles Dickens",
36
+ year: 1861,
37
  text: "My father's family name being Pirrip, and my Christian name Philip, my infant tongue could make of both names nothing longer or more explicit than Pip. So, I called myself Pip, and came to be called Pip. I give Pirrip as my father's family name, on the authority of his tombstone and my sister,--Mrs. Joe Gargery, who married the blacksmith. As I never saw my father or my mother, and never saw any likeness of them (for their days were long before the days of photographs), my first fancies regarding what they were like were unreasonably derived from their tombstones."
38
  },
39
  {
40
  id: 4,
41
  title: "Alice's Adventures in Wonderland",
42
  author: "Lewis Carroll",
43
+ year: 1865,
44
  text: "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversation?' So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her."
45
  },
46
  {
47
  id: 5,
48
  title: "The Picture of Dorian Gray",
49
  author: "Oscar Wilde",
50
+ year: 1890,
51
  text: "The studio was filled with the rich odour of roses, and when the strong summer wind stirred, amidst the trees of the garden, there came through the open door the heavy scent of the lilac, or the more delicate perfume of the pink-flowering thorn. From the corner of the divan of Persian saddle-bags on which he was lying, smoking, as was his custom, innumerable cigarettes, Lord Henry Wotton could just catch the gleam of the honey-sweet and honey-coloured blossoms of a laburnum, whose tremulous branches seemed hardly able to bear the burden of a beauty so flamelike as theirs."
52
  },
53
  {
54
  id: 6,
55
  title: "Moby Dick",
56
  author: "Herman Melville",
57
+ year: 1851,
58
  text: "Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me from deliberately stepping into the street, and methodically knocking people's hats off—then, I account it high time to get to sea as soon as possible."
59
  },
60
  {
61
  id: 7,
62
  title: "Jane Eyre",
63
  author: "Charlotte Bronte",
64
+ year: 1847,
65
  text: "There was no possibility of taking a walk that day. We had been wandering, indeed, in the leafless shrubbery an hour in the morning; but since dinner (Mrs. Reed, when there was no company, dined early) the cold winter wind had brought with it clouds so sombre, and a rain so penetrating, that further out-door exercise was now out of the question. I was glad of it: I never liked long walks, especially on chilly afternoons: dreadful to me was the coming home in the raw twilight, with nipped fingers and toes, and a heart saddened by the chidings of Bessie, the nurse, and humbled by the consciousness of my physical inferiority to Eliza, John, and Georgiana Reed."
66
  },
67
  {
68
  id: 8,
69
  title: "The Count of Monte Cristo",
70
  author: "Alexandre Dumas",
71
+ year: 1844,
72
  text: "On the first Monday of February, 1815, the watchtower at Marseilles signaled the arrival of the three-master Pharaon from Smyrna, Trieste, and Naples. As was customary, the pilot immediately left the port and steered toward the château d'If to conduct the ship through the narrow passage that leads to the harbor. However, a young sailor of about nineteen or twenty years, standing on the ship's bow, had signaled the pilot even before he had time to ask the traditional questions that are exchanged between the pilot and the captain. The young man had already assumed command, being the ship's owner and captain."
73
  },
74
  {
75
  id: 9,
76
  title: "Wuthering Heights",
77
  author: "Emily Bronte",
78
+ year: 1847,
79
  text: "I have just returned from a visit to my landlord—the solitary neighbour that I shall be troubled with. This is certainly a beautiful country! In all England, I do not believe that I could have fixed on a situation so completely removed from the stir of society. A perfect misanthropist's Heaven: and Mr. Heathcliff and I are such a suitable pair to divide the desolation between us. A capital fellow! He little imagined how my heart warmed towards him when I beheld his black eyes withdraw so suspiciously under their brows, as I rode up, and when his fingers sheltered themselves, with a jealous resolution, still further in his waistcoat, as I announced my name."
80
  },
81
  {
82
  id: 10,
83
  title: "Frankenstein",
84
  author: "Mary Shelley",
85
+ year: 1818,
86
  text: "It was on a dreary night of November that I beheld the accomplishment of my toils. With an anxiety that almost amounted to agony, I collected the instruments of life around me, that I might infuse a spark of being into the lifeless thing that lay at my feet. It was already one in the morning; the rain pattered dismally against the panes, and my candle was nearly burnt out, when, by the glimmer of the half-extinguished light, I saw the dull yellow eye of the creature open; it breathed hard, and a convulsive motion agitated its limbs. How can I describe my emotions at this catastrophe, or how delineate the wretch whom with such infinite pains and care I had endeavoured to form?"
87
  }
88
  ];
 
95
 
96
  if (this.streamingEnabled) {
97
  // Preload some books for immediate access
98
+ await this.preloadBooks(5);
99
  console.log(`✅ HF Streaming enabled: ${this.preloadedBooks.length} books preloaded`);
100
  } else {
101
  // Fall back to local samples
 
136
  }
137
  }
138
 
139
+ async preloadBooks(count = 5) {
140
  if (!this.streamingEnabled) return;
141
 
142
  try {
143
  // Use random offset to avoid always getting the same books
144
+ const randomOffset = Math.floor(Math.random() * 1000);
145
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
146
  const response = await fetch(url);
147
 
 
156
 
157
  console.log(`📥 Received ${data.rows.length} books from HF API`);
158
 
 
159
  this.preloadedBooks = data.rows
160
  .map(row => {
161
  try {
162
+ return this.processHFBookLazy(row.row);
163
  } catch (e) {
164
  console.warn('Error processing book:', e);
165
  return null;
166
  }
167
  })
168
+ .filter(book => book !== null);
169
 
170
+ console.log(`📚 Preloaded ${this.preloadedBooks.length} books (lazy validation)`);
171
  } else {
172
  console.error(`HF API request failed: ${response.status} ${response.statusText}`);
173
  }
 
176
  }
177
  }
178
 
179
+ processHFBookLazy(rowData) {
180
+ // Minimal processing - defer text cleaning and validation until book is selected
181
+ const rawText = rowData.text || '';
 
182
 
183
+ // Do basic metadata extraction to get proper title/author
184
+ const extractedMetadata = this.extractMetadata(rawText);
 
 
185
  const title = extractedMetadata.title || rowData.title || 'Classic Literature';
186
  const author = extractedMetadata.author || rowData.author || 'Unknown Author';
187
 
 
189
  id: rowData.id || Math.random().toString(36),
190
  title: title,
191
  author: author,
192
+ year: extractedMetadata.year, // Extract year during lazy processing
193
+ rawText: rawText,
194
+ text: null, // Will clean when needed
195
  language: rowData.language || 'en',
196
+ source: 'project_gutenberg',
197
+ processed: false
198
  };
199
  }
200
 
201
+ async processBookOnDemand(book) {
202
+ if (book.processed) return book;
203
+
204
+ console.log(`🔄 Processing "${book.title}" on demand...`);
205
+ const startTime = Date.now();
206
+
207
+ // Clean text and extract metadata when actually needed
208
+ const cleanedText = this.cleanProjectGutenbergText(book.rawText);
209
+ const extractedMetadata = this.extractMetadata(book.rawText);
210
+
211
+ book.text = cleanedText;
212
+ book.year = extractedMetadata.year || this.estimatePublicationYear(book.title, book.author);
213
+ book.processed = true;
214
+
215
+ // Validate after processing
216
+ if (!this.isValidForCloze(book)) {
217
+ console.log(`❌ "${book.title}" failed validation after ${Date.now() - startTime}ms`);
218
+ return null;
219
+ }
220
+
221
+ console.log(`✅ "${book.title}" processed in ${Date.now() - startTime}ms`);
222
+ return book;
223
+ }
224
+
225
+
226
  cleanProjectGutenbergText(text) {
227
  if (!text) return '';
228
 
 
311
  }
312
 
313
  extractMetadata(text) {
314
+ const metadata = { title: 'Classic Literature', author: 'Unknown Author', year: null };
315
 
316
  if (!text) return metadata;
317
 
 
350
  if (author && author.length > 1) {
351
  metadata.author = this.cleanMetadataField(author);
352
  }
353
+ } else if (line.includes('Release Date:')) {
354
+ // Try to extract year from release date
355
+ const yearMatch = line.match(/\b(1[789]\d\d|20[012]\d)\b/);
356
+ if (yearMatch) {
357
+ metadata.year = parseInt(yearMatch[1]);
358
+ }
359
  }
360
  }
361
 
 
369
  .trim();
370
  }
371
 
372
+ estimatePublicationYear(title, author) {
373
+ // Return null to indicate unknown year rather than guessing
374
+ // This allows for truly random selection without bias
375
+ return null;
376
+ }
377
+
378
+ extractPublicationPeriod(text) {
379
+ // Look for publication year clues in the text itself
380
+ if (!text) return null;
381
+
382
+ // Check first 200 lines for copyright or publication information
383
+ const lines = text.split('\n').slice(0, 200);
384
+ const textSnippet = lines.join(' ');
385
+
386
+ // Look for explicit year mentions in copyright notices or metadata
387
+ const yearPatterns = [
388
+ /(?:copyright|©|published|publication date)[:\s]+.*?\b(1[6-9]\d{2}|20[0-2]\d)\b/i,
389
+ /\b(1[6-9]\d{2}|20[0-2]\d)\b[,\s]+by\s+/i,
390
+ /first published[:\s]+.*?\b(1[6-9]\d{2}|20[0-2]\d)\b/i,
391
+ /originally published[:\s]+.*?\b(1[6-9]\d{2}|20[0-2]\d)\b/i
392
+ ];
393
+
394
+ for (const pattern of yearPatterns) {
395
+ const match = textSnippet.match(pattern);
396
+ if (match) {
397
+ return parseInt(match[1]);
398
+ }
399
+ }
400
+
401
+ return null;
402
+ }
403
+
404
  isValidTitle(title) {
405
  if (!title || title.length < 3 || title.length > 100) return false;
406
  // Avoid fragments that are clearly not titles
 
459
  if (availableBooks.length > 0) {
460
  const randomIndex = Math.floor(Math.random() * availableBooks.length);
461
  book = availableBooks[randomIndex];
462
+
463
+ // Process book on demand
464
+ book = await this.processBookOnDemand(book);
465
+ if (!book) continue; // Book failed validation, try next
466
  } else {
467
  // All preloaded books used, try streaming
468
  book = await this.getStreamingBook();
 
501
  // Use preloaded books for immediate access
502
  if (this.preloadedBooks.length > 0) {
503
  const randomIndex = Math.floor(Math.random() * this.preloadedBooks.length);
504
+ let book = this.preloadedBooks[randomIndex];
505
+
506
+ // Process on demand if needed
507
+ if (!book.processed) {
508
+ book = await this.processBookOnDemand(book);
509
+ }
510
+
511
+ return book;
512
  }
513
 
514
  // If no preloaded books, try to fetch directly
515
  try {
516
+ const offset = Math.floor(Math.random() * 1000);
517
  const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
518
  const response = await fetch(url);
519
 
520
  if (response.ok) {
521
  const data = await response.json();
522
  if (data.rows && data.rows.length > 0) {
523
+ const book = this.processHFBookLazy(data.rows[0].row);
524
+ return await this.processBookOnDemand(book);
 
 
525
  }
526
  }
527
  } catch (error) {
 
531
  return null;
532
  }
533
 
534
+ async getBookByLevelCriteria(level) {
535
+ let targetPeriod = null;
536
+ if (level <= 2) {
537
+ targetPeriod = { min: 1850, max: 1925 };
538
+ } else if (level <= 4) {
539
+ targetPeriod = { min: 1800, max: 1899 };
540
+ }
541
+
542
+ if (targetPeriod) {
543
+ const periodBooks = await this.getBooksByPeriod(targetPeriod.min, targetPeriod.max);
 
 
 
 
544
 
545
+ if (periodBooks.length > 0) {
546
+ const randomIndex = Math.floor(Math.random() * periodBooks.length);
547
+ let book = periodBooks[randomIndex];
548
+
549
+ if (book.source === 'project_gutenberg' && !book.processed) {
550
+ book = await this.processBookOnDemand(book);
551
+ if (!book) {
552
+ return await this.getRandomBook();
553
+ }
554
+ }
555
+
556
+ return book;
557
  }
558
  }
559
+
560
+ return await this.getRandomBook();
561
+ }
562
 
563
+ async getBooksByPeriod(minYear, maxYear) {
564
+ const matchingBooks = [];
565
+
566
+ if (this.streamingEnabled && this.preloadedBooks.length > 0) {
567
+ for (const book of this.preloadedBooks) {
568
+ if (!this.usedBooks.has(this.getBookId(book))) {
569
+ let year = book.year;
570
+ if (!year && book.rawText) {
571
+ year = this.extractPublicationPeriod(book.rawText);
572
+ book.year = year;
573
+ }
574
+
575
+ if (year && year >= minYear && year <= maxYear) {
576
+ matchingBooks.push(book);
577
+ }
578
+ }
579
+ }
580
+ }
581
 
582
+ const fallbackBooks = this.books.length > 0 ? this.books : this.getSampleBooks();
583
+ for (const book of fallbackBooks) {
584
+ if (!this.usedBooks.has(this.getBookId(book))) {
585
+ const year = book.year || this.extractPublicationPeriod(book.text);
586
+ if (year && year >= minYear && year <= maxYear) {
587
+ matchingBooks.push(book);
588
+ }
589
+ }
590
  }
591
 
592
+ return matchingBooks;
 
593
  }
594
 
595
+
596
  getBookById(id) {
597
  // Search in both preloaded and local books
598
  const allBooks = [...this.preloadedBooks, ...this.books];
src/clozeGameEngine.js CHANGED
@@ -39,9 +39,9 @@ class ClozeGame {
39
 
40
  async startNewRound() {
41
  try {
42
- // Get two random books for this round
43
- const book1 = await bookDataService.getRandomBook();
44
- const book2 = await bookDataService.getRandomBook();
45
 
46
  // Extract passages from both books
47
  const passage1 = this.extractCoherentPassage(book1.text);
 
39
 
40
  async startNewRound() {
41
  try {
42
+ // Get two books for this round based on current level criteria
43
+ const book1 = await bookDataService.getBookByLevelCriteria(this.currentLevel);
44
+ const book2 = await bookDataService.getBookByLevelCriteria(this.currentLevel);
45
 
46
  // Extract passages from both books
47
  const passage1 = this.extractCoherentPassage(book1.text);