Spaces:
Sleeping
Sleeping
Implement level-aware word selection and performance optimizations
Browse files
- Add level-based word length constraints (levels 1-2: 4-7 letters, levels 3-4: 4-10 letters, levels 5+: 5-14 letters)
- Implement lazy loading for faster startup (preload 5 books instead of 20; defer text cleaning and validation until a book is selected)
- Add period-based book selection (levels 1-2: 1850-1925, 3-4: 1800s, 5+: any)
- Strengthen AI word validation with client-side length filtering
- Remove difficulty labels from UI (Easy/Medium/Hard)
- Clean up unused methods and debug logging
- src/aiService.js +74 -7
- src/app.js +1 -2
- src/bookDataService.js +158 -51
- src/clozeGameEngine.js +3 -3
src/aiService.js
CHANGED
@@ -110,8 +110,8 @@ ${prompt}`
|
|
110 |
}
|
111 |
|
112 |
|
113 |
-
async selectSignificantWords(passage, count) {
|
114 |
-
console.log('selectSignificantWords called with count:', count);
|
115 |
|
116 |
// Check for API key at runtime in case it was loaded after initialization
|
117 |
const currentKey = this.getApiKey();
|
@@ -126,6 +126,19 @@ ${prompt}`
|
|
126 |
throw new Error('API key required for word selection');
|
127 |
}
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
try {
|
130 |
return await this.retryRequest(async () => {
|
131 |
const response = await fetch(this.apiUrl, {
|
@@ -142,6 +155,9 @@ ${prompt}`
|
|
142 |
role: 'user',
|
143 |
content: `You are a cluemaster vocabulary selector for educational cloze exercises. Select exactly ${count} words from this passage for a cloze exercise.
|
144 |
|
|
|
|
|
|
|
145 |
CLOZE DELETION PRINCIPLES:
|
146 |
- Select words that require understanding context and vocabulary to identify
|
147 |
- Choose words essential for comprehension that test language ability
|
@@ -149,7 +165,7 @@ CLOZE DELETION PRINCIPLES:
|
|
149 |
|
150 |
REQUIREMENTS:
|
151 |
- Choose clear, properly-spelled words (no OCR errors like "andsatires")
|
152 |
-
- Select meaningful nouns, verbs, or adjectives (
|
153 |
- Words must appear EXACTLY as written in the passage
|
154 |
- Avoid: capitalized words, ALL-CAPS words, function words, archaic terms, proper nouns, technical jargon
|
155 |
- Skip any words that look malformed or concatenated
|
@@ -189,13 +205,48 @@ Passage: "${passage}"`
|
|
189 |
try {
|
190 |
const words = JSON.parse(content);
|
191 |
if (Array.isArray(words)) {
|
192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
}
|
194 |
} catch (e) {
|
195 |
// If not valid JSON, try to extract words from the response
|
196 |
const matches = content.match(/"([^"]+)"/g);
|
197 |
if (matches) {
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
}
|
200 |
}
|
201 |
|
@@ -207,7 +258,7 @@ Passage: "${passage}"`
|
|
207 |
}
|
208 |
}
|
209 |
|
210 |
-
async processBothPassages(passage1, book1, passage2, book2, blanksPerPassage) {
|
211 |
// Process both passages in a single API call to avoid rate limits
|
212 |
const currentKey = this.getApiKey();
|
213 |
if (currentKey && !this.apiKey) {
|
@@ -218,6 +269,19 @@ Passage: "${passage}"`
|
|
218 |
throw new Error('API key required for passage processing');
|
219 |
}
|
220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
try {
|
222 |
const response = await fetch(this.apiUrl, {
|
223 |
method: 'POST',
|
@@ -233,6 +297,9 @@ Passage: "${passage}"`
|
|
233 |
role: 'user',
|
234 |
content: `You process passages for cloze reading exercises. For each passage: 1) Select words for blanks, 2) Generate a contextual introduction. Return a JSON object with both passages' data.
|
235 |
|
|
|
|
|
|
|
236 |
Process these two passages for cloze exercises:
|
237 |
|
238 |
PASSAGE 1:
|
@@ -247,7 +314,7 @@ Select ${blanksPerPassage} words for blanks.
|
|
247 |
|
248 |
SELECTION RULES:
|
249 |
- Select EXACTLY ${blanksPerPassage} word${blanksPerPassage > 1 ? 's' : ''} per passage, no more, no less
|
250 |
-
- Choose meaningful nouns, verbs, or adjectives (
|
251 |
- Avoid capitalized words, ALL-CAPS words, and table of contents entries
|
252 |
- NEVER select words from the first or last sentence/clause of each passage
|
253 |
- Choose words from the middle portions for better context dependency
|
|
|
110 |
}
|
111 |
|
112 |
|
113 |
+
async selectSignificantWords(passage, count, level = 1) {
|
114 |
+
console.log('selectSignificantWords called with count:', count, 'level:', level);
|
115 |
|
116 |
// Check for API key at runtime in case it was loaded after initialization
|
117 |
const currentKey = this.getApiKey();
|
|
|
126 |
throw new Error('API key required for word selection');
|
127 |
}
|
128 |
|
129 |
+
// Define level-based constraints
|
130 |
+
let wordLengthConstraint, difficultyGuidance;
|
131 |
+
if (level <= 2) {
|
132 |
+
wordLengthConstraint = "EXACTLY 4-7 letters (no words longer than 7 letters)";
|
133 |
+
difficultyGuidance = "Select EASY vocabulary words - common, everyday words that most readers know. NEVER select words longer than 7 letters.";
|
134 |
+
} else if (level <= 4) {
|
135 |
+
wordLengthConstraint = "EXACTLY 4-10 letters (no words longer than 10 letters)";
|
136 |
+
difficultyGuidance = "Select MEDIUM difficulty words - mix of common and moderately challenging vocabulary. NEVER select words longer than 10 letters.";
|
137 |
+
} else {
|
138 |
+
wordLengthConstraint = "5-14 letters";
|
139 |
+
difficultyGuidance = "Select CHALLENGING words - sophisticated vocabulary that requires strong reading skills";
|
140 |
+
}
|
141 |
+
|
142 |
try {
|
143 |
return await this.retryRequest(async () => {
|
144 |
const response = await fetch(this.apiUrl, {
|
|
|
155 |
role: 'user',
|
156 |
content: `You are a cluemaster vocabulary selector for educational cloze exercises. Select exactly ${count} words from this passage for a cloze exercise.
|
157 |
|
158 |
+
DIFFICULTY LEVEL ${level}:
|
159 |
+
${difficultyGuidance}
|
160 |
+
|
161 |
CLOZE DELETION PRINCIPLES:
|
162 |
- Select words that require understanding context and vocabulary to identify
|
163 |
- Choose words essential for comprehension that test language ability
|
|
|
165 |
|
166 |
REQUIREMENTS:
|
167 |
- Choose clear, properly-spelled words (no OCR errors like "andsatires")
|
168 |
+
- Select meaningful nouns, verbs, or adjectives (${wordLengthConstraint})
|
169 |
- Words must appear EXACTLY as written in the passage
|
170 |
- Avoid: capitalized words, ALL-CAPS words, function words, archaic terms, proper nouns, technical jargon
|
171 |
- Skip any words that look malformed or concatenated
|
|
|
205 |
try {
|
206 |
const words = JSON.parse(content);
|
207 |
if (Array.isArray(words)) {
|
208 |
+
// Validate word lengths based on level
|
209 |
+
const validWords = words.filter(word => {
|
210 |
+
const cleanWord = word.replace(/[^a-zA-Z]/g, '');
|
211 |
+
if (level <= 2) {
|
212 |
+
return cleanWord.length >= 4 && cleanWord.length <= 7;
|
213 |
+
} else if (level <= 4) {
|
214 |
+
return cleanWord.length >= 4 && cleanWord.length <= 10;
|
215 |
+
} else {
|
216 |
+
return cleanWord.length >= 5 && cleanWord.length <= 14;
|
217 |
+
}
|
218 |
+
});
|
219 |
+
|
220 |
+
if (validWords.length > 0) {
|
221 |
+
console.log(`✅ Level ${level} word validation: ${validWords.length}/${words.length} words passed`);
|
222 |
+
return validWords.slice(0, count);
|
223 |
+
} else {
|
224 |
+
console.warn(`❌ Level ${level}: No words met length requirements, rejecting all`);
|
225 |
+
throw new Error(`No valid words for level ${level}`);
|
226 |
+
}
|
227 |
}
|
228 |
} catch (e) {
|
229 |
// If not valid JSON, try to extract words from the response
|
230 |
const matches = content.match(/"([^"]+)"/g);
|
231 |
if (matches) {
|
232 |
+
const words = matches.map(m => m.replace(/"/g, ''));
|
233 |
+
// Validate word lengths
|
234 |
+
const validWords = words.filter(word => {
|
235 |
+
const cleanWord = word.replace(/[^a-zA-Z]/g, '');
|
236 |
+
if (level <= 2) {
|
237 |
+
return cleanWord.length >= 4 && cleanWord.length <= 7;
|
238 |
+
} else if (level <= 4) {
|
239 |
+
return cleanWord.length >= 4 && cleanWord.length <= 10;
|
240 |
+
} else {
|
241 |
+
return cleanWord.length >= 5 && cleanWord.length <= 14;
|
242 |
+
}
|
243 |
+
});
|
244 |
+
|
245 |
+
if (validWords.length > 0) {
|
246 |
+
return validWords.slice(0, count);
|
247 |
+
} else {
|
248 |
+
throw new Error(`No valid words for level ${level}`);
|
249 |
+
}
|
250 |
}
|
251 |
}
|
252 |
|
|
|
258 |
}
|
259 |
}
|
260 |
|
261 |
+
async processBothPassages(passage1, book1, passage2, book2, blanksPerPassage, level = 1) {
|
262 |
// Process both passages in a single API call to avoid rate limits
|
263 |
const currentKey = this.getApiKey();
|
264 |
if (currentKey && !this.apiKey) {
|
|
|
269 |
throw new Error('API key required for passage processing');
|
270 |
}
|
271 |
|
272 |
+
// Define level-based constraints
|
273 |
+
let wordLengthConstraint, difficultyGuidance;
|
274 |
+
if (level <= 2) {
|
275 |
+
wordLengthConstraint = "EXACTLY 4-7 letters (no words longer than 7 letters)";
|
276 |
+
difficultyGuidance = "Select EASY vocabulary words - common, everyday words that most readers know. NEVER select words longer than 7 letters.";
|
277 |
+
} else if (level <= 4) {
|
278 |
+
wordLengthConstraint = "EXACTLY 4-10 letters (no words longer than 10 letters)";
|
279 |
+
difficultyGuidance = "Select MEDIUM difficulty words - mix of common and moderately challenging vocabulary. NEVER select words longer than 10 letters.";
|
280 |
+
} else {
|
281 |
+
wordLengthConstraint = "5-14 letters";
|
282 |
+
difficultyGuidance = "Select CHALLENGING words - sophisticated vocabulary that requires strong reading skills";
|
283 |
+
}
|
284 |
+
|
285 |
try {
|
286 |
const response = await fetch(this.apiUrl, {
|
287 |
method: 'POST',
|
|
|
297 |
role: 'user',
|
298 |
content: `You process passages for cloze reading exercises. For each passage: 1) Select words for blanks, 2) Generate a contextual introduction. Return a JSON object with both passages' data.
|
299 |
|
300 |
+
DIFFICULTY LEVEL ${level}:
|
301 |
+
${difficultyGuidance}
|
302 |
+
|
303 |
Process these two passages for cloze exercises:
|
304 |
|
305 |
PASSAGE 1:
|
|
|
314 |
|
315 |
SELECTION RULES:
|
316 |
- Select EXACTLY ${blanksPerPassage} word${blanksPerPassage > 1 ? 's' : ''} per passage, no more, no less
|
317 |
+
- Choose meaningful nouns, verbs, or adjectives (${wordLengthConstraint})
|
318 |
- Avoid capitalized words, ALL-CAPS words, and table of contents entries
|
319 |
- NEVER select words from the first or last sentence/clause of each passage
|
320 |
- Choose words from the middle portions for better context dependency
|
src/app.js
CHANGED
@@ -71,8 +71,7 @@ class App {
|
|
71 |
|
72 |
// Show level information without passage number
|
73 |
const blanksCount = roundData.blanks.length;
|
74 |
-
|
75 |
-
this.elements.roundInfo.innerHTML = `Level ${this.game.currentLevel} • ${blanksCount} blank${blanksCount > 1 ? 's' : ''} • ${difficultyText}`;
|
76 |
|
77 |
// Show contextualization from AI agent
|
78 |
this.elements.contextualization.innerHTML = `
|
|
|
71 |
|
72 |
// Show level information without passage number
|
73 |
const blanksCount = roundData.blanks.length;
|
74 |
+
this.elements.roundInfo.innerHTML = `Level ${this.game.currentLevel} • ${blanksCount} blank${blanksCount > 1 ? 's' : ''}`;
|
|
|
75 |
|
76 |
// Show contextualization from AI agent
|
77 |
this.elements.contextualization.innerHTML = `
|
src/bookDataService.js
CHANGED
@@ -19,60 +19,70 @@ class HuggingFaceDatasetService {
|
|
19 |
id: 1,
|
20 |
title: "Pride and Prejudice",
|
21 |
author: "Jane Austen",
|
|
|
22 |
text: "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife. However little known the feelings or views of such a man may be on his first entering a neighbourhood, this truth is so well fixed in the minds of the surrounding families, that he is considered the rightful property of some one or other of their daughters. \"My dear Mr. Bennet,\" said his lady to him one day, \"have you heard that Netherfield Park is let at last?\" Mr. Bennet replied that he had not. \"But it is,\" returned she; \"for Mrs. Long has just been here, and she told me all about it.\" Mr. Bennet made no answer. \"Do you not want to know who has taken it?\" cried his wife impatiently. \"You want to tell me, and I have no objection to hearing it.\" This was invitation enough."
|
23 |
},
|
24 |
{
|
25 |
id: 2,
|
26 |
title: "The Adventures of Tom Sawyer",
|
27 |
author: "Mark Twain",
|
|
|
28 |
text: "\"Tom!\" No answer. \"Tom!\" No answer. \"What's gone with that boy, I wonder? You TOM!\" No answer. The old lady pulled her spectacles down and looked over them about the room; then she put them up and looked out under them. She seldom or never looked through them for so small a thing as a boy; they were her state pair, the pride of her heart, and were built for \"style,\" not service--she could have seen through a pair of stove-lids just as well. She looked perplexed for a moment, and then said, not fiercely, but still loud enough for the furniture to hear: \"Well, I lay if I get hold of you I'll--\""
|
29 |
},
|
30 |
{
|
31 |
id: 3,
|
32 |
title: "Great Expectations",
|
33 |
author: "Charles Dickens",
|
|
|
34 |
text: "My father's family name being Pirrip, and my Christian name Philip, my infant tongue could make of both names nothing longer or more explicit than Pip. So, I called myself Pip, and came to be called Pip. I give Pirrip as my father's family name, on the authority of his tombstone and my sister,--Mrs. Joe Gargery, who married the blacksmith. As I never saw my father or my mother, and never saw any likeness of them (for their days were long before the days of photographs), my first fancies regarding what they were like were unreasonably derived from their tombstones."
|
35 |
},
|
36 |
{
|
37 |
id: 4,
|
38 |
title: "Alice's Adventures in Wonderland",
|
39 |
author: "Lewis Carroll",
|
|
|
40 |
text: "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversation?' So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her."
|
41 |
},
|
42 |
{
|
43 |
id: 5,
|
44 |
title: "The Picture of Dorian Gray",
|
45 |
author: "Oscar Wilde",
|
|
|
46 |
text: "The studio was filled with the rich odour of roses, and when the strong summer wind stirred, amidst the trees of the garden, there came through the open door the heavy scent of the lilac, or the more delicate perfume of the pink-flowering thorn. From the corner of the divan of Persian saddle-bags on which he was lying, smoking, as was his custom, innumerable cigarettes, Lord Henry Wotton could just catch the gleam of the honey-sweet and honey-coloured blossoms of a laburnum, whose tremulous branches seemed hardly able to bear the burden of a beauty so flamelike as theirs."
|
47 |
},
|
48 |
{
|
49 |
id: 6,
|
50 |
title: "Moby Dick",
|
51 |
author: "Herman Melville",
|
|
|
52 |
text: "Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me from deliberately stepping into the street, and methodically knocking people's hats off—then, I account it high time to get to sea as soon as possible."
|
53 |
},
|
54 |
{
|
55 |
id: 7,
|
56 |
title: "Jane Eyre",
|
57 |
author: "Charlotte Bronte",
|
|
|
58 |
text: "There was no possibility of taking a walk that day. We had been wandering, indeed, in the leafless shrubbery an hour in the morning; but since dinner (Mrs. Reed, when there was no company, dined early) the cold winter wind had brought with it clouds so sombre, and a rain so penetrating, that further out-door exercise was now out of the question. I was glad of it: I never liked long walks, especially on chilly afternoons: dreadful to me was the coming home in the raw twilight, with nipped fingers and toes, and a heart saddened by the chidings of Bessie, the nurse, and humbled by the consciousness of my physical inferiority to Eliza, John, and Georgiana Reed."
|
59 |
},
|
60 |
{
|
61 |
id: 8,
|
62 |
title: "The Count of Monte Cristo",
|
63 |
author: "Alexandre Dumas",
|
|
|
64 |
text: "On the first Monday of February, 1815, the watchtower at Marseilles signaled the arrival of the three-master Pharaon from Smyrna, Trieste, and Naples. As was customary, the pilot immediately left the port and steered toward the château d'If to conduct the ship through the narrow passage that leads to the harbor. However, a young sailor of about nineteen or twenty years, standing on the ship's bow, had signaled the pilot even before he had time to ask the traditional questions that are exchanged between the pilot and the captain. The young man had already assumed command, being the ship's owner and captain."
|
65 |
},
|
66 |
{
|
67 |
id: 9,
|
68 |
title: "Wuthering Heights",
|
69 |
author: "Emily Bronte",
|
|
|
70 |
text: "I have just returned from a visit to my landlord—the solitary neighbour that I shall be troubled with. This is certainly a beautiful country! In all England, I do not believe that I could have fixed on a situation so completely removed from the stir of society. A perfect misanthropist's Heaven: and Mr. Heathcliff and I are such a suitable pair to divide the desolation between us. A capital fellow! He little imagined how my heart warmed towards him when I beheld his black eyes withdraw so suspiciously under their brows, as I rode up, and when his fingers sheltered themselves, with a jealous resolution, still further in his waistcoat, as I announced my name."
|
71 |
},
|
72 |
{
|
73 |
id: 10,
|
74 |
title: "Frankenstein",
|
75 |
author: "Mary Shelley",
|
|
|
76 |
text: "It was on a dreary night of November that I beheld the accomplishment of my toils. With an anxiety that almost amounted to agony, I collected the instruments of life around me, that I might infuse a spark of being into the lifeless thing that lay at my feet. It was already one in the morning; the rain pattered dismally against the panes, and my candle was nearly burnt out, when, by the glimmer of the half-extinguished light, I saw the dull yellow eye of the creature open; it breathed hard, and a convulsive motion agitated its limbs. How can I describe my emotions at this catastrophe, or how delineate the wretch whom with such infinite pains and care I had endeavoured to form?"
|
77 |
}
|
78 |
];
|
@@ -85,7 +95,7 @@ class HuggingFaceDatasetService {
|
|
85 |
|
86 |
if (this.streamingEnabled) {
|
87 |
// Preload some books for immediate access
|
88 |
-
await this.preloadBooks(
|
89 |
console.log(`✅ HF Streaming enabled: ${this.preloadedBooks.length} books preloaded`);
|
90 |
} else {
|
91 |
// Fall back to local samples
|
@@ -126,12 +136,12 @@ class HuggingFaceDatasetService {
|
|
126 |
}
|
127 |
}
|
128 |
|
129 |
-
async preloadBooks(count =
|
130 |
if (!this.streamingEnabled) return;
|
131 |
|
132 |
try {
|
133 |
// Use random offset to avoid always getting the same books
|
134 |
-
const randomOffset = Math.floor(Math.random() * 1000);
|
135 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
|
136 |
const response = await fetch(url);
|
137 |
|
@@ -146,19 +156,18 @@ class HuggingFaceDatasetService {
|
|
146 |
|
147 |
console.log(`📥 Received ${data.rows.length} books from HF API`);
|
148 |
|
149 |
-
// Process and filter books
|
150 |
this.preloadedBooks = data.rows
|
151 |
.map(row => {
|
152 |
try {
|
153 |
-
return this.
|
154 |
} catch (e) {
|
155 |
console.warn('Error processing book:', e);
|
156 |
return null;
|
157 |
}
|
158 |
})
|
159 |
-
.filter(book => book
|
160 |
|
161 |
-
console.log(`📚 Preloaded ${this.preloadedBooks.length}
|
162 |
} else {
|
163 |
console.error(`HF API request failed: ${response.status} ${response.statusText}`);
|
164 |
}
|
@@ -167,15 +176,12 @@ class HuggingFaceDatasetService {
|
|
167 |
}
|
168 |
}
|
169 |
|
170 |
-
|
171 |
-
//
|
172 |
-
const
|
173 |
-
const cleanedText = this.cleanProjectGutenbergText(originalText);
|
174 |
|
175 |
-
//
|
176 |
-
const extractedMetadata = this.extractMetadata(
|
177 |
-
|
178 |
-
// Prioritize extracted metadata over potentially incorrect HF dataset fields
|
179 |
const title = extractedMetadata.title || rowData.title || 'Classic Literature';
|
180 |
const author = extractedMetadata.author || rowData.author || 'Unknown Author';
|
181 |
|
@@ -183,12 +189,40 @@ class HuggingFaceDatasetService {
|
|
183 |
id: rowData.id || Math.random().toString(36),
|
184 |
title: title,
|
185 |
author: author,
|
186 |
-
|
|
|
|
|
187 |
language: rowData.language || 'en',
|
188 |
-
source: 'project_gutenberg'
|
|
|
189 |
};
|
190 |
}
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
cleanProjectGutenbergText(text) {
|
193 |
if (!text) return '';
|
194 |
|
@@ -277,7 +311,7 @@ class HuggingFaceDatasetService {
|
|
277 |
}
|
278 |
|
279 |
extractMetadata(text) {
|
280 |
-
const metadata = { title: 'Classic Literature', author: 'Unknown Author' };
|
281 |
|
282 |
if (!text) return metadata;
|
283 |
|
@@ -316,6 +350,12 @@ class HuggingFaceDatasetService {
|
|
316 |
if (author && author.length > 1) {
|
317 |
metadata.author = this.cleanMetadataField(author);
|
318 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
}
|
320 |
}
|
321 |
|
@@ -329,6 +369,38 @@ class HuggingFaceDatasetService {
|
|
329 |
.trim();
|
330 |
}
|
331 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
isValidTitle(title) {
|
333 |
if (!title || title.length < 3 || title.length > 100) return false;
|
334 |
// Avoid fragments that are clearly not titles
|
@@ -387,6 +459,10 @@ class HuggingFaceDatasetService {
|
|
387 |
if (availableBooks.length > 0) {
|
388 |
const randomIndex = Math.floor(Math.random() * availableBooks.length);
|
389 |
book = availableBooks[randomIndex];
|
|
|
|
|
|
|
|
|
390 |
} else {
|
391 |
// All preloaded books used, try streaming
|
392 |
book = await this.getStreamingBook();
|
@@ -425,22 +501,27 @@ class HuggingFaceDatasetService {
|
|
425 |
// Use preloaded books for immediate access
|
426 |
if (this.preloadedBooks.length > 0) {
|
427 |
const randomIndex = Math.floor(Math.random() * this.preloadedBooks.length);
|
428 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
}
|
430 |
|
431 |
// If no preloaded books, try to fetch directly
|
432 |
try {
|
433 |
-
const offset = Math.floor(Math.random() * 1000);
|
434 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
|
435 |
const response = await fetch(url);
|
436 |
|
437 |
if (response.ok) {
|
438 |
const data = await response.json();
|
439 |
if (data.rows && data.rows.length > 0) {
|
440 |
-
const book = this.
|
441 |
-
|
442 |
-
return book;
|
443 |
-
}
|
444 |
}
|
445 |
}
|
446 |
} catch (error) {
|
@@ -450,42 +531,68 @@ class HuggingFaceDatasetService {
|
|
450 |
return null;
|
451 |
}
|
452 |
|
453 |
-
async
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
// Filter preloaded books by difficulty
|
464 |
-
const suitable = this.preloadedBooks.filter(book =>
|
465 |
-
book.text.length >= range.min && book.text.length <= range.max
|
466 |
-
);
|
467 |
|
468 |
-
if (
|
469 |
-
const randomIndex = Math.floor(Math.random() *
|
470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
}
|
472 |
}
|
|
|
|
|
|
|
473 |
|
474 |
-
|
475 |
-
const
|
476 |
-
|
477 |
-
|
478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
|
480 |
-
|
481 |
-
|
482 |
-
|
|
|
|
|
|
|
|
|
|
|
483 |
}
|
484 |
|
485 |
-
|
486 |
-
return await this.getRandomBook();
|
487 |
}
|
488 |
|
|
|
489 |
getBookById(id) {
|
490 |
// Search in both preloaded and local books
|
491 |
const allBooks = [...this.preloadedBooks, ...this.books];
|
|
|
19 |
id: 1,
|
20 |
title: "Pride and Prejudice",
|
21 |
author: "Jane Austen",
|
22 |
+
year: 1813,
|
23 |
text: "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife. However little known the feelings or views of such a man may be on his first entering a neighbourhood, this truth is so well fixed in the minds of the surrounding families, that he is considered the rightful property of some one or other of their daughters. \"My dear Mr. Bennet,\" said his lady to him one day, \"have you heard that Netherfield Park is let at last?\" Mr. Bennet replied that he had not. \"But it is,\" returned she; \"for Mrs. Long has just been here, and she told me all about it.\" Mr. Bennet made no answer. \"Do you not want to know who has taken it?\" cried his wife impatiently. \"You want to tell me, and I have no objection to hearing it.\" This was invitation enough."
|
24 |
},
|
25 |
{
|
26 |
id: 2,
|
27 |
title: "The Adventures of Tom Sawyer",
|
28 |
author: "Mark Twain",
|
29 |
+
year: 1876,
|
30 |
text: "\"Tom!\" No answer. \"Tom!\" No answer. \"What's gone with that boy, I wonder? You TOM!\" No answer. The old lady pulled her spectacles down and looked over them about the room; then she put them up and looked out under them. She seldom or never looked through them for so small a thing as a boy; they were her state pair, the pride of her heart, and were built for \"style,\" not service--she could have seen through a pair of stove-lids just as well. She looked perplexed for a moment, and then said, not fiercely, but still loud enough for the furniture to hear: \"Well, I lay if I get hold of you I'll--\""
|
31 |
},
|
32 |
{
|
33 |
id: 3,
|
34 |
title: "Great Expectations",
|
35 |
author: "Charles Dickens",
|
36 |
+
year: 1861,
|
37 |
text: "My father's family name being Pirrip, and my Christian name Philip, my infant tongue could make of both names nothing longer or more explicit than Pip. So, I called myself Pip, and came to be called Pip. I give Pirrip as my father's family name, on the authority of his tombstone and my sister,--Mrs. Joe Gargery, who married the blacksmith. As I never saw my father or my mother, and never saw any likeness of them (for their days were long before the days of photographs), my first fancies regarding what they were like were unreasonably derived from their tombstones."
|
38 |
},
|
39 |
{
|
40 |
id: 4,
|
41 |
title: "Alice's Adventures in Wonderland",
|
42 |
author: "Lewis Carroll",
|
43 |
+
year: 1865,
|
44 |
text: "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversation?' So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her."
|
45 |
},
|
46 |
{
|
47 |
id: 5,
|
48 |
title: "The Picture of Dorian Gray",
|
49 |
author: "Oscar Wilde",
|
50 |
+
year: 1890,
|
51 |
text: "The studio was filled with the rich odour of roses, and when the strong summer wind stirred, amidst the trees of the garden, there came through the open door the heavy scent of the lilac, or the more delicate perfume of the pink-flowering thorn. From the corner of the divan of Persian saddle-bags on which he was lying, smoking, as was his custom, innumerable cigarettes, Lord Henry Wotton could just catch the gleam of the honey-sweet and honey-coloured blossoms of a laburnum, whose tremulous branches seemed hardly able to bear the burden of a beauty so flamelike as theirs."
|
52 |
},
|
53 |
{
|
54 |
id: 6,
|
55 |
title: "Moby Dick",
|
56 |
author: "Herman Melville",
|
57 |
+
year: 1851,
|
58 |
text: "Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me from deliberately stepping into the street, and methodically knocking people's hats off—then, I account it high time to get to sea as soon as possible."
|
59 |
},
|
60 |
{
|
61 |
id: 7,
|
62 |
title: "Jane Eyre",
|
63 |
author: "Charlotte Bronte",
|
64 |
+
year: 1847,
|
65 |
text: "There was no possibility of taking a walk that day. We had been wandering, indeed, in the leafless shrubbery an hour in the morning; but since dinner (Mrs. Reed, when there was no company, dined early) the cold winter wind had brought with it clouds so sombre, and a rain so penetrating, that further out-door exercise was now out of the question. I was glad of it: I never liked long walks, especially on chilly afternoons: dreadful to me was the coming home in the raw twilight, with nipped fingers and toes, and a heart saddened by the chidings of Bessie, the nurse, and humbled by the consciousness of my physical inferiority to Eliza, John, and Georgiana Reed."
|
66 |
},
|
67 |
{
|
68 |
id: 8,
|
69 |
title: "The Count of Monte Cristo",
|
70 |
author: "Alexandre Dumas",
|
71 |
+
year: 1844,
|
72 |
text: "On the first Monday of February, 1815, the watchtower at Marseilles signaled the arrival of the three-master Pharaon from Smyrna, Trieste, and Naples. As was customary, the pilot immediately left the port and steered toward the château d'If to conduct the ship through the narrow passage that leads to the harbor. However, a young sailor of about nineteen or twenty years, standing on the ship's bow, had signaled the pilot even before he had time to ask the traditional questions that are exchanged between the pilot and the captain. The young man had already assumed command, being the ship's owner and captain."
|
73 |
},
|
74 |
{
|
75 |
id: 9,
|
76 |
title: "Wuthering Heights",
|
77 |
author: "Emily Bronte",
|
78 |
+
year: 1847,
|
79 |
text: "I have just returned from a visit to my landlord—the solitary neighbour that I shall be troubled with. This is certainly a beautiful country! In all England, I do not believe that I could have fixed on a situation so completely removed from the stir of society. A perfect misanthropist's Heaven: and Mr. Heathcliff and I are such a suitable pair to divide the desolation between us. A capital fellow! He little imagined how my heart warmed towards him when I beheld his black eyes withdraw so suspiciously under their brows, as I rode up, and when his fingers sheltered themselves, with a jealous resolution, still further in his waistcoat, as I announced my name."
|
80 |
},
|
81 |
{
|
82 |
id: 10,
|
83 |
title: "Frankenstein",
|
84 |
author: "Mary Shelley",
|
85 |
+
year: 1818,
|
86 |
text: "It was on a dreary night of November that I beheld the accomplishment of my toils. With an anxiety that almost amounted to agony, I collected the instruments of life around me, that I might infuse a spark of being into the lifeless thing that lay at my feet. It was already one in the morning; the rain pattered dismally against the panes, and my candle was nearly burnt out, when, by the glimmer of the half-extinguished light, I saw the dull yellow eye of the creature open; it breathed hard, and a convulsive motion agitated its limbs. How can I describe my emotions at this catastrophe, or how delineate the wretch whom with such infinite pains and care I had endeavoured to form?"
|
87 |
}
|
88 |
];
|
|
|
95 |
|
96 |
if (this.streamingEnabled) {
|
97 |
// Preload some books for immediate access
|
98 |
+
await this.preloadBooks(5);
|
99 |
console.log(`✅ HF Streaming enabled: ${this.preloadedBooks.length} books preloaded`);
|
100 |
} else {
|
101 |
// Fall back to local samples
|
|
|
136 |
}
|
137 |
}
|
138 |
|
139 |
+
async preloadBooks(count = 5) {
|
140 |
if (!this.streamingEnabled) return;
|
141 |
|
142 |
try {
|
143 |
// Use random offset to avoid always getting the same books
|
144 |
+
const randomOffset = Math.floor(Math.random() * 1000);
|
145 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
|
146 |
const response = await fetch(url);
|
147 |
|
|
|
156 |
|
157 |
console.log(`📥 Received ${data.rows.length} books from HF API`);
|
158 |
|
|
|
159 |
this.preloadedBooks = data.rows
|
160 |
.map(row => {
|
161 |
try {
|
162 |
+
return this.processHFBookLazy(row.row);
|
163 |
} catch (e) {
|
164 |
console.warn('Error processing book:', e);
|
165 |
return null;
|
166 |
}
|
167 |
})
|
168 |
+
.filter(book => book !== null);
|
169 |
|
170 |
+
console.log(`📚 Preloaded ${this.preloadedBooks.length} books (lazy validation)`);
|
171 |
} else {
|
172 |
console.error(`HF API request failed: ${response.status} ${response.statusText}`);
|
173 |
}
|
|
|
176 |
}
|
177 |
}
|
178 |
|
179 |
+
processHFBookLazy(rowData) {
|
180 |
+
// Minimal processing - defer text cleaning and validation until book is selected
|
181 |
+
const rawText = rowData.text || '';
|
|
|
182 |
|
183 |
+
// Do basic metadata extraction to get proper title/author
|
184 |
+
const extractedMetadata = this.extractMetadata(rawText);
|
|
|
|
|
185 |
const title = extractedMetadata.title || rowData.title || 'Classic Literature';
|
186 |
const author = extractedMetadata.author || rowData.author || 'Unknown Author';
|
187 |
|
|
|
189 |
id: rowData.id || Math.random().toString(36),
|
190 |
title: title,
|
191 |
author: author,
|
192 |
+
year: extractedMetadata.year, // Extract year during lazy processing
|
193 |
+
rawText: rawText,
|
194 |
+
text: null, // Will clean when needed
|
195 |
language: rowData.language || 'en',
|
196 |
+
source: 'project_gutenberg',
|
197 |
+
processed: false
|
198 |
};
|
199 |
}
|
200 |
|
201 |
+
async processBookOnDemand(book) {
|
202 |
+
if (book.processed) return book;
|
203 |
+
|
204 |
+
console.log(`🔄 Processing "${book.title}" on demand...`);
|
205 |
+
const startTime = Date.now();
|
206 |
+
|
207 |
+
// Clean text and extract metadata when actually needed
|
208 |
+
const cleanedText = this.cleanProjectGutenbergText(book.rawText);
|
209 |
+
const extractedMetadata = this.extractMetadata(book.rawText);
|
210 |
+
|
211 |
+
book.text = cleanedText;
|
212 |
+
book.year = extractedMetadata.year || this.estimatePublicationYear(book.title, book.author);
|
213 |
+
book.processed = true;
|
214 |
+
|
215 |
+
// Validate after processing
|
216 |
+
if (!this.isValidForCloze(book)) {
|
217 |
+
console.log(`❌ "${book.title}" failed validation after ${Date.now() - startTime}ms`);
|
218 |
+
return null;
|
219 |
+
}
|
220 |
+
|
221 |
+
console.log(`✅ "${book.title}" processed in ${Date.now() - startTime}ms`);
|
222 |
+
return book;
|
223 |
+
}
|
224 |
+
|
225 |
+
|
226 |
cleanProjectGutenbergText(text) {
|
227 |
if (!text) return '';
|
228 |
|
|
|
311 |
}
|
312 |
|
313 |
extractMetadata(text) {
|
314 |
+
const metadata = { title: 'Classic Literature', author: 'Unknown Author', year: null };
|
315 |
|
316 |
if (!text) return metadata;
|
317 |
|
|
|
350 |
if (author && author.length > 1) {
|
351 |
metadata.author = this.cleanMetadataField(author);
|
352 |
}
|
353 |
+
} else if (line.includes('Release Date:')) {
|
354 |
+
// Try to extract year from release date
|
355 |
+
const yearMatch = line.match(/\b(1[789]\d\d|20[012]\d)\b/);
|
356 |
+
if (yearMatch) {
|
357 |
+
metadata.year = parseInt(yearMatch[1]);
|
358 |
+
}
|
359 |
}
|
360 |
}
|
361 |
|
|
|
369 |
.trim();
|
370 |
}
|
371 |
|
372 |
+
estimatePublicationYear(title, author) {
|
373 |
+
// Return null to indicate unknown year rather than guessing
|
374 |
+
// This allows for truly random selection without bias
|
375 |
+
return null;
|
376 |
+
}
|
377 |
+
|
378 |
+
extractPublicationPeriod(text) {
|
379 |
+
// Look for publication year clues in the text itself
|
380 |
+
if (!text) return null;
|
381 |
+
|
382 |
+
// Check first 200 lines for copyright or publication information
|
383 |
+
const lines = text.split('\n').slice(0, 200);
|
384 |
+
const textSnippet = lines.join(' ');
|
385 |
+
|
386 |
+
// Look for explicit year mentions in copyright notices or metadata
|
387 |
+
const yearPatterns = [
|
388 |
+
/(?:copyright|©|published|publication date)[:\s]+.*?\b(1[6-9]\d{2}|20[0-2]\d)\b/i,
|
389 |
+
/\b(1[6-9]\d{2}|20[0-2]\d)\b[,\s]+by\s+/i,
|
390 |
+
/first published[:\s]+.*?\b(1[6-9]\d{2}|20[0-2]\d)\b/i,
|
391 |
+
/originally published[:\s]+.*?\b(1[6-9]\d{2}|20[0-2]\d)\b/i
|
392 |
+
];
|
393 |
+
|
394 |
+
for (const pattern of yearPatterns) {
|
395 |
+
const match = textSnippet.match(pattern);
|
396 |
+
if (match) {
|
397 |
+
return parseInt(match[1]);
|
398 |
+
}
|
399 |
+
}
|
400 |
+
|
401 |
+
return null;
|
402 |
+
}
|
403 |
+
|
404 |
isValidTitle(title) {
|
405 |
if (!title || title.length < 3 || title.length > 100) return false;
|
406 |
// Avoid fragments that are clearly not titles
|
|
|
459 |
if (availableBooks.length > 0) {
|
460 |
const randomIndex = Math.floor(Math.random() * availableBooks.length);
|
461 |
book = availableBooks[randomIndex];
|
462 |
+
|
463 |
+
// Process book on demand
|
464 |
+
book = await this.processBookOnDemand(book);
|
465 |
+
if (!book) continue; // Book failed validation, try next
|
466 |
} else {
|
467 |
// All preloaded books used, try streaming
|
468 |
book = await this.getStreamingBook();
|
|
|
501 |
// Use preloaded books for immediate access
|
502 |
if (this.preloadedBooks.length > 0) {
|
503 |
const randomIndex = Math.floor(Math.random() * this.preloadedBooks.length);
|
504 |
+
let book = this.preloadedBooks[randomIndex];
|
505 |
+
|
506 |
+
// Process on demand if needed
|
507 |
+
if (!book.processed) {
|
508 |
+
book = await this.processBookOnDemand(book);
|
509 |
+
}
|
510 |
+
|
511 |
+
return book;
|
512 |
}
|
513 |
|
514 |
// If no preloaded books, try to fetch directly
|
515 |
try {
|
516 |
+
const offset = Math.floor(Math.random() * 1000);
|
517 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
|
518 |
const response = await fetch(url);
|
519 |
|
520 |
if (response.ok) {
|
521 |
const data = await response.json();
|
522 |
if (data.rows && data.rows.length > 0) {
|
523 |
+
const book = this.processHFBookLazy(data.rows[0].row);
|
524 |
+
return await this.processBookOnDemand(book);
|
|
|
|
|
525 |
}
|
526 |
}
|
527 |
} catch (error) {
|
|
|
531 |
return null;
|
532 |
}
|
533 |
|
534 |
+
async getBookByLevelCriteria(level) {
|
535 |
+
let targetPeriod = null;
|
536 |
+
if (level <= 2) {
|
537 |
+
targetPeriod = { min: 1850, max: 1925 };
|
538 |
+
} else if (level <= 4) {
|
539 |
+
targetPeriod = { min: 1800, max: 1899 };
|
540 |
+
}
|
541 |
+
|
542 |
+
if (targetPeriod) {
|
543 |
+
const periodBooks = await this.getBooksByPeriod(targetPeriod.min, targetPeriod.max);
|
|
|
|
|
|
|
|
|
544 |
|
545 |
+
if (periodBooks.length > 0) {
|
546 |
+
const randomIndex = Math.floor(Math.random() * periodBooks.length);
|
547 |
+
let book = periodBooks[randomIndex];
|
548 |
+
|
549 |
+
if (book.source === 'project_gutenberg' && !book.processed) {
|
550 |
+
book = await this.processBookOnDemand(book);
|
551 |
+
if (!book) {
|
552 |
+
return await this.getRandomBook();
|
553 |
+
}
|
554 |
+
}
|
555 |
+
|
556 |
+
return book;
|
557 |
}
|
558 |
}
|
559 |
+
|
560 |
+
return await this.getRandomBook();
|
561 |
+
}
|
562 |
|
563 |
+
async getBooksByPeriod(minYear, maxYear) {
|
564 |
+
const matchingBooks = [];
|
565 |
+
|
566 |
+
if (this.streamingEnabled && this.preloadedBooks.length > 0) {
|
567 |
+
for (const book of this.preloadedBooks) {
|
568 |
+
if (!this.usedBooks.has(this.getBookId(book))) {
|
569 |
+
let year = book.year;
|
570 |
+
if (!year && book.rawText) {
|
571 |
+
year = this.extractPublicationPeriod(book.rawText);
|
572 |
+
book.year = year;
|
573 |
+
}
|
574 |
+
|
575 |
+
if (year && year >= minYear && year <= maxYear) {
|
576 |
+
matchingBooks.push(book);
|
577 |
+
}
|
578 |
+
}
|
579 |
+
}
|
580 |
+
}
|
581 |
|
582 |
+
const fallbackBooks = this.books.length > 0 ? this.books : this.getSampleBooks();
|
583 |
+
for (const book of fallbackBooks) {
|
584 |
+
if (!this.usedBooks.has(this.getBookId(book))) {
|
585 |
+
const year = book.year || this.extractPublicationPeriod(book.text);
|
586 |
+
if (year && year >= minYear && year <= maxYear) {
|
587 |
+
matchingBooks.push(book);
|
588 |
+
}
|
589 |
+
}
|
590 |
}
|
591 |
|
592 |
+
return matchingBooks;
|
|
|
593 |
}
|
594 |
|
595 |
+
|
596 |
getBookById(id) {
|
597 |
// Search in both preloaded and local books
|
598 |
const allBooks = [...this.preloadedBooks, ...this.books];
|
src/clozeGameEngine.js
CHANGED
@@ -39,9 +39,9 @@ class ClozeGame {
|
|
39 |
|
40 |
async startNewRound() {
|
41 |
try {
|
42 |
-
// Get two
|
43 |
-
const book1 = await bookDataService.
|
44 |
-
const book2 = await bookDataService.
|
45 |
|
46 |
// Extract passages from both books
|
47 |
const passage1 = this.extractCoherentPassage(book1.text);
|
|
|
39 |
|
40 |
async startNewRound() {
|
41 |
try {
|
42 |
+
// Get two books for this round based on current level criteria
|
43 |
+
const book1 = await bookDataService.getBookByLevelCriteria(this.currentLevel);
|
44 |
+
const book2 = await bookDataService.getBookByLevelCriteria(this.currentLevel);
|
45 |
|
46 |
// Extract passages from both books
|
47 |
const passage1 = this.extractCoherentPassage(book1.text);
|