Spaces:
Running
Running
fix: resolve HF dataset loading errors and improve word selection
Browse files
- Reduce API request size from 100 to 20 to avoid TooBigContentError
- Lower random offset range to stay within dataset bounds
- Improve AI word selection prompt to avoid function words
- Explicitly exclude articles, prepositions, conjunctions from selection
- Require 4+ letter content words (nouns, verbs, adjectives)
- src/aiService.js +1 -1
- src/bookDataService.js +5 -5
src/aiService.js
CHANGED
@@ -112,7 +112,7 @@ class OpenRouterService {
|
|
112 |
content: 'Select words for cloze reading exercises. Choose common, everyday words that students know. Avoid proper nouns (names, places), technical terms, archaic words, and words over 8 letters. Pick words students can guess from surrounding context.'
|
113 |
}, {
|
114 |
role: 'user',
|
115 |
-
content: `Select exactly ${count}
|
116 |
|
117 |
Passage: "${passage}"`
|
118 |
}],
|
|
|
112 |
content: 'Select words for cloze reading exercises. Choose common, everyday words that students know. Avoid proper nouns (names, places), technical terms, archaic words, and words over 8 letters. Pick words students can guess from surrounding context.'
|
113 |
}, {
|
114 |
role: 'user',
|
115 |
+
content: `Select exactly ${count} meaningful content words for a cloze exercise. Choose nouns, verbs, adjectives that are 4+ letters long and important to meaning. NEVER select: articles (a, an, the), prepositions (in, on, at, to, for, of, with, by, from), conjunctions (and, or, but), pronouns (I, you, he, she, it, they), or auxiliary verbs (is, are, was, were, have, has, had). Return ONLY a JSON array of words.
|
116 |
|
117 |
Passage: "${passage}"`
|
118 |
}],
|
src/bookDataService.js
CHANGED
@@ -85,7 +85,7 @@ class HuggingFaceDatasetService {
|
|
85 |
|
86 |
if (this.streamingEnabled) {
|
87 |
// Preload some books for immediate access
|
88 |
-
await this.preloadBooks(
|
89 |
console.log(`✅ HF Streaming enabled: ${this.preloadedBooks.length} books preloaded`);
|
90 |
} else {
|
91 |
// Fall back to local samples
|
@@ -126,12 +126,12 @@ class HuggingFaceDatasetService {
|
|
126 |
}
|
127 |
}
|
128 |
|
129 |
-
async preloadBooks(count =
|
130 |
if (!this.streamingEnabled) return;
|
131 |
|
132 |
try {
|
133 |
// Use random offset to avoid always getting the same books
|
134 |
-
const randomOffset = Math.floor(Math.random() *
|
135 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
|
136 |
const response = await fetch(url);
|
137 |
|
@@ -412,7 +412,7 @@ class HuggingFaceDatasetService {
|
|
412 |
|
413 |
// If no preloaded books, try to fetch directly
|
414 |
try {
|
415 |
-
const offset = Math.floor(Math.random() *
|
416 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
|
417 |
const response = await fetch(url);
|
418 |
|
@@ -499,7 +499,7 @@ class HuggingFaceDatasetService {
|
|
499 |
// Refresh preloaded books cache
|
500 |
async refreshCache() {
|
501 |
if (this.streamingEnabled) {
|
502 |
-
await this.preloadBooks(
|
503 |
console.log(`🔄 Cache refreshed: ${this.preloadedBooks.length} books`);
|
504 |
}
|
505 |
}
|
|
|
85 |
|
86 |
if (this.streamingEnabled) {
|
87 |
// Preload some books for immediate access
|
88 |
+
await this.preloadBooks(20);
|
89 |
console.log(`✅ HF Streaming enabled: ${this.preloadedBooks.length} books preloaded`);
|
90 |
} else {
|
91 |
// Fall back to local samples
|
|
|
126 |
}
|
127 |
}
|
128 |
|
129 |
+
async preloadBooks(count = 20) {
|
130 |
if (!this.streamingEnabled) return;
|
131 |
|
132 |
try {
|
133 |
// Use random offset to avoid always getting the same books
|
134 |
+
const randomOffset = Math.floor(Math.random() * 1000); // Smaller random range
|
135 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${randomOffset}&length=${count}`;
|
136 |
const response = await fetch(url);
|
137 |
|
|
|
412 |
|
413 |
// If no preloaded books, try to fetch directly
|
414 |
try {
|
415 |
+
const offset = Math.floor(Math.random() * 1000); // Smaller random range
|
416 |
const url = `${this.apiBase}/rows?dataset=${this.datasetName}&config=default&split=en&offset=${offset}&length=1`;
|
417 |
const response = await fetch(url);
|
418 |
|
|
|
499 |
// Refresh preloaded books cache
|
500 |
async refreshCache() {
|
501 |
if (this.streamingEnabled) {
|
502 |
+
await this.preloadBooks(20);
|
503 |
console.log(`🔄 Cache refreshed: ${this.preloadedBooks.length} books`);
|
504 |
}
|
505 |
}
|