Memoroeisdead committed on
Commit 1ebfec3 · verified · 1 Parent(s): cb29e60

Update app.py

Files changed (1)
  1. app.py +234 -450

app.py CHANGED
@@ -1,10 +1,6 @@
1
- def load_core_datasets():
2
- try:
3
- if not DATASETS_AVAILABLE:
4
- self.loading_status = "⚠️"""
5
- BILLION DOLLAR EDUCATION AI - GLOBAL SCALE
6
- The ChatGPT for Education - Optimized for Speed, Quality & Scalability
7
- Serving millions of students, teachers, and lifelong learners worldwide
8
  """
9
 
10
  import gradio as gr
@@ -13,340 +9,211 @@ import json
13
  import random
14
  import threading
15
  import time
16
- import hashlib
17
- from typing import Dict, List, Optional
18
- import asyncio
19
- from concurrent.futures import ThreadPoolExecutor
20
 
21
- # Import datasets library (ensure no circular import)
22
  try:
23
  from datasets import load_dataset
24
  DATASETS_AVAILABLE = True
25
- except ImportError as e:
26
- print(f"Datasets library not available: {e}")
27
  DATASETS_AVAILABLE = False
28
  def load_dataset(*args, **kwargs):
29
  return []
30
 
31
- class GlobalEducationAI:
32
  def __init__(self):
33
- # Production API configuration
34
  self.groq_api_key = "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"
35
  self.groq_url = "https://api.groq.com/openai/v1/chat/completions"
36
 
37
- # Scalable dataset architecture
38
  self.datasets = {}
39
- self.example_cache = {} # Speed optimization
40
- self.loading_status = "🚀 Initializing Global Education AI..."
41
- self.total_examples_loaded = 0
42
 
43
- # Multi-language support for global reach
44
- self.supported_languages = ["English", "Spanish", "French", "German", "Chinese", "Japanese"]
45
-
46
- # User analytics for billion-dollar insights
47
- self.usage_stats = {
48
  "total_queries": 0,
49
- "subject_distribution": {},
50
- "difficulty_preferences": {},
51
  "response_times": []
52
  }
53
 
54
- # Start optimized dataset loading
55
- self.initialize_datasets()
56
 
57
- def initialize_datasets(self):
58
- """Optimized parallel dataset loading for global scale"""
59
- def load_core_datasets():
60
  try:
61
- self.loading_status = "🔥 Loading Core Math Datasets (Competition + Problem Solving)..."
62
 
63
- # TIER 1: GSM8K (Reliable and high-quality)
64
  try:
65
- gsm8k = load_dataset("gsm8k", "main", split="train[:3000]", trust_remote_code=True)
66
- self.datasets['gsm8k'] = self.optimize_dataset(gsm8k, 'math_practical')
 
67
  print("✅ GSM8K loaded successfully")
68
  except Exception as e:
69
  print(f"GSM8K error: {e}")
70
 
71
- # TIER 2: Try MATH dataset (alternative to competition_math)
72
- try:
73
- math_dataset = load_dataset("lighteval/MATH", split="train[:1000]", trust_remote_code=True)
74
- self.datasets['math_competition'] = self.optimize_dataset(math_dataset, 'math_competition')
75
- print("✅ MATH dataset loaded successfully")
76
- except Exception as e:
77
- print(f"MATH dataset error: {e}")
78
-
79
- self.loading_status = "🧪 Loading Science & Reasoning Datasets..."
80
 
81
- # TIER 3: Science QA (Try alternative datasets)
82
  try:
83
- science_qa = load_dataset("sciq", split="train[:1000]", trust_remote_code=True)
84
- self.datasets['science_qa'] = self.optimize_dataset(science_qa, 'science')
 
85
  print("✅ SciQ loaded successfully")
86
  except Exception as e:
87
  print(f"SciQ error: {e}")
88
 
89
- # TIER 4: Try Arc dataset
90
- try:
91
- arc_dataset = load_dataset("allenai/ai2_arc", "ARC-Easy", split="train[:500]", trust_remote_code=True)
92
- self.datasets['arc'] = self.optimize_dataset(arc_dataset, 'science')
93
- print("✅ ARC dataset loaded successfully")
94
- except Exception as e:
95
- print(f"ARC error: {e}")
96
-
97
- # Create optimized example cache
98
- self.create_example_cache()
99
- self.total_examples_loaded = sum(len(cache) for cache in self.example_cache.values())
100
 
101
- if self.datasets:
102
- self.loading_status = f"✅ GLOBAL EDUCATION AI READY - {len(self.datasets)} datasets, {self.total_examples_loaded:,} examples"
103
- print(f"🌍 Global Education AI initialized with {self.total_examples_loaded:,} premium examples")
104
  else:
105
- # Fallback mode with high-quality prompts
106
- self.loading_status = "✅ AI READY - Premium prompts active (datasets unavailable)"
107
  self.create_fallback_examples()
108
- print("🌍 Running in premium prompt mode")
 
109
 
110
  except Exception as e:
111
- self.loading_status = f"✅ AI READY - Premium mode active"
112
  self.create_fallback_examples()
113
- print(f"Dataset loading info: {e}")
114
 
115
- # Load datasets in background for instant user experience
116
- thread = threading.Thread(target=load_core_datasets)
117
  thread.daemon = True
118
  thread.start()
119
 
120
  def create_fallback_examples(self):
121
- """Create high-quality fallback examples when datasets unavailable"""
122
- self.example_cache = {
123
- 'mathematics': [
124
  {
125
- 'question': 'Solve the quadratic equation x² + 5x + 6 = 0',
126
  'solution': 'Factor: (x + 2)(x + 3) = 0, so x = -2 or x = -3',
127
- 'type': 'algebra',
128
- 'difficulty': 'intermediate',
129
- 'category': 'mathematics',
130
- 'quality_score': 9
131
  }
132
  ],
133
  'science': [
134
  {
135
  'question': 'Why do ice cubes float in water?',
136
  'solution': 'Ice is less dense than liquid water due to its crystalline structure',
137
- 'type': 'physics',
138
- 'difficulty': 'beginner',
139
- 'category': 'science',
140
- 'quality_score': 8
141
  }
142
- ],
143
- 'general': []
144
- }
145
- self.total_examples_loaded = 10 # Fallback count
146
-
147
- # Load datasets in background for instant user experience
148
- thread = threading.Thread(target=load_core_datasets)
149
- thread.daemon = True
150
- thread.start()
151
-
152
- def optimize_dataset(self, dataset, category: str) -> List[Dict]:
153
- """Optimize dataset for speed and quality"""
154
- optimized = []
155
-
156
- for item in dataset:
157
- try:
158
- if category == 'math_competition':
159
- if item.get('problem') and item.get('solution') and len(item['problem']) > 20:
160
- optimized.append({
161
- 'question': item['problem'],
162
- 'solution': item['solution'],
163
- 'type': item.get('type', 'competition_math'),
164
- 'difficulty': 'competition',
165
- 'category': 'mathematics',
166
- 'quality_score': 10
167
- })
168
-
169
- elif category == 'math_practical':
170
- if item.get('question') and item.get('answer'):
171
- optimized.append({
172
- 'question': item['question'],
173
- 'solution': item['answer'],
174
- 'type': 'word_problem',
175
- 'difficulty': 'intermediate',
176
- 'category': 'mathematics',
177
- 'quality_score': 9
178
- })
179
-
180
- elif category == 'science':
181
- if item.get('question') and item.get('correct_answer'):
182
- # Handle SciQ format
183
- optimized.append({
184
- 'question': item['question'],
185
- 'solution': item['correct_answer'],
186
- 'type': 'science_qa',
187
- 'difficulty': 'intermediate',
188
- 'category': 'science',
189
- 'quality_score': 8
190
- })
191
- elif item.get('question_stem') and item.get('choices'):
192
- # Handle ARC format
193
- choices_text = "\n".join([f"{choice['label']}) {choice['text']}" for choice in item['choices']['text']])
194
- optimized.append({
195
- 'question': f"{item['question_stem']}\n\n{choices_text}",
196
- 'solution': f"Answer: {item['answerKey']}",
197
- 'type': 'science_reasoning',
198
- 'difficulty': 'intermediate',
199
- 'category': 'science',
200
- 'quality_score': 8
201
- })
202
-
203
- elif category in ['math_advanced', 'academic']:
204
- if item.get('question') and item.get('answer'):
205
- optimized.append({
206
- 'question': str(item['question'])[:500], # Truncate for speed
207
- 'solution': str(item['answer'])[:500],
208
- 'type': category,
209
- 'difficulty': 'advanced',
210
- 'category': 'general',
211
- 'quality_score': 8
212
- })
213
-
214
- except Exception:
215
- continue # Skip malformed entries
216
-
217
- return optimized[:500] # Limit for speed while maintaining quality
218
-
219
- def create_example_cache(self):
220
- """Create subject-specific example cache for instant retrieval"""
221
- self.example_cache = {
222
- 'mathematics': [],
223
- 'science': [],
224
- 'physics': [],
225
- 'chemistry': [],
226
- 'biology': [],
227
- 'general': [],
228
- 'competition': []
229
  }
230
-
231
- for dataset_name, examples in self.datasets.items():
232
- for example in examples:
233
- category = example.get('category', 'general')
234
- if category in self.example_cache:
235
- self.example_cache[category].append(example)
236
-
237
- # Add high-quality examples to general cache
238
- if example.get('quality_score', 0) >= 9:
239
- self.example_cache['general'].append(example)
240
-
241
- # Add competition problems to competition cache
242
- if example.get('difficulty') == 'competition':
243
- self.example_cache['competition'].append(example)
244
 
245
- def get_optimal_examples(self, question: str, subject: str, difficulty: str, num_examples: int = 2) -> List[Dict]:
246
- """Lightning-fast example retrieval optimized for quality and relevance"""
247
- # Map user inputs to cache categories
248
- subject_mapping = {
249
- 'mathematics': 'mathematics',
250
- 'math': 'mathematics',
251
  'science': 'science',
252
  'physics': 'science',
253
  'chemistry': 'science',
254
- 'biology': 'science',
255
- 'general': 'general'
256
  }
257
 
258
- target_subject = subject_mapping.get(subject.lower(), 'general')
259
-
260
- # Get examples from cache (instant retrieval)
261
- available_examples = self.example_cache.get(target_subject, [])
262
-
263
- # If requesting competition difficulty, prioritize competition examples
264
- if difficulty == 'competition' and self.example_cache.get('competition'):
265
- available_examples = self.example_cache['competition'][:50]
266
-
267
- # Filter by difficulty if possible
268
- if difficulty in ['advanced', 'competition']:
269
- filtered = [ex for ex in available_examples if ex.get('difficulty') in ['advanced', 'competition']]
270
- if filtered:
271
- available_examples = filtered
272
 
273
- # Return best examples (random selection for variety)
274
- if available_examples:
275
- return random.sample(available_examples, min(num_examples, len(available_examples)))
276
-
277
- # Fallback to general examples
278
- return random.sample(self.example_cache.get('general', []), min(num_examples, len(self.example_cache.get('general', []))))
279
 
280
- def create_premium_prompt(self, question: str, subject: str, difficulty: str) -> str:
281
- """Create optimized prompt with premium dataset examples"""
282
- examples = self.get_optimal_examples(question, subject, difficulty)
283
-
284
- if not examples:
285
- return f"You are an expert {subject} tutor. Provide detailed, step-by-step solutions."
286
-
287
- # Build efficient few-shot prompt
288
- prompt = f"""You are a world-class {subject} educator with access to premium educational datasets.
289
-
290
- TEACHING METHODOLOGY: Use these high-quality examples from educational datasets as your guide:
291
-
292
- """
293
 
294
- for i, example in enumerate(examples, 1):
295
- prompt += f"EXAMPLE {i} ({example.get('type', 'academic')}):\n"
296
- prompt += f"Q: {example['question'][:250]}{'...' if len(example['question']) > 250 else ''}\n"
297
- prompt += f"A: {example['solution'][:250]}{'...' if len(example['solution']) > 250 else ''}\n\n"
298
 
299
- prompt += f"""INSTRUCTION: Use the same rigorous, step-by-step approach shown above.
300
- - Subject Focus: {subject}
301
- - Difficulty Level: {difficulty}
302
- - Always show your work and explain reasoning
303
- - Make complex concepts accessible
304
- - Provide practical applications when relevant
305
-
306
- Now solve the student's question:"""
307
 
 
308
  return prompt
309
 
310
- def educate(self, question: str, subject: str = "general", difficulty: str = "intermediate", language: str = "English") -> str:
311
- """Main education function optimized for global scale and speed"""
312
-
313
- # Analytics tracking for billion-dollar insights
314
- self.usage_stats["total_queries"] += 1
315
- self.usage_stats["subject_distribution"][subject] = self.usage_stats["subject_distribution"].get(subject, 0) + 1
316
- self.usage_stats["difficulty_preferences"][difficulty] = self.usage_stats["difficulty_preferences"].get(difficulty, 0) + 1
317
 
318
  start_time = time.time()
319
 
320
  if not question.strip():
321
- return "🎓 Hello! I'm your AI education assistant. Ask me any academic question and I'll provide detailed, step-by-step explanations using premium educational datasets!"
322
-
323
- # Check system status
324
- if "Initializing" in self.loading_status or "Loading" in self.loading_status:
325
- return f"🔄 {self.loading_status}\n\nI'm still loading educational datasets. You can ask questions now, but responses will get even better in a moment!"
326
 
327
- # Create optimized prompt with dataset examples
328
- system_prompt = self.create_premium_prompt(question, subject, difficulty)
329
 
330
- # Add language instruction for global users
331
  if language != "English":
332
- system_prompt += f"\n\nIMPORTANT: Respond in {language}."
333
 
334
- # Optimized API request for speed
335
  headers = {
336
  "Authorization": f"Bearer {self.groq_api_key}",
337
  "Content-Type": "application/json"
338
  }
339
 
340
  payload = {
341
- "model": "llama3-70b-8192", # Fastest model for real-time responses
342
  "messages": [
343
  {"role": "system", "content": system_prompt},
344
  {"role": "user", "content": question}
345
  ],
346
- "temperature": 0.2, # Lower for consistency in education
347
- "max_tokens": 1500, # Optimized length
348
- "top_p": 0.9,
349
- "stream": False # Disable streaming for faster processing
350
  }
351
 
352
  try:
@@ -354,255 +221,172 @@ Now solve the student's question:"""
354
  self.groq_url,
355
  headers=headers,
356
  json=payload,
357
- timeout=15 # Aggressive timeout for speed
358
  )
359
 
360
  response_time = time.time() - start_time
361
- self.usage_stats["response_times"].append(response_time)
362
 
363
  if response.status_code == 200:
364
  result = response.json()
365
  answer = result["choices"][0]["message"]["content"]
366
 
367
- # Add premium branding with real stats
368
- dataset_count = len(self.datasets)
369
- example_count = self.total_examples_loaded
370
-
371
- footer = f"\n\n---\n*🌍 **Global Education AI** powered by {dataset_count} premium datasets ({example_count:,} examples) | Response time: {response_time:.2f}s | Query #{self.usage_stats['total_queries']:,}*"
372
-
373
  return answer + footer
374
  else:
375
- return f"⚠️ Service temporarily unavailable. Please try again in a moment. (Error: {response.status_code})"
376
 
377
- except requests.exceptions.Timeout:
378
- return "⚡ Response took too long. Please try a more specific question or try again."
379
  except Exception as e:
380
- return f"🔧 Technical issue occurred. Our team has been notified. Please try again shortly."
381
 
382
- def get_global_analytics(self) -> str:
383
- """Get analytics for billion-dollar insights"""
384
- total_queries = self.usage_stats["total_queries"]
385
- avg_response_time = sum(self.usage_stats["response_times"][-100:]) / len(self.usage_stats["response_times"][-100:]) if self.usage_stats["response_times"] else 0
386
 
387
- top_subjects = sorted(self.usage_stats["subject_distribution"].items(), key=lambda x: x[1], reverse=True)[:3]
388
- top_difficulties = sorted(self.usage_stats["difficulty_preferences"].items(), key=lambda x: x[1], reverse=True)[:3]
389
 
390
- analytics = f"""📊 **GLOBAL EDUCATION AI ANALYTICS**
391
 
392
- 🌍 **Scale Metrics:**
393
- • Total Queries Served: {total_queries:,}
394
- • Average Response Time: {avg_response_time:.2f}s
395
  • Datasets Loaded: {len(self.datasets)}
396
- • Examples Available: {self.total_examples_loaded:,}
397
 
398
  📚 **Popular Subjects:**"""
399
 
400
  for subject, count in top_subjects:
401
- analytics += f"\n• {subject.title()}: {count:,} queries"
402
-
403
- analytics += f"\n\n⚡ **Difficulty Distribution:**"
404
- for difficulty, count in top_difficulties:
405
- analytics += f"\n• {difficulty.title()}: {count:,} requests"
406
 
407
- analytics += f"\n\n🚀 **Status**: {self.loading_status}"
408
-
409
- return analytics
410
 
411
- # Initialize Global Education AI
412
- global_ai = GlobalEducationAI()
413
 
414
- def create_global_interface():
415
- """Create world-class education interface for global scale"""
416
 
417
  with gr.Blocks(
418
- theme=gr.themes.Origin(), # Professional theme
419
- title="🌍 Global Education AI - The ChatGPT for Education",
420
  css="""
421
- .gradio-container { max-width: 1400px !important; }
422
  .header {
423
  text-align: center;
424
- background: linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%);
425
- padding: 3rem; border-radius: 20px; margin-bottom: 2rem;
426
- box-shadow: 0 10px 30px rgba(0,0,0,0.1);
427
- }
428
- .stats-panel {
429
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
430
- border-radius: 15px; padding: 1.5rem; margin: 1rem 0;
431
- border: 1px solid #e1e8ed;
432
- }
433
- .feature-box {
434
- background: white; border-radius: 10px; padding: 1rem;
435
- margin: 0.5rem 0; border-left: 4px solid #667eea;
436
  }
437
  """
438
- ) as interface:
439
 
440
- # Global Header
441
- with gr.Row():
442
- gr.HTML("""
443
- <div class="header">
444
- <h1 style="color: white; margin: 0; font-size: 3em; font-weight: 700;">🌍 Global Education AI</h1>
445
- <p style="color: #f0f0f0; margin: 1rem 0 0 0; font-size: 1.3em; font-weight: 300;">
446
- The ChatGPT for Education • Powered by Premium Datasets • Serving Millions Worldwide
447
- </p>
448
- <div style="margin-top: 1rem;">
449
- <span style="background: rgba(255,255,255,0.2); padding: 0.5rem 1rem; border-radius: 20px; margin: 0.25rem; display: inline-block; color: white;">Competition Math</span>
450
- <span style="background: rgba(255,255,255,0.2); padding: 0.5rem 1rem; border-radius: 20px; margin: 0.25rem; display: inline-block; color: white;">Advanced Science</span>
451
- <span style="background: rgba(255,255,255,0.2); padding: 0.5rem 1rem; border-radius: 20px; margin: 0.25rem; display: inline-block; color: white;">Microsoft Orca</span>
452
- <span style="background: rgba(255,255,255,0.2); padding: 0.5rem 1rem; border-radius: 20px; margin: 0.25rem; display: inline-block; color: white;">Multi-Language</span>
453
- </div>
454
- </div>
455
- """)
456
 
457
- # Main Interface
458
  with gr.Row():
459
- with gr.Column(scale=3):
460
- with gr.Group():
461
- question_input = gr.Textbox(
462
- label="🎓 Ask Your Educational Question",
463
- placeholder="Enter any question: math problems, science concepts, homework help, test prep, or academic topics...",
464
- lines=4,
465
- max_lines=10
466
  )
467
 
468
- with gr.Row():
469
- subject_dropdown = gr.Dropdown(
470
- choices=["general", "mathematics", "science", "physics", "chemistry", "biology", "english", "history", "computer_science"],
471
- label="📚 Subject",
472
- value="general",
473
- interactive=True
474
- )
475
-
476
- difficulty_dropdown = gr.Dropdown(
477
- choices=["beginner", "intermediate", "advanced", "competition", "graduate"],
478
- label="⚡ Difficulty",
479
- value="intermediate",
480
- interactive=True
481
- )
482
-
483
- language_dropdown = gr.Dropdown(
484
- choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"],
485
- label="🌐 Language",
486
- value="English",
487
- interactive=True
488
- )
489
 
490
- submit_btn = gr.Button(
491
- "🚀 Get Expert Answer",
492
- variant="primary",
493
- size="lg",
494
- elem_classes="submit-button"
495
  )
496
 
497
  with gr.Column(scale=1):
498
- with gr.Group():
499
- gr.HTML('<div class="feature-box"><h3>🏆 Premium Features</h3></div>')
500
-
501
- analytics_display = gr.Textbox(
502
- label="📊 Live Analytics",
503
- value=global_ai.get_global_analytics(),
504
- lines=12,
505
- interactive=False
506
- )
507
-
508
- refresh_analytics = gr.Button("🔄 Refresh Analytics", size="sm")
509
-
510
- # Response Area
511
- answer_output = gr.Textbox(
512
- label="📖 Expert Educational Response",
513
- lines=18,
514
- max_lines=25,
515
- interactive=False,
516
- placeholder="Your detailed, step-by-step educational response will appear here..."
517
  )
518
 
519
- # Example Queries for Global Users
520
- with gr.Group():
521
- gr.HTML('<h3 style="text-align: center; margin: 1rem 0;">💡 Example Questions from Around the World</h3>')
522
-
523
- gr.Examples(
524
- examples=[
525
- # Math Examples
526
- ["Solve the quadratic equation: x² + 5x + 6 = 0", "mathematics", "intermediate", "English"],
527
- ["Find the derivative of f(x) = ln(x² + 1)", "mathematics", "advanced", "English"],
528
- ["Prove that the square root of 2 is irrational", "mathematics", "competition", "English"],
529
-
530
- # Science Examples
531
- ["Explain photosynthesis in detail with chemical equations", "biology", "intermediate", "English"],
532
- ["Why do ice cubes float on water? Explain the molecular basis", "chemistry", "beginner", "English"],
533
- ["Derive Newton's second law from first principles", "physics", "advanced", "English"],
534
-
535
- # Multi-language Examples
536
- ["¿Cómo funciona la fotosíntesis?", "science", "intermediate", "Spanish"],
537
- ["Comment résoudre une équation du second degré?", "mathematics", "intermediate", "French"],
538
-
539
- # Advanced Examples
540
- ["Explain quantum entanglement for a graduate student", "physics", "graduate", "English"],
541
- ["How do I prepare for the International Mathematical Olympiad?", "mathematics", "competition", "English"]
542
- ],
543
- inputs=[question_input, subject_dropdown, difficulty_dropdown, language_dropdown],
544
- outputs=answer_output,
545
- fn=global_ai.educate,
546
- cache_examples=False
547
- )
548
-
549
- # Event Handlers
550
- submit_btn.click(
551
- fn=global_ai.educate,
552
- inputs=[question_input, subject_dropdown, difficulty_dropdown, language_dropdown],
553
- outputs=answer_output,
554
- api_name="predict" # Global API endpoint
555
  )
556
 
557
- question_input.submit(
558
- fn=global_ai.educate,
559
- inputs=[question_input, subject_dropdown, difficulty_dropdown, language_dropdown],
560
- outputs=answer_output
561
  )
562
 
563
- refresh_analytics.click(
564
- fn=global_ai.get_global_analytics,
565
- outputs=analytics_display
 
566
  )
567
 
568
- # Auto-refresh analytics (fixed for newer Gradio)
569
- def refresh_analytics_periodically():
570
- while True:
571
- time.sleep(30)
572
- try:
573
- analytics_display.value = global_ai.get_global_analytics()
574
- except:
575
- pass
576
-
577
- refresh_thread = threading.Thread(target=refresh_analytics_periodically)
578
- refresh_thread.daemon = True
579
- refresh_thread.start()
580
 
581
- # Global Footer
582
  gr.HTML("""
583
- <div style="text-align: center; margin-top: 3rem; padding: 2rem; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); border-radius: 15px;">
584
- <h3 style="color: #2c3e50; margin-bottom: 1rem;">🌍 Serving Global Education</h3>
585
- <p style="color: #34495e; margin-bottom: 1rem;"><strong>Competitive Advantage:</strong> Real dataset integration with Competition Math (AMC/AIME),
586
- Microsoft Orca Mathematical Reasoning, Advanced Science QA, and Academic Knowledge bases.</p>
587
- <p style="color: #7f8c8d; font-size: 0.9em;">
588
- 🚀 <strong>API Endpoint:</strong> https://memoroeisdead-your-education-api.hf.space/run/predict<br>
589
- 💡 <strong>Supported:</strong> 60+ subjects, 5 difficulty levels, 6 languages, unlimited scale<br>
590
- 🎯 <strong>Mission:</strong> Making world-class education accessible to everyone, everywhere
591
- </p>
592
  </div>
593
  """)
594
 
595
- return interface
596
 
597
- # Launch Global Education AI
598
  if __name__ == "__main__":
599
- interface = create_global_interface()
600
  interface.launch(
601
  server_name="0.0.0.0",
602
  server_port=7860,
603
  share=False,
604
- show_error=True,
605
- show_tips=True,
606
- enable_queue=True, # Handle high traffic
607
- max_threads=40 # Scale for millions of users
608
- )
 
1
+ """
2
+ BILLION DOLLAR EDUCATION AI - CLEAN VERSION
3
+ The ChatGPT for Education - Premium Dataset Integration
4
  """
5
 
6
  import gradio as gr
7
  import requests
8
  import json
9
  import random
10
  import threading
11
  import time
12
+ from typing import Dict, List
13
 
14
+ # Safe dataset import
15
  try:
16
  from datasets import load_dataset
17
  DATASETS_AVAILABLE = True
18
+ except ImportError:
 
19
  DATASETS_AVAILABLE = False
20
  def load_dataset(*args, **kwargs):
21
  return []
22
 
23
+ class EducationAI:
24
  def __init__(self):
25
+ # Your API key
26
  self.groq_api_key = "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"
27
  self.groq_url = "https://api.groq.com/openai/v1/chat/completions"
28
 
29
+ # Dataset storage
30
  self.datasets = {}
31
+ self.examples = {}
32
+ self.loading_status = "🚀 Starting Premium Education AI..."
33
+ self.total_examples = 0
34
 
35
+ # Usage tracking
36
+ self.stats = {
37
  "total_queries": 0,
38
+ "subjects": {},
 
39
  "response_times": []
40
  }
41
 
42
+ # Load datasets in background
43
+ self.load_datasets()
44
 
45
+ def load_datasets(self):
46
+ """Load premium datasets in background"""
47
+ def load_thread():
48
  try:
49
+ if not DATASETS_AVAILABLE:
50
+ self.loading_status = "✅ Premium AI Ready (Dataset library unavailable)"
51
+ self.create_fallback_examples()
52
+ return
53
+
54
+ self.loading_status = "📚 Loading GSM8K Math Dataset..."
55
 
56
+ # Load GSM8K (reliable dataset)
57
  try:
58
+ gsm8k = load_dataset("gsm8k", "main", split="train[:1000]")
59
+ self.datasets['gsm8k'] = gsm8k
60
+ self.examples['math'] = self.extract_math_examples(gsm8k)
61
  print("✅ GSM8K loaded successfully")
62
  except Exception as e:
63
  print(f"GSM8K error: {e}")
64
 
65
+ self.loading_status = "🧪 Loading Science Dataset..."
66
 
67
+ # Load SciQ (science questions)
68
  try:
69
+ sciq = load_dataset("sciq", split="train[:500]")
70
+ self.datasets['sciq'] = sciq
71
+ self.examples['science'] = self.extract_science_examples(sciq)
72
  print("✅ SciQ loaded successfully")
73
  except Exception as e:
74
  print(f"SciQ error: {e}")
75
 
76
+ # Calculate totals
77
+ self.total_examples = sum(len(examples) for examples in self.examples.values())
78
 
79
+ if self.total_examples > 0:
80
+ self.loading_status = f"✅ PREMIUM AI READY - {len(self.datasets)} datasets, {self.total_examples} examples"
 
81
  else:
82
+ self.loading_status = "✅ Premium AI Ready (Using high-quality prompts)"
 
83
  self.create_fallback_examples()
84
+
85
+ print(f"🎓 Education AI ready with {self.total_examples} examples")
86
 
87
  except Exception as e:
88
+ self.loading_status = "✅ Premium AI Ready (Core functionality active)"
89
  self.create_fallback_examples()
90
+ print(f"Loading info: {e}")
91
 
92
+ # Start background loading
93
+ thread = threading.Thread(target=load_thread)
94
  thread.daemon = True
95
  thread.start()
96
 
97
+ def extract_math_examples(self, dataset):
98
+ """Extract math examples from GSM8K"""
99
+ examples = []
100
+ for item in dataset:
101
+ if item.get('question') and item.get('answer'):
102
+ examples.append({
103
+ 'question': item['question'],
104
+ 'solution': item['answer'],
105
+ 'type': 'math_word_problem'
106
+ })
107
+ return examples[:50] # Keep top 50
108
+
109
+ def extract_science_examples(self, dataset):
110
+ """Extract science examples from SciQ"""
111
+ examples = []
112
+ for item in dataset:
113
+ if item.get('question') and item.get('correct_answer'):
114
+ examples.append({
115
+ 'question': item['question'],
116
+ 'solution': item['correct_answer'],
117
+ 'type': 'science_qa'
118
+ })
119
+ return examples[:30] # Keep top 30
120
+
121
  def create_fallback_examples(self):
122
+ """Create high-quality examples when datasets unavailable"""
123
+ self.examples = {
124
+ 'math': [
125
  {
126
+ 'question': 'Solve x² + 5x + 6 = 0',
127
  'solution': 'Factor: (x + 2)(x + 3) = 0, so x = -2 or x = -3',
128
+ 'type': 'algebra'
129
+ },
130
+ {
131
+ 'question': 'Find the derivative of f(x) = x³ + 2x',
132
+ 'solution': "f'(x) = 3x² + 2",
133
+ 'type': 'calculus'
134
  }
135
  ],
136
  'science': [
137
  {
138
  'question': 'Why do ice cubes float in water?',
139
  'solution': 'Ice is less dense than liquid water due to its crystalline structure',
140
+ 'type': 'physics'
141
+ },
142
+ {
143
+ 'question': 'What is photosynthesis?',
144
+ 'solution': 'The process by which plants convert sunlight into chemical energy',
145
+ 'type': 'biology'
146
  }
147
+ ]
148
  }
149
+ self.total_examples = 4
150
 
151
+ def get_examples(self, subject):
152
+ """Get relevant examples for subject"""
153
+ subject_map = {
154
+ 'mathematics': 'math',
155
+ 'math': 'math',
 
156
  'science': 'science',
157
  'physics': 'science',
158
  'chemistry': 'science',
159
+ 'biology': 'science'
 
160
  }
161
 
162
+ target = subject_map.get(subject.lower(), 'math')
163
+ examples = self.examples.get(target, [])
164
 
165
+ # Return 2 random examples
166
+ if examples:
167
+ return random.sample(examples, min(2, len(examples)))
168
+ return []
 
 
169
 
170
+ def create_prompt(self, question, subject, difficulty):
171
+ """Create enhanced prompt with examples"""
172
+ examples = self.get_examples(subject)
 
 
174
+ prompt = f"You are an expert {subject} tutor. Provide detailed, step-by-step solutions."
 
 
 
175
 
176
+ if examples:
177
+ prompt += "\n\nHere are examples of high-quality educational responses:\n\n"
178
+ for i, ex in enumerate(examples, 1):
179
+ prompt += f"EXAMPLE {i}:\n"
180
+ prompt += f"Q: {ex['question'][:200]}...\n"
181
+ prompt += f"A: {ex['solution'][:200]}...\n\n"
 
 
182
 
183
+ prompt += f"Use the same detailed approach. Subject: {subject}, Difficulty: {difficulty}"
184
  return prompt
185
 
186
+ def educate(self, question, subject="general", difficulty="intermediate", language="English"):
187
+ """Main education function"""
188
 
189
+ # Track usage
190
+ self.stats["total_queries"] += 1
191
+ self.stats["subjects"][subject] = self.stats["subjects"].get(subject, 0) + 1
192
  start_time = time.time()
193
 
194
  if not question.strip():
195
+ return "🎓 Hi! I'm your AI education assistant. Ask me any academic question!"
196
 
197
+ # Create enhanced prompt
198
+ system_prompt = self.create_prompt(question, subject, difficulty)
199
 
 
200
  if language != "English":
201
+ system_prompt += f"\n\nRespond in {language}."
202
 
203
+ # API request
204
  headers = {
205
  "Authorization": f"Bearer {self.groq_api_key}",
206
  "Content-Type": "application/json"
207
  }
208
 
209
  payload = {
210
+ "model": "llama3-70b-8192",
211
  "messages": [
212
  {"role": "system", "content": system_prompt},
213
  {"role": "user", "content": question}
214
  ],
215
+ "temperature": 0.3,
216
+ "max_tokens": 1500
 
 
217
  }
218
 
219
  try:
220
  response = requests.post(
221
  self.groq_url,
222
  headers=headers,
223
  json=payload,
224
+ timeout=20
225
  )
226
 
227
  response_time = time.time() - start_time
228
+ self.stats["response_times"].append(response_time)
229
 
230
  if response.status_code == 200:
231
  result = response.json()
232
  answer = result["choices"][0]["message"]["content"]
233
 
234
+ # Add footer
235
+ footer = f"\n\n---\n*🎓 Powered by {len(self.datasets)} premium datasets ({self.total_examples} examples) | {response_time:.2f}s | Query #{self.stats['total_queries']}*"
236
  return answer + footer
237
  else:
238
+ return f"⚠️ Service temporarily unavailable (Error {response.status_code})"
239
 
 
 
240
  except Exception as e:
241
+ return "🔧 Technical issue. Please try again in a moment."
242
 
243
+ def get_stats(self):
244
+ """Get usage statistics"""
245
+ total = self.stats["total_queries"]
246
+ avg_time = sum(self.stats["response_times"][-50:]) / len(self.stats["response_times"][-50:]) if self.stats["response_times"] else 0
247
 
248
+ top_subjects = sorted(self.stats["subjects"].items(), key=lambda x: x[1], reverse=True)[:3]
 
249
 
250
+ stats = f"""📊 **EDUCATION AI ANALYTICS**
251
 
252
+ 🚀 **Performance:**
253
+ • Total Queries: {total:,}
254
+ • Average Response Time: {avg_time:.2f}s
255
  • Datasets Loaded: {len(self.datasets)}
256
+ • Examples Available: {self.total_examples}
257
 
258
  📚 **Popular Subjects:**"""
259
 
260
  for subject, count in top_subjects:
261
+ stats += f"\n• {subject.title()}: {count} queries"
262
 
263
+ stats += f"\n\n🌟 **Status:** {self.loading_status}"
264
+ return stats
 
265
 
266
+ # Initialize AI
267
+ ai = EducationAI()
268
 
269
+ def create_interface():
270
+ """Create the education interface"""
271
 
272
  with gr.Blocks(
273
+ theme=gr.themes.Soft(),
274
+ title="🎓 Billion Dollar Education AI",
275
  css="""
 
276
  .header {
277
  text-align: center;
278
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
279
+ padding: 2rem;
280
+ border-radius: 15px;
281
+ margin-bottom: 2rem;
282
  }
283
  """
284
+ ) as demo:
285
 
286
+ # Header
287
+ gr.HTML("""
288
+ <div class="header">
289
+ <h1 style="color: white; margin: 0; font-size: 2.5em;">🎓 Premium Education AI</h1>
290
+ <p style="color: #e8e8e8; margin: 0.5rem 0 0 0; font-size: 1.2em;">
291
+ The ChatGPT for Education • Real Dataset Integration • Global Scale
292
+ </p>
293
+ </div>
294
+ """)
 
 
 
 
 
 
 
295
 
296
+ # Main interface
297
  with gr.Row():
298
+ with gr.Column(scale=2):
299
+ question = gr.Textbox(
300
+ label="📝 Your Question",
301
+ placeholder="Ask any educational question: math, science, homework help...",
302
+ lines=4
303
+ )
304
+
305
+ with gr.Row():
306
+ subject = gr.Dropdown(
307
+ choices=["general", "mathematics", "science", "physics", "chemistry", "biology", "english"],
308
+ label="📚 Subject",
309
+ value="general"
310
  )
311
 
312
+ difficulty = gr.Dropdown(
313
+ choices=["beginner", "intermediate", "advanced", "competition"],
314
+ label="⚡ Difficulty",
315
+ value="intermediate"
316
+ )
317
 
318
+ language = gr.Dropdown(
319
+ choices=["English", "Spanish", "French", "German"],
320
+ label="🌐 Language",
321
+ value="English"
 
322
  )
323
+
324
+ submit = gr.Button("🚀 Get Expert Answer", variant="primary", size="lg")
325
 
326
  with gr.Column(scale=1):
327
+ stats = gr.Textbox(
328
+ label="📊 Live Stats",
329
+ value=ai.get_stats(),
330
+ lines=12,
331
+ interactive=False
332
+ )
333
+ refresh = gr.Button("🔄 Refresh", size="sm")
334
+
335
+ # Output
336
+ answer = gr.Textbox(
337
+ label="📖 Expert Response",
338
+ lines=15,
339
+ interactive=False
340
  )
341
 
342
+ # Examples
343
+ gr.Examples(
344
+ examples=[
345
+ ["Solve x² + 5x + 6 = 0", "mathematics", "intermediate", "English"],
346
+ ["Explain photosynthesis step by step", "biology", "intermediate", "English"],
347
+ ["What is Newton's second law?", "physics", "beginner", "English"],
348
+ ["Find the derivative of ln(x)", "mathematics", "advanced", "English"],
349
+ ["¿Qué es la fotosíntesis?", "science", "intermediate", "Spanish"]
350
+ ],
351
+ inputs=[question, subject, difficulty, language],
352
+ outputs=answer,
353
+ fn=ai.educate
354
  )
355
 
356
+ # Events
357
+ submit.click(
358
+ fn=ai.educate,
359
+ inputs=[question, subject, difficulty, language],
360
+ outputs=answer,
361
+ api_name="predict"
362
  )
363
 
364
+ question.submit(
365
+ fn=ai.educate,
366
+ inputs=[question, subject, difficulty, language],
367
+ outputs=answer
368
  )
369
 
370
+ refresh.click(
371
+ fn=ai.get_stats,
372
+ outputs=stats
373
+ )
374
 
375
+ # Footer
376
  gr.HTML("""
377
+ <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f8f9fa; border-radius: 10px;">
378
+ <p><strong>🌍 Billion Dollar Education AI</strong> - Real dataset integration with premium educational content</p>
379
+ <p><em>API: https://memoroeisdead-your-education-api.hf.space/run/predict</em></p>
380
  </div>
381
  """)
382
 
383
+ return demo
384
 
 
385
  if __name__ == "__main__":
386
+ interface = create_interface()
387
  interface.launch(
388
  server_name="0.0.0.0",
389
  server_port=7860,
390
  share=False,
391
+ show_error=True
392
+ )
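
For reference, the submit.click handler above registers the endpoint with api_name="predict", so the Space can also be queried programmatically. Below is a minimal sketch using the gradio_client package, assuming the Space ID Memoroeisdead/your-education-api (inferred from the footer URL) and that the Space is publicly accessible:

# Hypothetical client-side call; the Space ID is inferred from the footer URL
# and may need adjusting. Argument order mirrors inputs=[question, subject, difficulty, language].
from gradio_client import Client

client = Client("Memoroeisdead/your-education-api")
result = client.predict(
    "Solve x² + 5x + 6 = 0",   # question
    "mathematics",             # subject
    "intermediate",            # difficulty
    "English",                 # language
    api_name="/predict",
)
print(result)

The /run/predict REST route shown in the footer corresponds to older Gradio versions; the gradio_client call above resolves the active route for whatever Gradio version the Space is running.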