Meet Patel committed on
Commit
15710ed
·
1 Parent(s): bbd9cd6

All the functionality working with mock data

Browse files
Files changed (4) hide show
  1. app.py +27 -43
  2. client.py +33 -12
  3. main.py +262 -0
  4. requirements.txt +3 -1
app.py CHANGED
@@ -2,12 +2,11 @@
2
  Gradio web interface for the TutorX MCP Server with SSE support
3
  """
4
 
 
5
  import gradio as gr
6
  import numpy as np
7
  import json
8
- import base64
9
- from io import BytesIO
10
- from PIL import Image
11
  from datetime import datetime
12
  import asyncio
13
  import aiohttp
@@ -21,15 +20,7 @@ from client import client
21
  SERVER_URL = "http://localhost:8001" # Default port is now 8001 to match main.py
22
 
23
  # Utility functions
24
- def image_to_base64(img):
25
- """Convert a PIL image or numpy array to base64 string"""
26
- if isinstance(img, np.ndarray):
27
- img = Image.fromarray(img)
28
-
29
- buffered = BytesIO()
30
- img.save(buffered, format="PNG")
31
- img_str = base64.b64encode(buffered.getvalue()).decode()
32
- return img_str
33
 
34
  async def load_concept_graph(concept_id: str = None):
35
  """
@@ -357,47 +348,40 @@ with gr.Blocks(title="TutorX Educational AI", theme=gr.themes.Soft()) as demo:
357
  outputs=[text_output]
358
  )
359
 
360
- gr.Markdown("## Handwriting Recognition")
361
-
362
  with gr.Row():
363
  with gr.Column():
364
- drawing_input = gr.Sketchpad(label="Draw an Equation")
365
- drawing_btn = gr.Button("Recognize")
366
 
367
  with gr.Column():
368
- drawing_output = gr.JSON(label="Recognition Results")
369
 
370
- async def handwriting_async(drawing):
371
- return await client.handwriting_recognition(image_to_base64(drawing), "student_12345")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
- drawing_btn.click(
374
- fn=handwriting_async,
375
- inputs=[drawing_input],
376
- outputs=[drawing_output]
377
  )
378
 
379
  # Tab 4: Analytics
380
  with gr.Tab("Analytics"):
381
- gr.Markdown("## Student Performance")
382
-
383
- # Error Pattern Analysis
384
- error_concept = gr.Dropdown(
385
- choices=["math_algebra_basics", "math_algebra_linear_equations", "math_algebra_quadratic_equations"],
386
- label="Select Concept for Analysis",
387
- value="math_algebra_linear_equations"
388
- )
389
- error_btn = gr.Button("Analyze Concept")
390
- error_output = gr.JSON(label="Analysis Results")
391
-
392
- async def analyze_errors_async(concept):
393
- return await client.analyze_error_patterns("student_12345", concept)
394
-
395
- error_btn.click(
396
- fn=analyze_errors_async,
397
- inputs=[error_concept],
398
- outputs=[error_output]
399
- )
400
-
401
  gr.Markdown("## Plagiarism Detection")
402
 
403
  with gr.Row():
 
2
  Gradio web interface for the TutorX MCP Server with SSE support
3
  """
4
 
5
+ import os
6
  import gradio as gr
7
  import numpy as np
8
  import json
9
+
 
 
10
  from datetime import datetime
11
  import asyncio
12
  import aiohttp
 
20
  SERVER_URL = "http://localhost:8001" # Default port is now 8001 to match main.py
21
 
22
  # Utility functions
23
+
 
 
 
 
 
 
 
 
24
 
25
  async def load_concept_graph(concept_id: str = None):
26
  """
 
348
  outputs=[text_output]
349
  )
350
 
351
+ gr.Markdown("## PDF OCR and Summarization (Coming Soon)")
 
352
  with gr.Row():
353
  with gr.Column():
354
+ pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
355
+ ocr_btn = gr.Button("Extract Text")
356
 
357
  with gr.Column():
358
+ summary_output = gr.JSON(label="Summary")
359
 
360
async def pdf_ocr_async(pdf_file):
    """
    Gradio callback: resolve the uploaded PDF to a path and send it for OCR.

    Args:
        pdf_file: Value delivered by the file-upload input. Accepted shapes
            are a path (``str`` / ``os.PathLike``), a dict carrying the path
            under the ``"path"`` key, or an object exposing the path through
            a ``name`` attribute.

    Returns:
        The dictionary returned by ``client.pdf_ocr`` on success, otherwise a
        dictionary with an ``"error"`` message and ``"success": False``.
    """
    if not pdf_file:
        return {"error": "No PDF file provided", "success": False}
    try:
        # Work out the filesystem path from whatever shape the upload has
        if isinstance(pdf_file, dict):
            file_path = pdf_file.get("path", "")
        elif isinstance(pdf_file, (str, os.PathLike)):
            file_path = pdf_file
        else:
            # Upload wrappers (e.g. temp-file objects) carry the path on
            # `.name`; passing the wrapper itself to os.path.exists() would
            # raise instead of producing the "File not found" answer.
            file_path = getattr(pdf_file, "name", "")

        if not file_path or not os.path.exists(file_path):
            return {"error": "File not found", "success": False}

        return await client.pdf_ocr(file_path)
    except Exception as e:
        # UI boundary: report the failure in the JSON panel rather than crash
        return {"error": f"Error processing PDF: {str(e)}", "success": False}
376
 
377
+ ocr_btn.click(
378
+ fn=pdf_ocr_async,
379
+ inputs=[pdf_input],
380
+ outputs=[summary_output]
381
  )
382
 
383
  # Tab 4: Analytics
384
  with gr.Tab("Analytics"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  gr.Markdown("## Plagiarism Detection")
386
 
387
  with gr.Row():
client.py CHANGED
@@ -213,12 +213,7 @@ class TutorXClient:
213
  "difficulty": difficulty
214
  })
215
 
216
- async def analyze_error_patterns(self, student_id: str, concept_id: str) -> Dict[str, Any]:
217
- """Analyze common error patterns for a student on a specific concept"""
218
- return await self._call_tool("analyze_error_patterns", {
219
- "student_id": student_id,
220
- "concept_id": concept_id
221
- })
222
 
223
  # ------------ Advanced Features ------------
224
 
@@ -282,12 +277,7 @@ class TutorXClient:
282
  "student_id": student_id
283
  })
284
 
285
- async def handwriting_recognition(self, image_data_base64: str, student_id: str) -> Dict[str, Any]:
286
- """Process handwritten input from the student"""
287
- return await self._call_tool("handwriting_recognition", {
288
- "image_data_base64": image_data_base64,
289
- "student_id": student_id
290
- })
291
 
292
  # ------------ Assessment ------------
293
 
@@ -320,6 +310,37 @@ class TutorXClient:
320
  "submission": submission,
321
  "reference_sources": reference_sources
322
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
 
325
  async def get_curriculum_standards(self, country_code: str = "us") -> Dict[str, Any]:
 
213
  "difficulty": difficulty
214
  })
215
 
216
+
 
 
 
 
 
217
 
218
  # ------------ Advanced Features ------------
219
 
 
277
  "student_id": student_id
278
  })
279
 
280
+
 
 
 
 
 
281
 
282
  # ------------ Assessment ------------
283
 
 
310
  "submission": submission,
311
  "reference_sources": reference_sources
312
  })
313
+
314
async def pdf_ocr(self, pdf_file: str) -> Dict[str, Any]:
    """
    Extract text from a PDF file using OCR

    Reads the file from disk, base64-encodes it and forwards it to the
    server's ``pdf_ocr`` tool together with the file's base name.

    Args:
        pdf_file: Path to the PDF file

    Returns:
        The server's response dictionary, or a dictionary with ``"error"``,
        ``"success": False`` and a ``"timestamp"`` if reading, encoding or
        the tool call fails.
    """
    # Function-scope imports: the method needs these names regardless of what
    # the module header imports. Without them a missing import would surface
    # only as a swallowed NameError (or a NameError raised from the handler).
    import base64
    import os
    from datetime import datetime

    try:
        # Read the PDF file as binary data
        with open(pdf_file, "rb") as f:
            pdf_data = f.read()

        # Convert to base64 for transmission
        pdf_base64 = base64.b64encode(pdf_data).decode('utf-8')

        # Call the server's PDF OCR endpoint
        return await self._call_tool("pdf_ocr", {
            "pdf_data": pdf_base64,
            "filename": os.path.basename(pdf_file)
        })

    except Exception as e:
        # Best-effort contract: callers receive an error payload, not a raise
        return {
            "error": f"Failed to process PDF: {str(e)}",
            "success": False,
            "timestamp": datetime.now().isoformat()
        }
344
 
345
 
346
  async def get_curriculum_standards(self, country_code: str = "us") -> Dict[str, Any]:
main.py CHANGED
@@ -11,6 +11,15 @@ from datetime import datetime
11
  from fastapi import FastAPI, HTTPException, Query, Request
12
  from fastapi.responses import JSONResponse
13
  from fastapi.middleware.cors import CORSMiddleware
 
 
 
 
 
 
 
 
 
14
 
15
  # Filter out the tool registration warning
16
  warnings.filterwarnings("ignore", message="Tool already exists")
@@ -31,6 +40,7 @@ from utils.assessment import (
31
  from typing import List, Dict, Any, Optional, Union
32
  import random
33
  from datetime import datetime, timedelta, timezone
 
34
 
35
  # Get server configuration from environment variables with defaults
36
  SERVER_HOST = os.getenv("MCP_HOST", "0.0.0.0") # Allow connections from any IP
@@ -573,6 +583,258 @@ async def get_curriculum_standards_api(country: str = "us"):
573
  detail=f"Failed to fetch curriculum standards: {str(e)}"
574
  )
575
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
576
  # Mount MCP app to /mcp path
577
  mcp.app = api_app
578
 
 
11
  from fastapi import FastAPI, HTTPException, Query, Request
12
  from fastapi.responses import JSONResponse
13
  from fastapi.middleware.cors import CORSMiddleware
14
+ from difflib import SequenceMatcher
15
+ import re
16
+ import base64
17
+ import tempfile
18
+ import fitz # PyMuPDF
19
+ import pytesseract
20
+ from PIL import Image
21
+ import io
22
+ import numpy as np
23
 
24
  # Filter out the tool registration warning
25
  warnings.filterwarnings("ignore", message="Tool already exists")
 
40
  from typing import List, Dict, Any, Optional, Union
41
  import random
42
  from datetime import datetime, timedelta, timezone
43
+ import json
44
 
45
  # Get server configuration from environment variables with defaults
46
  SERVER_HOST = os.getenv("MCP_HOST", "0.0.0.0") # Allow connections from any IP
 
583
  detail=f"Failed to fetch curriculum standards: {str(e)}"
584
  )
585
 
586
@mcp.tool()
async def text_interaction(query: str, student_id: str) -> Dict[str, Any]:
    """
    Process a text query from a student and provide an educational response

    Mock implementation: the query is matched against a small table of canned
    answers. Matching ignores letter case and leading, trailing or repeated
    whitespace.

    Args:
        query: The student's question or input text
        student_id: Unique identifier for the student

    Returns:
        Dictionary with the original ``query``, the ``student_id``, a
        ``timestamp`` and the matched entry's ``response``,
        ``related_concepts`` and ``difficulty`` (a fallback entry is used when
        nothing matches).
    """
    # In a real implementation, this would use an LLM to generate a response.
    # NOTE(review): the `\\n` sequences in the strings below are escaped
    # backslashes, so the text carries a literal backslash-n rather than a
    # line break -- confirm this is intended before changing it.
    responses = {
        "how do i solve a quadratic equation?": {
            "response": "To solve a quadratic equation in the form ax² + bx + c = 0, you can use the quadratic formula: x = [-b ± √(b² - 4ac)] / (2a). First, identify the coefficients a, b, and c from your equation. Then plug them into the formula and simplify.",
            "related_concepts": ["quadratic_equations", "algebra"],
            "difficulty": "intermediate"
        },
        "what is photosynthesis?": {
            "response": "Photosynthesis is the process by which green plants, algae, and some bacteria convert light energy, usually from the sun, into chemical energy. The overall reaction can be summarized as: 6CO₂ + 6H₂O + light energy → C₆H₁₂O₆ + 6O₂. This process occurs in the chloroplasts of plant cells.",
            "related_concepts": ["biology", "plant_biology", "cellular_processes"],
            "difficulty": "beginner"
        },
        "explain newton's laws of motion": {
            "response": "Newton's three laws of motion are fundamental principles of physics:\\n\\n1. First Law (Inertia): An object at rest stays at rest, and an object in motion stays in motion at constant velocity unless acted upon by an external force.\\n2. Second Law: The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass (F=ma).\\n3. Third Law: For every action, there is an equal and opposite reaction.",
            "related_concepts": ["physics", "mechanics", "newtonian_physics"],
            "difficulty": "intermediate"
        }
    }

    # Normalise case and whitespace so " What is  photosynthesis? " still
    # hits the table; the untouched query is what gets echoed back.
    lookup_key = " ".join(query.split()).lower()

    response = responses.get(lookup_key)
    if response is None:
        # Default response for unknown queries
        response = {
            "response": f"I'm sorry, I don't have a specific response for that question. Could you rephrase or ask about something else?\\n\\nYour question was: {query}",
            "related_concepts": [],
            "difficulty": "unknown"
        }

    return {
        "query": query,
        "student_id": student_id,
        # NOTE(review): utcnow() is deprecated since Python 3.12; an aware
        # datetime would change the emitted format, so it is kept as is.
        "timestamp": datetime.utcnow().isoformat(),
        **response
    }
638
+
639
+ # Add API endpoint for text interaction
640
@api_app.post("/api/text_interaction")
async def api_text_interaction(request: Dict[str, Any]):
    """
    Handle text interaction requests from the client

    Expected request format:
    {
        "query": "user's question",
        "student_id": "student_12345"
    }

    ``student_id`` is optional and defaults to "anonymous".

    Returns:
        The dictionary produced by ``text_interaction``.

    Raises:
        HTTPException: 400 when the body is not an object with a string
            ``query``; 500 when processing the query fails.
    """
    try:
        # Validate request
        if not isinstance(request, dict) or "query" not in request:
            raise HTTPException(
                status_code=400,
                detail="Request must be a JSON object with 'query' key"
            )

        query = request.get("query", "")
        # A null or numeric query is a client error; without this check it
        # would fail inside text_interaction and be reported as a 500.
        if not isinstance(query, str):
            raise HTTPException(
                status_code=400,
                detail="'query' must be a string"
            )
        student_id = request.get("student_id", "anonymous")

        # Process the query
        return await text_interaction(query, student_id)

    except HTTPException:
        # Deliberate HTTP errors pass through untouched
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to process text interaction: {str(e)}"
        ) from e
671
+
672
@api_app.post("/api/check_submission_originality")
async def check_submission_originality(request: Dict[str, Any]) -> Dict[str, Any]:
    """
    Check a student's submission for potential plagiarism against reference sources.

    Each text is lower-cased, split into word tokens and stripped of a small
    set of common words; the submission is then compared with every non-empty
    reference using ``difflib.SequenceMatcher``.

    Args:
        request: Dictionary containing:
            - submission: The student's submission text
            - reference_sources: List of reference texts to check against
              (a single string is treated as a one-element list)

    Returns:
        Dictionary with originality analysis results. When either input is
        missing, a dictionary with ``error``, ``score`` and ``is_original``
        is returned instead.
    """
    # Single source of truth for the flagging threshold
    similarity_threshold = 0.7
    common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'is', 'are', 'was', 'were'}

    submission = request.get("submission", "")
    reference_sources = request.get("reference_sources", [])

    if not submission or not reference_sources:
        return {
            "error": "Both submission and reference_sources are required",
            "score": 0.0,
            "is_original": False
        }

    # A bare string would otherwise be compared one character at a time
    if isinstance(reference_sources, str):
        reference_sources = [reference_sources]

    def preprocess(text: str) -> str:
        """Lower-case, tokenise on word characters and drop common words."""
        words = re.findall(r'\w+', text.lower())
        return ' '.join(word for word in words if word not in common_words)

    def calculate_similarity(text1: str, text2: str) -> float:
        """Calculate similarity between two preprocessed texts (0.0 to 1.0)"""
        return SequenceMatcher(None, text1, text2).ratio()

    # Calculate similarity scores against all references
    preprocessed_submission = preprocess(submission)
    matches = []

    for i, ref in enumerate(reference_sources):
        if not ref:
            continue

        similarity = calculate_similarity(preprocessed_submission, preprocess(ref))

        matches.append({
            "reference_index": i,
            "similarity_score": round(similarity, 4),
            "is_potential_plagiarism": similarity > similarity_threshold
        })

    # Overall originality score is 1.0 minus the highest similarity found
    max_similarity = max((m["similarity_score"] for m in matches), default=0.0)
    originality_score = 1.0 - max_similarity

    # Derive the verdict from the per-match flags so the two can never
    # disagree for scores sitting right at the threshold.
    is_original = not any(m["is_potential_plagiarism"] for m in matches)

    return {
        "submission_length": len(submission),
        "reference_count": len(reference_sources),
        "originality_score": round(originality_score, 4),
        "is_original": is_original,
        "matches": matches,
        "analysis": {
            "similarity_threshold": similarity_threshold,
            "detection_method": "text_similarity"
        }
    }
744
+
745
@api_app.post("/api/pdf_ocr")
async def pdf_ocr(request: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract text from a PDF file using OCR

    Pages with an embedded text layer are read directly; pages without one
    are rendered to an image and passed through Tesseract. Text is collected
    in page order.

    Args:
        request: Dictionary containing:
            - pdf_data: Base64 encoded PDF data
            - filename: Original filename (for reference)

    Returns:
        Dictionary containing extracted text and metadata. On failure a
        dictionary with ``error`` and ``"success": False`` is returned.
    """
    try:
        # Get the base64 encoded PDF data
        pdf_base64 = request.get("pdf_data")
        if not pdf_base64:
            return {"error": "No PDF data provided", "success": False}

        # Decode the base64 data
        pdf_bytes = base64.b64decode(pdf_base64)

        text_content = []
        ocr_page_count = 0

        # Open straight from memory: no temporary file to create or clean up
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            # Must be read while the document is open; len() on a closed
            # document raises.
            page_count = len(doc)

            for page_num in range(page_count):
                page = doc.load_page(page_num)

                # First try to extract text directly
                page_text = page.get_text()
                if page_text.strip():
                    text_content.append(page_text)
                    continue

                # No text layer: render the page and OCR it right away so the
                # recognised text stays at this page's position.
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                ocr_page_count += 1

                # Convert to grayscale for better OCR
                text = pytesseract.image_to_string(img.convert('L'))
                if text.strip():
                    text_content.append(text)
        finally:
            # Release the document even when extraction or OCR fails
            doc.close()

        # Combine all text
        full_text = "\n\n".join(text_content).strip()

        # Placeholder summary: the first ten non-blank lines of the text
        summary = "\n".join([line for line in full_text.split('\n') if line.strip()][:10]) + "..."

        return {
            "success": True,
            "filename": request.get("filename", "document.pdf"),
            "page_count": page_count,
            "text": full_text,
            "summary": summary,
            "has_ocr_applied": ocr_page_count > 0,
            "ocr_page_count": ocr_page_count
        }

    except Exception as e:
        # Endpoint boundary: log the traceback and answer with an error payload
        import traceback
        traceback.print_exc()
        return {
            "error": f"Failed to process PDF: {str(e)}",
            "success": False
        }
837
+
838
  # Mount MCP app to /mcp path
839
  mcp.app = api_app
840
 
requirements.txt CHANGED
@@ -18,4 +18,6 @@ isort>=5.10.0
18
  mypy>=0.910
19
  ruff>=0.0.262
20
  networkx>=3.0
21
- matplotlib>=3.5.0
 
 
 
18
  mypy>=0.910
19
  ruff>=0.0.262
20
  networkx>=3.0
21
+ matplotlib>=3.5.0
22
+ PyMuPDF>=1.19.0
23
+ pytesseract>=0.3.8