Spaces:

RedRepter
/

TutorX-MCP

Sleeping

App Files Files Community

Meet Patel committed on Jun 7

Commit

15710ed

1 Parent(s): bbd9cd6

All the functionality working with mock data

Browse files

Files changed (4) hide show

app.py +27 -43
client.py +33 -12
main.py +262 -0
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -2,12 +2,11 @@
 Gradio web interface for the TutorX MCP Server with SSE support
 """
 import gradio as gr
 import numpy as np
 import json
-import base64
-from io import BytesIO
-from PIL import Image
 from datetime import datetime
 import asyncio
 import aiohttp
@@ -21,15 +20,7 @@ from client import client
 SERVER_URL = "http://localhost:8001"  # Default port is now 8001 to match main.py
 # Utility functions
-def image_to_base64(img):
-    """Convert a PIL image or numpy array to base64 string"""
-    if isinstance(img, np.ndarray):
-        img = Image.fromarray(img)
-    buffered = BytesIO()
-    img.save(buffered, format="PNG")
-    img_str = base64.b64encode(buffered.getvalue()).decode()
-    return img_str
 async def load_concept_graph(concept_id: str = None):
     """
@@ -357,47 +348,40 @@ with gr.Blocks(title="TutorX Educational AI", theme=gr.themes.Soft()) as demo:
                 outputs=[text_output]
             )
-            gr.Markdown("## Handwriting Recognition")
             with gr.Row():
                 with gr.Column():
-                    drawing_input = gr.Sketchpad(label="Draw an Equation")
-                    drawing_btn = gr.Button("Recognize")
                 with gr.Column():
-                    drawing_output = gr.JSON(label="Recognition Results")
-            async def handwriting_async(drawing):
-                return await client.handwriting_recognition(image_to_base64(drawing), "student_12345")
-            drawing_btn.click(
-                fn=handwriting_async,
-                inputs=[drawing_input],
-                outputs=[drawing_output]
             )
         # Tab 4: Analytics
         with gr.Tab("Analytics"):
-            gr.Markdown("## Student Performance")
-            # Error Pattern Analysis
-            error_concept = gr.Dropdown(
-                choices=["math_algebra_basics", "math_algebra_linear_equations", "math_algebra_quadratic_equations"],
-                label="Select Concept for Analysis",
-                value="math_algebra_linear_equations"
-            )
-            error_btn = gr.Button("Analyze Concept")
-            error_output = gr.JSON(label="Analysis Results")
-            async def analyze_errors_async(concept):
-                return await client.analyze_error_patterns("student_12345", concept)
-            error_btn.click(
-                fn=analyze_errors_async,
-                inputs=[error_concept],
-                outputs=[error_output]
-            )
             gr.Markdown("## Plagiarism Detection")
             with gr.Row():

 Gradio web interface for the TutorX MCP Server with SSE support
 """
+import os
 import gradio as gr
 import numpy as np
 import json
 from datetime import datetime
 import asyncio
 import aiohttp
 SERVER_URL = "http://localhost:8001"  # Default port is now 8001 to match main.py
 # Utility functions
 async def load_concept_graph(concept_id: str = None):
     """
                 outputs=[text_output]
             )
+            gr.Markdown("## PDF OCR and Summarization (Coming Soon)")
             with gr.Row():
                 with gr.Column():
+                    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+                    ocr_btn = gr.Button("Extract Text")
                 with gr.Column():
+                    summary_output = gr.JSON(label="Summary")
+            async def pdf_ocr_async(pdf_file):
+                if not pdf_file:
+                    return {"error": "No PDF file provided", "success": False}
+                try:
+                    # Get the file path from the Gradio file object
+                    if isinstance(pdf_file, dict):
+                        file_path = pdf_file.get("path", "")
+                    else:
+                        file_path = pdf_file
+                    if not file_path or not os.path.exists(file_path):
+                        return {"error": "File not found", "success": False}
+                    return await client.pdf_ocr(file_path)
+                except Exception as e:
+                    return {"error": f"Error processing PDF: {str(e)}", "success": False}
+            ocr_btn.click(
+                fn=pdf_ocr_async,
+                inputs=[pdf_input],
+                outputs=[summary_output]
             )
         # Tab 4: Analytics
         with gr.Tab("Analytics"):
             gr.Markdown("## Plagiarism Detection")
             with gr.Row():

client.py CHANGED Viewed

@@ -213,12 +213,7 @@ class TutorXClient:
             "difficulty": difficulty
         })
-    async def analyze_error_patterns(self, student_id: str, concept_id: str) -> Dict[str, Any]:
-        """Analyze common error patterns for a student on a specific concept"""
-        return await self._call_tool("analyze_error_patterns", {
-            "student_id": student_id,
-            "concept_id": concept_id
-        })
     # ------------ Advanced Features ------------
@@ -282,12 +277,7 @@ class TutorXClient:
             "student_id": student_id
         })
-    async def handwriting_recognition(self, image_data_base64: str, student_id: str) -> Dict[str, Any]:
-        """Process handwritten input from the student"""
-        return await self._call_tool("handwriting_recognition", {
-            "image_data_base64": image_data_base64,
-            "student_id": student_id
-        })
     # ------------ Assessment ------------
@@ -320,6 +310,37 @@ class TutorXClient:
             "submission": submission,
             "reference_sources": reference_sources
         })
     async def get_curriculum_standards(self, country_code: str = "us") -> Dict[str, Any]:

             "difficulty": difficulty
         })
     # ------------ Advanced Features ------------
             "student_id": student_id
         })
     # ------------ Assessment ------------
             "submission": submission,
             "reference_sources": reference_sources
         })
+    async def pdf_ocr(self, pdf_file: str) -> Dict[str, Any]:
+        """
+        Extract text from a PDF file using OCR
+        Args:
+            pdf_file: Path to the PDF file
+        Returns:
+            Dictionary containing extracted text and metadata
+        """
+        try:
+            # Read the PDF file as binary data
+            with open(pdf_file, "rb") as f:
+                pdf_data = f.read()
+            # Convert to base64 for transmission
+            pdf_base64 = base64.b64encode(pdf_data).decode('utf-8')
+            # Call the server's PDF OCR endpoint
+            return await self._call_tool("pdf_ocr", {
+                "pdf_data": pdf_base64,
+                "filename": os.path.basename(pdf_file)
+            })
+        except Exception as e:
+            return {
+                "error": f"Failed to process PDF: {str(e)}",
+                "success": False,
+                "timestamp": datetime.now().isoformat()
+            }
     async def get_curriculum_standards(self, country_code: str = "us") -> Dict[str, Any]:

main.py CHANGED Viewed

@@ -11,6 +11,15 @@ from datetime import datetime
 from fastapi import FastAPI, HTTPException, Query, Request
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 # Filter out the tool registration warning
 warnings.filterwarnings("ignore", message="Tool already exists")
@@ -31,6 +40,7 @@ from utils.assessment import (
 from typing import List, Dict, Any, Optional, Union
 import random
 from datetime import datetime, timedelta, timezone
 # Get server configuration from environment variables with defaults
 SERVER_HOST = os.getenv("MCP_HOST", "0.0.0.0")  # Allow connections from any IP
@@ -573,6 +583,258 @@ async def get_curriculum_standards_api(country: str = "us"):
             detail=f"Failed to fetch curriculum standards: {str(e)}"
         )
 # Mount MCP app to /mcp path
 mcp.app = api_app

 from fastapi import FastAPI, HTTPException, Query, Request
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
+from difflib import SequenceMatcher
+import re
+import base64
+import tempfile
+import fitz  # PyMuPDF
+import pytesseract
+from PIL import Image
+import io
+import numpy as np
 # Filter out the tool registration warning
 warnings.filterwarnings("ignore", message="Tool already exists")
 from typing import List, Dict, Any, Optional, Union
 import random
 from datetime import datetime, timedelta, timezone
+import json
 # Get server configuration from environment variables with defaults
 SERVER_HOST = os.getenv("MCP_HOST", "0.0.0.0")  # Allow connections from any IP
             detail=f"Failed to fetch curriculum standards: {str(e)}"
         )
+@mcp.tool()
+async def text_interaction(query: str, student_id: str) -> Dict[str, Any]:
+    """
+    Process a text query from a student and provide an educational response
+    Args:
+        query: The student's question or input text
+        student_id: Unique identifier for the student
+    Returns:
+        Dictionary containing the response and metadata
+    """
+    # In a real implementation, this would use an LLM to generate a response
+    # For now, we'll return a mock response
+    responses = {
+        "how do i solve a quadratic equation?": {
+            "response": "To solve a quadratic equation in the form ax² + bx + c = 0, you can use the quadratic formula: x = [-b ± √(b² - 4ac)] / (2a). First, identify the coefficients a, b, and c from your equation. Then plug them into the formula and simplify.",
+            "related_concepts": ["quadratic_equations", "algebra"],
+            "difficulty": "intermediate"
+        },
+        "what is photosynthesis?": {
+            "response": "Photosynthesis is the process by which green plants, algae, and some bacteria convert light energy, usually from the sun, into chemical energy. The overall reaction can be summarized as: 6CO₂ + 6H₂O + light energy → C₆H₁₂O₆ + 6O₂. This process occurs in the chloroplasts of plant cells.",
+            "related_concepts": ["biology", "plant_biology", "cellular_processes"],
+            "difficulty": "beginner"
+        },
+        "explain newton's laws of motion": {
+            "response": "Newton's three laws of motion are fundamental principles of physics:\\n\\n1. First Law (Inertia): An object at rest stays at rest, and an object in motion stays in motion at constant velocity unless acted upon by an external force.\\n2. Second Law: The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass (F=ma).\\n3. Third Law: For every action, there is an equal and opposite reaction.",
+            "related_concepts": ["physics", "mechanics", "newtonian_physics"],
+            "difficulty": "intermediate"
+        }
+    }
+    # Convert query to lowercase for case-insensitive matching
+    query_lower = query.lower()
+    # Check if we have a predefined response
+    if query_lower in responses:
+        response = responses[query_lower]
+    else:
+        # Default response for unknown queries
+        response = {
+            "response": f"I'm sorry, I don't have a specific response for that question. Could you rephrase or ask about something else?\\n\\nYour question was: {query}",
+            "related_concepts": [],
+            "difficulty": "unknown"
+        }
+    return {
+        "query": query,
+        "student_id": student_id,
+        "timestamp": datetime.utcnow().isoformat(),
+        **response
+    }
+# Add API endpoint for text interaction
+@api_app.post("/api/text_interaction")
+async def api_text_interaction(request: Dict[str, Any]):
+    """
+    Handle text interaction requests from the client
+    Expected request format:
+    {
+        "query": "user's question",
+        "student_id": "student_12345"
+    }
+    """
+    try:
+        # Validate request
+        if not isinstance(request, dict) or "query" not in request:
+            raise HTTPException(
+                status_code=400,
+                detail="Request must be a JSON object with 'query' key"
+            )
+        # Get parameters
+        query = request.get("query", "")
+        student_id = request.get("student_id", "anonymous")
+        # Process the query
+        result = await text_interaction(query, student_id)
+        return result
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to process text interaction: {str(e)}")
+@api_app.post("/api/check_submission_originality")
+async def check_submission_originality(request: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Check a student's submission for potential plagiarism against reference sources.
+    Args:
+        request: Dictionary containing:
+            - submission: The student's submission text
+            - reference_sources: List of reference texts to check against
+    Returns:
+        Dictionary with originality analysis results
+    """
+    submission = request.get("submission", "")
+    reference_sources = request.get("reference_sources", [])
+    if not submission or not reference_sources:
+        return {
+            "error": "Both submission and reference_sources are required",
+            "score": 0.0,
+            "is_original": False
+        }
+    def calculate_similarity(text1: str, text2: str) -> float:
+        """Calculate similarity between two texts (0.0 to 1.0)"""
+        # Simple similarity using SequenceMatcher
+        return SequenceMatcher(None, text1.lower(), text2.lower()).ratio()
+    # Clean and preprocess texts
+    def preprocess(text: str) -> str:
+        # Remove extra whitespace and normalize
+        text = re.sub(r'\s+', ' ', text).strip()
+        # Remove common words and punctuation for better matching
+        common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'is', 'are', 'was', 'were'}
+        words = [word for word in re.findall(r'\w+', text.lower()) if word not in common_words]
+        return ' '.join(words)
+    # Calculate similarity scores against all references
+    preprocessed_submission = preprocess(submission)
+    matches = []
+    for i, ref in enumerate(reference_sources):
+        if not ref:
+            continue
+        preprocessed_ref = preprocess(ref)
+        similarity = calculate_similarity(preprocessed_submission, preprocessed_ref)
+        matches.append({
+            "reference_index": i,
+            "similarity_score": round(similarity, 4),
+            "is_potential_plagiarism": similarity > 0.7  # Threshold can be adjusted
+        })
+    # Calculate overall originality score (1.0 - max similarity)
+    max_similarity = max((m["similarity_score"] for m in matches), default=0.0)
+    originality_score = 1.0 - max_similarity
+    # Basic plagiarism detection
+    is_original = all(m["similarity_score"] < 0.7 for m in matches)
+    return {
+        "submission_length": len(submission),
+        "reference_count": len(reference_sources),
+        "originality_score": round(originality_score, 4),
+        "is_original": is_original,
+        "matches": matches,
+        "analysis": {
+            "similarity_threshold": 0.7,
+            "detection_method": "text_similarity"
+        }
+    }
+@api_app.post("/api/pdf_ocr")
+async def pdf_ocr(request: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Extract text from a PDF file using OCR
+    Args:
+        request: Dictionary containing:
+            - pdf_data: Base64 encoded PDF data
+            - filename: Original filename (for reference)
+    Returns:
+        Dictionary containing extracted text and metadata
+    """
+    try:
+        # Get the base64 encoded PDF data
+        pdf_base64 = request.get("pdf_data")
+        if not pdf_base64:
+            return {"error": "No PDF data provided", "success": False}
+        # Decode the base64 data
+        pdf_bytes = base64.b64decode(pdf_base64)
+        # Create a temporary file to store the PDF
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
+            temp_pdf.write(pdf_bytes)
+            temp_pdf_path = temp_pdf.name
+        try:
+            # Extract text using PyMuPDF
+            text_content = []
+            image_pages = []
+            # Open the PDF
+            doc = fitz.open(temp_pdf_path)
+            # Extract text from each page
+            for page_num in range(len(doc)):
+                page = doc.load_page(page_num)
+                # First try to extract text directly
+                page_text = page.get_text()
+                if page_text.strip():
+                    text_content.append(page_text)
+                else:
+                    # If no text found, try OCR on the page image
+                    pix = page.get_pixmap()
+                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+                    image_pages.append(img)
+            # Close the document
+            doc.close()
+            # If we have images to OCR, process them
+            if image_pages:
+                for img in image_pages:
+                    # Convert to grayscale for better OCR
+                    img_gray = img.convert('L')
+                    # Use pytesseract to do OCR on the image
+                    text = pytesseract.image_to_string(img_gray)
+                    if text.strip():
+                        text_content.append(text)
+            # Combine all text
+            full_text = "\n\n".join(text_content).strip()
+            # Generate a summary (this is a placeholder - you might want to use an LLM for better summarization)
+            summary = "\n".join([line for line in full_text.split('\n') if line.strip()][:10]) + "..."
+            return {
+                "success": True,
+                "filename": request.get("filename", "document.pdf"),
+                "page_count": len(doc),
+                "text": full_text,
+                "summary": summary,
+                "has_ocr_applied": len(image_pages) > 0,
+                "ocr_page_count": len(image_pages)
+            }
+        finally:
+            # Clean up the temporary file
+            try:
+                os.unlink(temp_pdf_path)
+            except:
+                pass
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return {
+            "error": f"Failed to process PDF: {str(e)}",
+            "success": False
+        }
 # Mount MCP app to /mcp path
 mcp.app = api_app

requirements.txt CHANGED Viewed

@@ -18,4 +18,6 @@ isort>=5.10.0
 mypy>=0.910
 ruff>=0.0.262
 networkx>=3.0
-matplotlib>=3.5.0

 mypy>=0.910
 ruff>=0.0.262
 networkx>=3.0
+matplotlib>=3.5.0
+PyMuPDF>=1.19.0
+pytesseract>=0.3.8