Spaces:
Sleeping
Sleeping
# Import necessary libraries from FastAPI for creating the API, handling uploads, forms, and HTTP exceptions
import hashlib
import json
import os
import re
from typing import Dict

import google.generativeai as genai
import pandas as pd
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, Form, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from firebase_admin import firestore
from google.generativeai import GenerativeModel
from openai import OpenAI
from pydantic import BaseModel
# Load settings from a .env file (if present) into the process environment
# before any API key is read.
load_dotenv()

# DeepSeek client: the OpenAI SDK pointed at the DeepSeek base URL.
client = OpenAI(
    base_url="https://api.deepseek.com",
    api_key=os.getenv('DEEPSEEK_API_KEY'),
)

# Gemini client: configure the SDK once, then create the shared model handle
# used by the feedback, flashcard and summary helpers.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
model = genai.GenerativeModel("gemini-2.0-flash")
# Firebase Admin SDK initialization.
# The service-account credentials are supplied as a JSON string in the
# "Firebase_cred" environment variable.
import firebase_admin
from firebase_admin import credentials

cred_dic = os.getenv("Firebase_cred")
if not cred_dic:
    # Fail with an actionable message instead of the opaque TypeError that
    # json.loads(None) would raise when the variable is missing.
    raise RuntimeError(
        'Environment variable "Firebase_cred" is not set; it must contain the '
        "Firebase service-account credentials as a JSON string."
    )

# Parse the JSON string into the dictionary passed to credentials.Certificate
cred_dict = json.loads(cred_dic)

# Initialize the Firebase Admin app with the credentials.
# This needs to be done only once per process.
cred = credentials.Certificate(cred_dict)
firebase_admin.initialize_app(cred)
# The FastAPI application object served by uvicorn.
app = FastAPI()

# CORS policy: any origin, any method, any header, with credentials allowed.
# This is deliberately permissive; in production prefer an explicit list of
# trusted origins instead of "*".
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
def generate_df():
    """Load every document of the "test_results" Firestore collection into a DataFrame.

    Each row holds one document's fields plus an ``id`` entry carrying the
    Firestore document ID (replacing any stored field that is also named ``id``).

    Returns:
        pandas.DataFrame: One row per document in the collection.
    """
    snapshots = firestore.client().collection("test_results").get()
    # Merge the document ID into each document's field dictionary.
    rows = [{**snapshot.to_dict(), 'id': snapshot.id} for snapshot in snapshots]
    return pd.DataFrame(rows)
def generate_feedback(email, test_id):
    """
    Generates feedback for a specific test taken by a student.

    The stored test results are filtered by ``email`` and then by ``test_id``
    (compared against the ``id`` column, which generate_df fills with the
    Firestore document ID). The ``responses`` value of the first matching row
    is sent to the Gemini model with a tutoring prompt.

    Args:
        email (str): The email of the student.
        test_id (str): The ID of the test.

    Returns:
        str: The generated feedback text, or None if no test results are found.
    """
    df = generate_df()

    # An empty collection yields a DataFrame without columns; indexing
    # df['email'] would then raise KeyError, so treat it as "nothing found".
    if not {'email', 'responses'}.issubset(df.columns):
        print("No test results found for this id")
        return None

    df_email = df[df['email'] == email]  # Rows belonging to this student
    df_test_id = df_email[df_email['id'] == test_id]  # Narrow down to the requested test
    if df_test_id.empty:
        print("No test results found for this id")
        return None

    # Use the 'responses' field of the first matching row.
    response = df_test_id['responses'].values[0]

    # Prepare the prompt for the Gemini model
    prompt = f"""You are an experienced tutor analyzing a student's test responses to provide constructive feedback. Below is the student's test history in JSON format. Your task is to:
Identify Strengths: Highlight areas where the student performed well, demonstrating a strong understanding of the concepts.
Identify Weaknesses: Point out areas where the student struggled or made consistent errors, indicating gaps in understanding.
Provide Actionable Suggestions: Offer specific advice on how the student can improve their performance in future tests.
Encourage and Motivate: End with positive reinforcement to keep the student motivated.
Test History:{str(response)} """
    feedback = model.generate_content(prompt)
    return feedback.text  # Return the text part of the response
def generate_overall_feedback(email):
    """
    Generates overall feedback for a student based on all their test results.

    The stored test results are filtered by ``email`` and the ``responses``
    values of every matching row are sent to the Gemini model with a tutoring
    prompt, to obtain a holistic view of the student's performance.

    Args:
        email (str): The email of the student.

    Returns:
        str: The generated overall feedback text, or None if no test results are found.
    """
    df = generate_df()

    # An empty collection yields a DataFrame without columns; indexing
    # df['email'] would then raise KeyError, so treat it as "nothing found".
    if not {'email', 'responses'}.issubset(df.columns):
        print("Please try again with a valid email")
        return None

    df_email = df[df['email'] == email]
    if df_email.empty:
        print("Please try again with a valid email")
        return None

    # All of the student's stored responses, as an array.
    response = df_email['responses'].values

    prompt = f"""You are an experienced tutor analyzing a student's test responses to provide constructive feedback. Below is the student's test history in list format. Your task is to:
Identify Strengths: Highlight areas where the student performed well, demonstrating a strong understanding of the concepts.
Identify Weaknesses: Point out areas where the student struggled or made consistent errors, indicating gaps in understanding.
Provide Actionable Suggestions: Offer specific advice on how the student can improve their performance in future tests.
Encourage and Motivate: End with positive reinforcement to keep the student motivated.
Test History:{str(response)} """
    feedback = model.generate_content(prompt)
    return feedback.text
async def get_single_feedback(email: str, test_id: str):
    """
    API endpoint to get feedback for a single test.

    Args:
        email (str): The email of the student.
        test_id (str): The ID of the test.

    Returns:
        JSONResponse: ``{"feedback": <text>}``; the value is null when
        generate_feedback finds no matching test result.

    NOTE(review): the parameters are declared as plain ``str`` (no
    ``Form(...)``) and no route decorator is visible on this coroutine -
    confirm how it is registered and whether clients send form data or
    query parameters.
    """
    # generate_feedback performs blocking Firestore and Gemini calls; run it in
    # the threadpool so the event loop stays free to serve other requests.
    feedback = await run_in_threadpool(generate_feedback, email, test_id)
    return JSONResponse(content={"feedback": feedback})
async def get_overall_feedback(email: str):
    """
    API endpoint to get overall feedback for a student.

    Args:
        email (str): The email of the student.

    Returns:
        JSONResponse: ``{"feedback": <text>}``; the value is null when
        generate_overall_feedback finds no test results for the email.

    NOTE(review): ``email`` is declared as a plain ``str`` (no ``Form(...)``)
    and no route decorator is visible on this coroutine - confirm how it is
    registered and whether clients send form data or query parameters.
    """
    # generate_overall_feedback performs blocking Firestore and Gemini calls;
    # run it in the threadpool so the event loop is not stalled meanwhile.
    feedback = await run_in_threadpool(generate_overall_feedback, email)
    return JSONResponse(content={"feedback": feedback})
async def get_strong_weak_topics(email: str):
    """
    API endpoint to identify strong and weak topics for a student.

    Requires at least 10 stored test results for ``email``. Ten of them are
    sent to the DeepSeek chat API, which is asked to return JSON of the form
    ``{"weak_topics": [...], "strong_topics": [...]}``.

    Args:
        email (str): The email of the student.

    Returns:
        JSONResponse: The parsed topic JSON, or a ``{"message": ...}`` payload
        when the student has fewer than 10 test results.

    Raises:
        HTTPException: 502 when the model's reply cannot be parsed as JSON.

    NOTE(review): ``email`` is declared as a plain ``str`` (no ``Form(...)``)
    and no route decorator is visible on this coroutine - confirm how it is
    registered and whether clients send form data or query parameters.
    """
    # Firestore and LLM calls are blocking; run them in the threadpool so the
    # event loop is not stalled while they complete.
    df = await run_in_threadpool(generate_df)

    # An empty collection produces a DataFrame without columns, so guard the
    # column lookups instead of letting df['email'] raise KeyError.
    if {'email', 'responses'}.issubset(df.columns):
        df_email = df[df['email'] == email]
    else:
        df_email = df.iloc[0:0]

    # The feature is gated on this student's own attempts (not on the size of
    # the whole collection).
    if len(df_email) < 10:
        return JSONResponse(content={"message": "Please attempt atleast 10 tests to enable this feature"})

    # NOTE(review): this takes the first ten rows in retrieval order; nothing
    # here sorts by time, so "latest 10" is not guaranteed - confirm.
    response = df_email['responses'].values[:10]
    formatted_data = str(response)  # String form of the responses for the prompt

    section_info = {
        'filename': 'student_performance',
        'schema': {
            'weak_topics': ['Topic#1', 'Topic#2', '...'],
            'strong_topics': ['Topic#1', 'Topic#2', '...']
        }
    }

    system_prompt = f"""You are an Educational Performance Analyst focusing on {section_info['filename'].replace('_', ' ')}.
Analyze the provided student responses to identify and categorize topics into 'weak' and 'strong' based on their performance. Try to give
high level topics like algebra, trignometry, geometry etc in your response.
Do not add any explanations, introduction, or comments - return ONLY valid JSON.
"""
    user_prompt = f"""
Here is the raw data for {section_info['filename']}:
{formatted_data}
Convert this data into JSON that matches this schema:
{json.dumps(section_info['schema'], indent=2)}
"""

    # Generate response using the DeepSeek client
    completion = await run_in_threadpool(
        client.chat.completions.create,
        model="deepseek-chat",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.0,  # Set temperature to 0 for deterministic output
    )

    # Extract the JSON content from the completion object
    strong_weak_topics = completion.choices[0].message.content
    try:
        topics = json.loads(strong_weak_topics)
    except (TypeError, ValueError) as exc:
        # TypeError: content was None; ValueError: content was not valid JSON.
        raise HTTPException(
            status_code=502,
            detail="Topic analysis did not return valid JSON",
        ) from exc
    return JSONResponse(content=topics)
async def generate_flashcards(email: str):
    """
    API endpoint to generate flashcards for a student's weak topics.

    Requires at least 10 stored test results for ``email``.
    First, it identifies weak topics using DeepSeek.
    Then, it generates flashcards for these topics using Gemini and parses
    the "Front:/Back:" pairs out of the reply.

    Args:
        email (str): The email of the student.

    Returns:
        JSONResponse: A list of ``{"Front": ..., "Back": ...}`` objects, or a
        ``{"message": ...}`` payload when the student has fewer than 10 test
        results or no weak topics could be extracted.

    NOTE(review): ``email`` is declared as a plain ``str`` (no ``Form(...)``)
    and no route decorator is visible on this coroutine - confirm how it is
    registered and whether clients send form data or query parameters.
    """
    # Firestore and LLM calls are blocking; run them in the threadpool so the
    # event loop is not stalled while they complete.
    df = await run_in_threadpool(generate_df)

    # An empty collection produces a DataFrame without columns, so guard the
    # column lookups instead of letting df['email'] raise KeyError.
    if {'email', 'responses'}.issubset(df.columns):
        df_email = df[df['email'] == email]
    else:
        df_email = df.iloc[0:0]

    if len(df_email) < 10:
        return JSONResponse(content={"message": "Please attempt at least 10 tests to enable flashcard generation."})

    # Step 1: Get the weak topics via DeepSeek
    # NOTE(review): this takes the first ten rows in retrieval order; nothing
    # here sorts by time, so "latest 10" is not guaranteed - confirm.
    response = df_email['responses'].values[:10]
    formatted_data = str(response)
    schema = {
        'weak_topics': ['Topic#1', 'Topic#2', '...'],
        'strong_topics': ['Topic#1', 'Topic#2', '...']
    }
    system_prompt = """You are an Educational Performance Analyst focusing on student performance.
Analyze the provided student responses to identify and categorize topics into 'weak' and 'strong' based on their performance.
Do not add any explanations - return ONLY valid JSON."""
    user_prompt = f"""
Here is the raw data:
{formatted_data}
Convert this data into JSON that matches this schema:
{json.dumps(schema, indent=2)}
"""
    completion = await run_in_threadpool(
        client.chat.completions.create,
        model="deepseek-chat",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.0,
    )

    # Extract weak topics; a missing, malformed or non-object reply is treated
    # the same as "no weak topics" instead of crashing the request.
    try:
        strong_weak_json = json.loads(completion.choices[0].message.content)
    except (TypeError, ValueError):
        strong_weak_json = {}
    weak_topics = strong_weak_json.get("weak_topics", []) if isinstance(strong_weak_json, dict) else []
    if isinstance(weak_topics, str):
        # A single topic returned as a bare string; joining it would split it
        # into characters.
        weak_topics = [weak_topics]
    if not weak_topics:
        return JSONResponse(content={"message": "Could not extract weak topics."})

    # Step 2: Generate flashcards using Gemini for the identified weak topics
    # (str() guards against non-string entries in the model's list).
    topic_str = ", ".join(str(topic) for topic in weak_topics)
    flashcard_prompt = f"""Create 5 concise, simple, straightforward and distinct Anki cards to study the following topic, each with a front and back.
Avoid repeating the content in the front on the back of the card. Avoid explicitly referring to the author or the article.
Use the following format:
Front: [front section of card 1]
Back: [back section of card 1]
...
The topics: {topic_str}
"""
    flashcard_response = await run_in_threadpool(model.generate_content, flashcard_prompt)

    # Step 3: Parse Gemini response into JSON format
    flashcards_raw = flashcard_response.text.strip()
    # Use regex to find all "Front:" and "Back:" pairs
    flashcard_pattern = re.findall(r"Front:\s*(.*?)\nBack:\s*(.*?)(?=\nFront:|\Z)", flashcards_raw, re.DOTALL)
    flashcards = [{"Front": front.strip(), "Back": back.strip()} for front, back in flashcard_pattern]
    return JSONResponse(content=flashcards)
async def generate_detailed_summary(email: str):
    """
    API endpoint to generate detailed summaries for a student's weak topics.

    Requires at least 10 stored test results for ``email``.
    First, it identifies weak topics using DeepSeek.
    Then, it generates summaries for these topics using Gemini and parses
    the "Topic:/Summary:" pairs out of the reply.

    Args:
        email (str): The email of the student.

    Returns:
        JSONResponse: A one-element list holding a single object that maps
        each topic name to its summary, or a ``{"message": ...}`` payload when
        the student has fewer than 10 test results or no weak topics could be
        extracted.

    NOTE(review): ``email`` is declared as a plain ``str`` (no ``Form(...)``)
    and no route decorator is visible on this coroutine - confirm how it is
    registered and whether clients send form data or query parameters.
    """
    # Firestore and LLM calls are blocking; run them in the threadpool so the
    # event loop is not stalled while they complete.
    df = await run_in_threadpool(generate_df)

    # An empty collection produces a DataFrame without columns, so guard the
    # column lookups instead of letting df['email'] raise KeyError.
    if {'email', 'responses'}.issubset(df.columns):
        df_email = df[df['email'] == email]
    else:
        df_email = df.iloc[0:0]

    if len(df_email) < 10:
        return JSONResponse(content={"message": "Please attempt at least 10 tests to enable detailed summary generation."})

    # Step 1: Get the weak topics via DeepSeek
    # NOTE(review): this takes the first ten rows in retrieval order; nothing
    # here sorts by time - confirm which ten tests are intended.
    response = df_email['responses'].values[:10]
    formatted_data = str(response)
    schema = {
        'weak_topics': ['Topic#1', 'Topic#2', '...'],
        'strong_topics': ['Topic#1', 'Topic#2', '...']
    }
    system_prompt = """You are an Educational Performance Analyst focusing on student performance.
Analyze the provided student responses to identify and categorize topics into 'weak' and 'strong' based on their performance.
Do not add any explanations - return ONLY valid JSON."""
    user_prompt = f"""
Here is the raw data:
{formatted_data}
Convert this data into JSON that matches this schema:
{json.dumps(schema, indent=2)}
"""
    completion = await run_in_threadpool(
        client.chat.completions.create,
        model="deepseek-chat",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.0,
    )

    # Extract weak topics; a missing, malformed or non-object reply is treated
    # the same as "no weak topics" instead of crashing the request.
    try:
        strong_weak_json = json.loads(completion.choices[0].message.content)
    except (TypeError, ValueError):
        strong_weak_json = {}
    weak_topics = strong_weak_json.get("weak_topics", []) if isinstance(strong_weak_json, dict) else []
    if isinstance(weak_topics, str):
        # A single topic returned as a bare string; joining it would split it
        # into characters.
        weak_topics = [weak_topics]
    if not weak_topics:
        return JSONResponse(content={"message": "Could not extract weak topics."})

    # Step 2: Generate summaries using Gemini for the identified weak topics
    # (str() guards against non-string entries in the model's list).
    topic_str = ", ".join(str(topic) for topic in weak_topics)
    summarization_prompt = f"""
Write an informative and concise summary (approximately 200 words) for each of the following topics.
Do not mention the author or source. Use clear and academic language.
List of topics:
{topic_str}
Use the following format:
Topic: [topic name]
Summary: [200-word summary]
"""
    summary_response = await run_in_threadpool(model.generate_content, summarization_prompt)

    # Step 3: Parse response into JSON format
    summary_raw = summary_response.text.strip()
    summary_pattern = re.findall(r"Topic:\s*(.*?)\nSummary:\s*(.*?)(?=\nTopic:|\Z)", summary_raw, re.DOTALL)
    # The payload is a list wrapping ONE dict of topic -> summary (a repeated
    # topic name keeps only its last summary); the shape is kept as-is for
    # existing clients.
    summaries = [{topic.strip(): summary.strip() for topic, summary in summary_pattern}]
    return JSONResponse(content=summaries)
# Script entry point: serve the app with uvicorn on all interfaces, port 7860,
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )