# video-scoring/router.py
from fastapi import APIRouter, Request
from schema import PredictData
from collections import defaultdict
from utils import compute_normalized_score
import torch
import json

router = APIRouter(prefix="/predict")
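
# NOTE: PredictData comes from schema.py (not shown here). Judging only from how
# it is used below, it is assumed to look roughly like these Pydantic models;
# treat this as an illustrative sketch, not the actual definitions:
#
#     class Question(BaseModel):
#         competency_id: int
#         transcript: str
#
#     class CompetencySet(BaseModel):
#         competency_id: int
#         descriptions: list[str]  # one description per proficiency level
#
#     class PredictData(BaseModel):
#         questions: list[Question]
#         competency_sets: list[CompetencySet]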


@router.post("/english")
async def predict_english(request: Request, data: PredictData):
    # Group the answer transcripts by the competency they belong to.
    transcripts_by_competency = defaultdict(list)
    for q in data.questions:
        transcripts_by_competency[q.competency_id].append(q.transcript)

    # Sort competencies so transcripts and competence sets stay positionally
    # aligned when passed to the model.
    sorted_competencies = sorted(data.competency_sets, key=lambda c: c.competency_id)
    ordered_transcripts = [
        " ".join(transcripts_by_competency.get(comp.competency_id, []))
        for comp in sorted_competencies
    ]
    ordered_competence_sets = [comp.descriptions for comp in sorted_competencies]

    print("Calculating scores with these inputs:\n")
    print(
        f"Ordered transcripts: {json.dumps(ordered_transcripts, indent=4, ensure_ascii=False)}\n"
    )
    print(
        f"Ordered competence sets: {json.dumps(ordered_competence_sets, indent=4, ensure_ascii=False)}\n"
    )

    # Run inference without tracking gradients.
    english_scoring_model = request.app.state.english_scoring_model
    with torch.no_grad():
        raw_scores = english_scoring_model(ordered_transcripts, ordered_competence_sets)
    raw_list = raw_scores.tolist()

    # Normalize each competency's raw score array against the number of
    # level descriptions defined for that competency.
    final_scores = []
    for score_array, comp_descriptions in zip(raw_list, ordered_competence_sets):
        expected_levels = len(comp_descriptions)
        normalized_score = compute_normalized_score(score_array, expected_levels)
        final_scores.append(normalized_score)

    # Per-competency details: keep the first k + 1 scores (levels 0..k) for a
    # competency with k level descriptions.
    details = []
    for comp, score_array in zip(sorted_competencies, raw_list):
        k = len(comp.descriptions)
        details.append(
            {"competency_id": comp.competency_id, "scores": score_array[: k + 1]}
        )

    return {
        "score": sum(final_scores) / len(final_scores),
        "details": details,
    }
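
# Example request body for POST /predict/english (the /indonesian endpoint
# expects the same shape). Field names follow the assumed PredictData sketch
# above; values are placeholders:
#
#     {
#         "questions": [
#             {"competency_id": 1, "transcript": "In my last project I ..."},
#             {"competency_id": 2, "transcript": "When the deadline moved ..."}
#         ],
#         "competency_sets": [
#             {"competency_id": 1, "descriptions": ["Level 1 ...", "Level 2 ...", "Level 3 ..."]},
#             {"competency_id": 2, "descriptions": ["Level 1 ...", "Level 2 ...", "Level 3 ..."]}
#         ]
#     }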


@router.post("/indonesian")
async def predict_indonesian(request: Request, data: PredictData):
    # Same flow as /english, but with the Indonesian scoring model.
    transcripts_by_competency = defaultdict(list)
    for q in data.questions:
        transcripts_by_competency[q.competency_id].append(q.transcript)

    sorted_competencies = sorted(data.competency_sets, key=lambda c: c.competency_id)
    ordered_transcripts = [
        " ".join(transcripts_by_competency.get(comp.competency_id, []))
        for comp in sorted_competencies
    ]
    ordered_competence_sets = [comp.descriptions for comp in sorted_competencies]

    print("Calculating scores with these inputs:\n")
    print(
        f"Ordered transcripts: {json.dumps(ordered_transcripts, indent=4, ensure_ascii=False)}\n"
    )
    print(
        f"Ordered competence sets: {json.dumps(ordered_competence_sets, indent=4, ensure_ascii=False)}\n"
    )

    indonesian_scoring_model = request.app.state.indonesian_scoring_model
    with torch.no_grad():
        raw_scores = indonesian_scoring_model(
            ordered_transcripts, ordered_competence_sets
        )
    raw_list = raw_scores.tolist()

    final_scores = []
    for score_array, comp_descriptions in zip(raw_list, ordered_competence_sets):
        expected_levels = len(comp_descriptions)
        normalized_score = compute_normalized_score(score_array, expected_levels)
        final_scores.append(normalized_score)

    details = []
    for comp, score_array in zip(sorted_competencies, raw_list):
        k = len(comp.descriptions)
        details.append(
            {"competency_id": comp.competency_id, "scores": score_array[: k + 1]}
        )

    return {
        "score": sum(final_scores) / len(final_scores),
        "details": details,
    }
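
# Example response shape for either endpoint (placeholder numbers). "score" is
# the mean of the per-competency normalized scores; each "details" entry keeps
# the model's k + 1 raw scores (levels 0..k) for a competency with k level
# descriptions:
#
#     {
#         "score": 0.72,
#         "details": [
#             {"competency_id": 1, "scores": [0.05, 0.10, 0.25, 0.60]},
#             {"competency_id": 2, "scores": [0.10, 0.20, 0.40, 0.30]}
#         ]
#     }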