# tts_labeling/scripts/report_annotator_progress.py
# Uploaded by vargha, commit 1000353: "script for database management" (3.65 kB)
import argparse
import sys
import os
from sqlalchemy import func, and_
# Put the parent of this script's directory on sys.path (once) so that the
# project-local imports below resolve when the script is executed directly.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from utils.database import get_db
from data.models import Annotator, Annotation, AnnotationInterval, TTSData
from utils.logger import Logger
log = Logger()
def generate_annotator_progress_report():
    """
    Log a completion summary for every active annotator.

    For each annotator whose ``is_active`` flag is set, two aggregate queries
    are run:

    * the sum of ``end_index - start_index + 1`` over the annotator's
      ``AnnotationInterval`` rows (both endpoints are counted), and
    * the number of the annotator's ``Annotation`` rows whose ``TTSData`` id
      lies inside one of those intervals and whose ``annotated_sentence`` is
      neither NULL nor an empty string.

    The ratio of the second to the first is logged as a percentage; it is
    reported as 0 when nothing is assigned.

    Returns:
        None. All output goes through the module-level ``log``. Exceptions
        raised inside the ``get_db()`` context are caught and reported with
        ``log.error`` instead of being propagated.
    """
    with get_db() as db:
        try:
            active_annotators = (
                db.query(Annotator)
                .filter(Annotator.is_active == True)  # noqa: E712 - builds a SQL expression
                .all()
            )
            if not active_annotators:
                log.info("No active annotators found.")
                return

            log.info("--- Annotator Progress Report ---")

            # Inclusive size of one interval; the same SQL expression is
            # reused for every annotator.
            interval_size = AnnotationInterval.end_index - AnnotationInterval.start_index + 1

            for annotator in active_annotators:
                annotator_id = annotator.id

                # Total number of items assigned through this annotator's intervals.
                assigned = (
                    db.query(func.sum(interval_size))
                    .filter(AnnotationInterval.annotator_id == annotator_id)
                    .scalar()
                )
                if assigned is None:
                    # SUM over zero rows yields NULL.
                    assigned = 0

                # Join condition: the TTSData row falls inside one of this
                # annotator's own intervals.
                inside_own_interval = and_(
                    AnnotationInterval.annotator_id == annotator_id,
                    TTSData.id >= AnnotationInterval.start_index,
                    TTSData.id <= AnnotationInterval.end_index,
                )

                # Annotations by this annotator, within their intervals, that
                # carry a non-NULL, non-empty annotated sentence.
                completed = (
                    db.query(func.count(Annotation.id))
                    .join(TTSData, Annotation.tts_data_id == TTSData.id)
                    .join(AnnotationInterval, inside_own_interval)
                    .filter(
                        Annotation.annotator_id == annotator_id,
                        Annotation.annotated_sentence != None,  # noqa: E711 - builds a SQL expression
                        Annotation.annotated_sentence != "",
                    )
                    .scalar()
                )
                if completed is None:
                    completed = 0

                # Avoid dividing by zero for annotators with nothing assigned.
                progress_pct = (completed / assigned) * 100 if assigned > 0 else 0

                log.info(f"Annotator: {annotator.name} (ID: {annotator_id})")
                log.info(f" Total Assigned Items: {assigned}")
                log.info(f" Completed Items: {completed}")
                log.info(f" Progress: {progress_pct:.2f}%")
                log.info("-" * 30)
        except Exception as exc:
            # Report the failure through the logger rather than letting it escape.
            log.error(f"Failed to generate annotator progress report: {exc}")
if __name__ == "__main__":
    # The report always covers every active annotator, so the script takes no
    # command-line arguments.
    generate_annotator_progress_report()