# models/text_quality.py
from .model_loader import load_model
from .logging_config import logger


def assess_text_quality(text):
    try:
        # Reject empty or very short descriptions up front
        if not text or len(str(text).strip()) < 20:
            return {
                'assessment': 'insufficient',
                'score': 0,
                'reasoning': 'Text too short.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'top_classifications': []
            }
        try:
            classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            return {
                'assessment': 'error',
                'score': 0,
                'reasoning': f'Model loading error: {str(e)}',
                'is_ai_generated': False,
                'quality_metrics': {},
                'top_classifications': []
            }
        # Quality categories used as zero-shot candidate labels
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Score the text against every category independently (multi-label)
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Keep the top three classifications with confidence above 30%
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })
        # AI-generation detection via a second zero-shot pass with the same classifier
        ai_check = classifier(text[:1000], ["human-written", "AI-generated", "template-based", "authentic"])
        is_ai_generated = (
            (ai_check['labels'][0] == "AI-generated" and ai_check['scores'][0] > 0.6) or
            (ai_check['labels'][0] == "template-based" and ai_check['scores'][0] > 0.7)
        )
        # Aggregate the label scores into named quality metrics; with multi_label=True the
        # per-label scores are independent, so the sums are clamped back into [0, 1] below
        quality_metrics = {
            'detail_level': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                                if label in ['detailed and informative', 'adequately detailed']),
            'professionalism': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                                   if label in ['professional listing', 'authentic description']),
            'clarity': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                           if label not in ['vague description', 'misleading content', 'spam-like content']),
            'authenticity': 1.0 - sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                                      if label in ['template-based content', 'spam-like content'])
        }
        quality_metrics = {name: max(0.0, min(1.0, value)) for name, value in quality_metrics.items()}
        # Overall score as a weighted sum of the metrics
        weights = {
            'detail_level': 0.3,
            'professionalism': 0.25,
            'clarity': 0.25,
            'authenticity': 0.2
        }
        score = sum(metric * weights[metric_name] for metric_name, metric in quality_metrics.items())
        score = score * 100  # Convert to a 0-100 percentage

        # Penalise suspected AI-generated content by reducing the score by 30%
        if is_ai_generated:
            score = score * 0.7
        # Build human-readable reasoning from the metrics
        reasoning_parts = []
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            reasoning_parts.append(f"Primary assessment: {primary_class}")
        if quality_metrics['detail_level'] > 0.7:
            reasoning_parts.append("Contains comprehensive details")
        elif quality_metrics['detail_level'] > 0.4:
            reasoning_parts.append("Contains adequate details")
        else:
            reasoning_parts.append("Lacks important details")
        if quality_metrics['professionalism'] > 0.7:
            reasoning_parts.append("Professional listing style")
        elif quality_metrics['professionalism'] < 0.4:
            reasoning_parts.append("Amateur listing style")
        if quality_metrics['clarity'] < 0.5:
            reasoning_parts.append("Content clarity issues detected")
        if is_ai_generated:
            reasoning_parts.append("Content appears to be AI-generated")

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
            'score': int(score),
            'reasoning': '. '.join(reasoning_parts),
            'is_ai_generated': is_ai_generated,
            'quality_metrics': quality_metrics,
            'top_classifications': top_classifications
        }
    except Exception as e:
        logger.error(f"Error assessing text quality: {str(e)}")
        return {
            'assessment': 'could not assess',
            'score': 50,
            'reasoning': 'Technical error.',
            'is_ai_generated': False,
            'quality_metrics': {},
            'top_classifications': []
        }
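

# Usage sketch (illustrative, not part of the original module): shows how the returned
# dictionary might be consumed. The sample listing text is made up, and because of the
# relative imports above this only runs in a package context, e.g.
# `python -m models.text_quality` from the project root.
if __name__ == "__main__":
    sample = (
        "Spacious two-bedroom apartment in the city centre, recently renovated, "
        "with a fitted kitchen, balcony and secure parking. Available from June."
    )
    result = assess_text_quality(sample)
    print(f"Assessment: {result['assessment']} (score: {result['score']})")
    print(f"Reasoning: {result['reasoning']}")
    print(f"AI-generated: {result['is_ai_generated']}")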