// Source: Hugging Face Space davanstrien/ocr-time-capsule (status: Running)
// File size: 8,102 bytes

/**
 * Reasoning Trace Parser
 * Handles parsing and formatting of model reasoning traces from OCR outputs
 */

class ReasoningParser {
    /**
     * Tag names (without angle brackets) that delimit a reasoning trace,
     * in the order they are tried.
     * @returns {string[]}
     */
    static get reasoningTags() {
        return ['think', 'thinking', 'reasoning', 'thought'];
    }

    /**
     * Tag names (without angle brackets) that delimit the final answer,
     * in the order they are tried.
     * @returns {string[]}
     */
    static get answerTags() {
        return ['answer', 'output'];
    }

    /**
     * Find the first complete `<tag>...</tag>` pair among the given tag names.
     * Names are tried in array order; matching is case-insensitive and lazy,
     * so the earliest closing tag after the opening tag ends the block.
     * @param {string} text - The text to search
     * @param {string[]} tagNames - Plain alphabetic tag names (no escaping is applied)
     * @returns {?RegExpMatchArray} - Match whose [1] is the inner text, or null
     */
    static matchFirstTag(text, tagNames) {
        for (const tagName of tagNames) {
            const match = text.match(
                new RegExp(`<${tagName}>([\\s\\S]*?)</${tagName}>`, 'i')
            );
            if (match) return match;
        }
        return null;
    }

    /**
     * Detect if text contains a complete reasoning trace, i.e. an opening
     * reasoning tag followed later by its matching closing tag. This uses the
     * same tag list and matching as parseReasoningContent, so anything
     * detected here can also be parsed.
     * @param {string} text - The text to check
     * @returns {boolean} - True if a reasoning trace is detected
     */
    static detectReasoningTrace(text) {
        if (!text || typeof text !== 'string') return false;

        return ReasoningParser.matchFirstTag(text, ReasoningParser.reasoningTags) !== null;
    }

    /**
     * Parse reasoning content from text.
     *
     * The reasoning block and the answer block are located independently, so
     * any supported reasoning tag can be combined with any supported answer
     * tag. When a reasoning block is found but no answer tags are present,
     * the text outside the reasoning block is used as the answer.
     *
     * @param {string} text - The text containing reasoning trace
     * @returns {object} - { reasoning, answer, hasReasoning, hasAnswer, original };
     *   reasoning/answer are trimmed strings or null
     */
    static parseReasoningContent(text) {
        if (!text || typeof text !== 'string') {
            return {
                reasoning: null,
                answer: null,
                hasReasoning: false,
                hasAnswer: false,
                original: text
            };
        }

        const reasoningMatch = ReasoningParser.matchFirstTag(text, ReasoningParser.reasoningTags);
        const answerMatch = ReasoningParser.matchFirstTag(text, ReasoningParser.answerTags);

        if (reasoningMatch || answerMatch) {
            let answer = null;
            if (answerMatch) {
                answer = answerMatch[1].trim();
            } else {
                // No explicit answer tags: whatever surrounds the reasoning
                // block is the model's final output (e.g. "<think>..</think>text").
                const blockEnd = reasoningMatch.index + reasoningMatch[0].length;
                const remainder = (text.slice(0, reasoningMatch.index) + text.slice(blockEnd)).trim();
                answer = remainder || null;
            }

            return {
                reasoning: reasoningMatch ? reasoningMatch[1].trim() : null,
                answer: answer,
                hasReasoning: !!reasoningMatch,
                hasAnswer: answer !== null,
                original: text
            };
        }

        // Check if there are incomplete reasoning tags (opening but no closing)
        const hasOpeningTag = /<think>|<thinking>|<reasoning>|<thought>/i.test(text);
        if (hasOpeningTag) {
            console.warn('Incomplete reasoning trace detected - missing closing tags');
        }

        // If no patterns match, return original text as answer
        return {
            reasoning: null,
            answer: text,
            hasReasoning: false,
            hasAnswer: true,
            original: text
        };
    }

    /**
     * Format reasoning steps for display.
     *
     * First looks for steps of the form "N. **Title** content"; if none are
     * found, falls back to a line-based scan where each "N. text" line starts
     * a step and following lines are appended to that step's content.
     *
     * @param {string} reasoningText - The raw reasoning text
     * @returns {?object} - { steps, rawText, stepCount, characterCount, wordCount },
     *   or null when reasoningText is empty
     */
    static formatReasoningSteps(reasoningText) {
        if (!reasoningText) return null;

        // A step's content runs until the next line that starts with "N. " or
        // until end of input. JavaScript has no \z anchor (it would match a
        // literal "z"), and "$" means end-of-line under the m flag, so end of
        // input is expressed as (?![\s\S]).
        const stepPattern = /^(\d+)\.\s+\*\*(.+?)\*\*(.*?)(?=^\d+\.\s|(?![\s\S]))/gms;
        const steps = [];
        let match;

        while ((match = stepPattern.exec(reasoningText)) !== null) {
            steps.push({
                number: match[1],
                title: match[2].trim(),
                content: match[3].trim()
            });
        }

        // If no bold-titled steps were found, try to parse line by line
        if (steps.length === 0) {
            const lines = reasoningText.split('\n').filter(line => line.trim());
            for (const line of lines) {
                const numberedMatch = line.match(/^(\d+)\.\s*(.+)/);
                if (numberedMatch) {
                    steps.push({
                        number: numberedMatch[1],
                        title: numberedMatch[2].replace(/\*\*/g, '').trim(),
                        content: ''
                    });
                } else if (steps.length > 0) {
                    // Continuation line: add to previous step's content.
                    // Lines before the first numbered line are ignored.
                    steps[steps.length - 1].content += '\n' + line;
                }
            }
        }

        return {
            steps: steps,
            rawText: reasoningText,
            stepCount: steps.length,
            characterCount: reasoningText.length,
            wordCount: reasoningText.split(/\s+/).filter(w => w).length
        };
    }

    /**
     * Extract key insights from reasoning.
     *
     * Collects the rest of the line following each "decision:",
     * "observation:", "note:", "important:" or "key finding:" label
     * (case-insensitive). Results are grouped by label in that order, not by
     * position in the text.
     *
     * @param {string} reasoningText - The reasoning text
     * @returns {array} - Array of key insights or decisions
     */
    static extractInsights(reasoningText) {
        if (!reasoningText) return [];

        // Fresh literals on every call, so the stateful lastIndex of these
        // global regexes never leaks between invocations.
        const labelPatterns = [
            /decision:\s*(.+)/gi,
            /observation:\s*(.+)/gi,
            /note:\s*(.+)/gi,
            /important:\s*(.+)/gi,
            /key finding:\s*(.+)/gi
        ];

        const insights = [];
        for (const labelPattern of labelPatterns) {
            let match;
            while ((match = labelPattern.exec(reasoningText)) !== null) {
                insights.push(match[1].trim());
            }
        }

        return insights;
    }

    /**
     * Get summary statistics about the reasoning trace.
     *
     * Note: reasoningRatio is a percentage string produced by toFixed(1)
     * when reasoning is present, and the number 0 otherwise.
     *
     * @param {object} parsedContent - Parsed reasoning content
     * @returns {object} - Statistics about the reasoning
     */
    static getReasoningStats(parsedContent) {
        if (!parsedContent || !parsedContent.reasoning) {
            return {
                hasReasoning: false,
                reasoningLength: 0,
                answerLength: 0,
                reasoningRatio: 0
            };
        }

        const countWords = value => value.split(/\s+/).filter(w => w).length;

        const reasoningLength = parsedContent.reasoning.length;
        const answerLength = parsedContent.answer ? parsedContent.answer.length : 0;
        const totalLength = reasoningLength + answerLength;

        return {
            hasReasoning: true,
            reasoningLength: reasoningLength,
            answerLength: answerLength,
            totalLength: totalLength,
            reasoningRatio: totalLength > 0 ? (reasoningLength / totalLength * 100).toFixed(1) : 0,
            reasoningWords: countWords(parsedContent.reasoning),
            answerWords: parsedContent.answer ? countWords(parsedContent.answer) : 0
        };
    }

    /**
     * Format reasoning for export.
     *
     * When reasoning is included and present, it is written under a
     * "MODEL REASONING" heading followed by a "FINAL OUTPUT" heading; the
     * answer (if any) is appended last.
     *
     * @param {object} parsedContent - Parsed reasoning content
     * @param {boolean} includeReasoning - Whether to include reasoning in export
     * @returns {string} - Formatted text for export
     */
    static formatForExport(parsedContent, includeReasoning = true) {
        if (!parsedContent) return '';

        let exportText = '';

        if (includeReasoning && parsedContent.reasoning) {
            exportText += '=== MODEL REASONING ===\n\n';
            exportText += parsedContent.reasoning;
            exportText += '\n\n=== FINAL OUTPUT ===\n\n';
        }

        if (parsedContent.answer) {
            exportText += parsedContent.answer;
        }

        return exportText;
    }
}

// Export for use in other scripts. Guarded so that loading this file where no
// `window` global exists (e.g. Node-based tooling or tests) does not throw a
// ReferenceError; in a browser the behaviour is unchanged.
if (typeof window !== 'undefined') {
    window.ReasoningParser = ReasoningParser;
}