Spaces:

CanCLID
/

srt-eval

Sleeping

App Files Files Community

laubonghaudoi committed on Oct 28, 2024

Commit

a233921

0 Parent(s):

initial commit

Browse files

Files changed (10) hide show

.gitattributes +35 -0
LICENSE +21 -0
README.md +35 -0
app.py +63 -0
bert.srt +87 -0
gold.srt +63 -0
main.py +178 -0
requirements.txt +2 -0
utils.py +50 -0
visualize.py +269 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2024 laubonghaudoi
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md ADDED Viewed

	@@ -0,0 +1,35 @@

+---
+title: Srt Eval
+emoji: 🌍
+colorFrom: green
+colorTo: indigo
+sdk: gradio
+sdk_version: 5.4.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Visualize CER / WER for SRT subtitles
+---
+# SRT Evaluation Tool
+This Gradio app compares two SRT files and calculates the Character Error Rate (CER), with and without punctuation. Word Error Rate (WER) is not supported yet and is planned for a future version. The app provides a detailed visualization of the differences between the files.
+## Features
+- Upload and compare two SRT files
+- Calculate CER metrics (WER is planned for a future version)
+- Visualize text differences
+- Download visualization as PNG or PDF
+- Example files included for testing
+## Usage
+1. Upload a reference (golden) SRT file
+2. Upload a target SRT file for comparison
+3. Click "Get CER" to see the results
+4. Or use "Load Example" to try with sample files
+## About
+This tool is particularly useful for evaluating machine-generated subtitles against human-created references, supporting both Chinese and English text.

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import gradio as gr
+from main import calculate_cer_both_versions, read_srt_text
+from visualize import generate_html_report
+def process_srt_files(reference_file, hypothesis_file):
+    try:
+        # Handle both file objects and string paths
+        ref_path = reference_file.name if hasattr(
+            reference_file, 'name') else reference_file
+        hyp_path = hypothesis_file.name if hasattr(
+            hypothesis_file, 'name') else hypothesis_file
+        reference_text = read_srt_text(ref_path)
+        hypothesis_text = read_srt_text(hyp_path)
+        metrics_no_punct, metrics_with_punct = calculate_cer_both_versions(
+            ref_path, hyp_path)
+        html_content = generate_html_report(
+            reference_text, hypothesis_text, metrics_no_punct, metrics_with_punct)
+        return html_content
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+def load_example():
+    return "gold.srt", "bert.srt"
+# Build the Gradio UI. Components are created in display order: headings,
+# the two file inputs, the action buttons, then the HTML result area.
+with gr.Blocks() as iface:
+    gr.Markdown("# SRT File Comparison and CER Calculation")
+    gr.Markdown(
+        "## Please upload the golden reference SRT and the target SRT for calculating the CER.")
+    gr.Markdown(
+        "Note: Only CER is supported at the moment, WER will be added in a future version.")
+    # Row 1: reference and target uploads side by side
+    with gr.Row():
+        ref_file = gr.File(label="Reference (Golden) SRT File")
+        hyp_file = gr.File(label="Target SRT File")
+    # Row 2: action buttons
+    with gr.Row():
+        example_btn = gr.Button("Load Example")
+        process_btn = gr.Button("Get CER", variant="primary")
+    output = gr.HTML(label="Results")
+    # "Get CER" passes both uploads to process_srt_files and shows its HTML
+    process_btn.click(
+        fn=process_srt_files,
+        inputs=[ref_file, hyp_file],
+        outputs=output
+    )
+    # "Load Example" fills both file inputs with the bundled sample paths
+    example_btn.click(
+        fn=load_example,
+        inputs=None,
+        outputs=[ref_file, hyp_file]
+    )
+# Start the app only when this file is run as a script
+if __name__ == "__main__":
+    iface.launch()

bert.srt ADDED Viewed

	@@ -0,0 +1,87 @@

+0
+00:00:01,617 --> 00:00:02,760
+各位朋友。
+1
+00:00:03,755 --> 00:00:06,119
+喺講三國演義之前啊。
+2
+00:00:06,785 --> 00:00:09,040
+我念一手持俾大家聽下嚇。
+3
+00:00:10,720 --> 00:00:17,120
+滾滾長江東逝水浪花陶盡英雄。
+4
+00:00:18,082 --> 00:00:26,530
+是非成敗轉頭空青山依舊在幾道夕陽紅。
+5
+00:00:27,986 --> 00:00:33,870
+白發如潮江主上慣看秋月春風。
+6
+00:00:34,800 --> 00:00:43,829
+一壺濁酒喜相逢古今多少事都付笑談中，
+7
+00:00:45,717 --> 00:00:50,379
+好一個古今多少事都呼笑談中啊。
+8
+00:00:51,203 --> 00:00:55,940
+一部三國演義係講魏術吳三國嘅曆史。
+9
+00:00:56,943 --> 00:01:10,283
+由東漢靈帝中平元年即係公元一八四年黃巾起義嗰陣開始一直係寫到晉武帝太康元年即係公元二八零年啦，
+10
+00:01:10,698 --> 00:01:15,560
+吳國滅亡嗰陣爲止差唔多成個世紀咁長。
+11
+00:01:16,552 --> 00:01:20,250
+佢離而傢咧有成一千八百年噉耐嘞。
+12
+00:01:20,870 --> 00:01:24,960
+你話多少英雄豪傑啊已經化爲烏有。
+13
+00:01:25,938 --> 00:01:31,700
+但系佢哋嘅事跡就好似滾滾長江一直流傳到今日。
+14
+00:01:32,894 --> 00:01:40,574
+而家講起桃園結義三英戰呂報火燒赤壁六出岐山等等，
+15
+00:01:40,692 --> 00:01:42,920
+唉真系感慨好多啊。
+16
+00:01:43,895 --> 00:01:48,110
+咁至于繫唔係講完聽完只係得啖笑呢，
+17
+00:01:48,526 --> 00:01:53,929
+我睇亦未必古時啊一個朝代嘅聖衰興亡，
+18
+00:01:54,405 --> 00:01:56,483
+一個人物嘅成功失敗，
+19
+00:01:56,958 --> 00:02:01,411
+總係可以使我哋今人從中得到啲啓發同教益嘅，
+20
+00:02:01,827 --> 00:02:02,480
+好啦好啦，
+21
+00:02:03,133 --> 00:02:05,390
+閒話幽題言歸正傳。

gold.srt ADDED Viewed

	@@ -0,0 +1,63 @@

+1
+00:00:01,491 --> 00:00:09,158
+各位朋友，喺講《三國演義》之前啊，我唸一首詞畀大家聽下吓。
+2
+00:00:10,342 --> 00:00:17,103
+滾滾長江東逝水，浪花淘盡英雄。
+3
+00:00:17,786 --> 00:00:26,490
+是非成敗轉頭空，青山依舊在，幾度夕陽紅。
+4
+00:00:27,695 --> 00:00:33,815
+白髮漁樵江渚上，慣看秋月春風。
+5
+00:00:34,373 --> 00:00:44,340
+一壺濁酒喜相逢，古今多少事，都付笑談中。
+6
+00:00:44,649 --> 00:00:50,377
+哈哈哈，好一個古今多少事，都付笑談中啊。
+7
+00:00:50,940 --> 00:00:55,857
+一部《三國演義》，係講魏蜀吳三國嘅歷史。
+8
+00:00:56,711 --> 00:01:04,396
+由東漢靈帝中平元年，即係公元一八四年，黃巾起義嗰陣開始。
+9
+00:01:04,796 --> 00:01:15,538
+一直寫到晉武帝太康元年，即係公元二八零年嘞，吳國滅亡嗰陣為止，差唔多成個世紀咁長。
+10
+00:01:16,295 --> 00:01:20,228
+佢哋而家呢，有成一千八百年咁耐嘞。
+11
+00:01:20,670 --> 00:01:24,920
+你話多少英雄豪傑啊，已經化為烏有。
+12
+00:01:25,745 --> 00:01:31,689
+但係佢哋嘅事蹟，就好似滾滾長江一直流傳到今日。
+13
+00:01:32,566 --> 00:01:42,883
+而家講起桃園結義、三英戰呂布、火燒赤壁、六出祁山等等，嗨真係感慨好多啊。
+14
+00:01:43,695 --> 00:01:49,863
+噉至於係唔係講完聽完，只係得啖笑呢？我睇亦未必。
+15
+00:01:50,296 --> 00:02:01,787
+古時啊，一個朝代嘅盛衰興亡，一個人物嘅成功失敗，總係可以使我哋今人從中得到啲啓發同教育嘅。
+16
+00:02:01,787 --> 00:02:05,403
+好嘞好嘞，閒話休提言歸正傳嘞。

main.py ADDED Viewed

	@@ -0,0 +1,178 @@

+import argparse
+from typing import Dict, Tuple
+from visualize import generate_html_report
+from utils import read_srt_text, preprocess_chinese_text
+import jiwer
+# Expose read_srt_text function
+from utils import read_srt_text
+def parse_arguments():
+    parser = argparse.ArgumentParser(
+        description="Calculate Character Error Rate (CER) for Chinese SRT files",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "-r",
+        "--reference",
+        required=True,
+        help="Path to the reference (golden) SRT file",
+    )
+    parser.add_argument(
+        "-i", "--input", required=True, help="Path to the input (hypothesis) SRT file"
+    )
+    parser.add_argument("-o", "--output", help="Path to save the results (optional)")
+    parser.add_argument("--html", help="Path to save the HTML visualization (optional)")
+    return parser.parse_args()
+def calculate_cer_both_versions(
+    reference_path: str, hypothesis_path: str
+) -> Tuple[Dict, Dict]:
+    """
+    Calculate CER and related metrics between reference and hypothesis SRT files,
+    both with and without punctuation.
+    Args:
+        reference_path (str): Path to the reference SRT file
+        hypothesis_path (str): Path to the hypothesis SRT file
+    Returns:
+        Tuple[Dict, Dict]: Two dictionaries containing metrics (with and without punctuation)
+    """
+    # Read files
+    reference_text = read_srt_text(reference_path)
+    hypothesis_text = read_srt_text(hypothesis_path)
+    # Calculate metrics without punctuation
+    reference_chars_no_punct = preprocess_chinese_text(
+        reference_text, include_punctuation=False
+    )
+    hypothesis_chars_no_punct = preprocess_chinese_text(
+        hypothesis_text, include_punctuation=False
+    )
+    metrics_no_punct = jiwer.compute_measures(
+        reference_chars_no_punct, hypothesis_chars_no_punct
+    )
+    # Calculate metrics with punctuation
+    reference_chars_with_punct = preprocess_chinese_text(
+        reference_text, include_punctuation=True
+    )
+    hypothesis_chars_with_punct = preprocess_chinese_text(
+        hypothesis_text, include_punctuation=True
+    )
+    metrics_with_punct = jiwer.compute_measures(
+        reference_chars_with_punct, hypothesis_chars_with_punct
+    )
+    # Add character counts
+    metrics_no_punct["total_ref_chars"] = len(reference_chars_no_punct.replace(" ", ""))
+    metrics_no_punct["total_hyp_chars"] = len(
+        hypothesis_chars_no_punct.replace(" ", "")
+    )
+    metrics_with_punct["total_ref_chars"] = len(
+        reference_chars_with_punct.replace(" ", "")
+    )
+    metrics_with_punct["total_hyp_chars"] = len(
+        hypothesis_chars_with_punct.replace(" ", "")
+    )
+    return metrics_no_punct, metrics_with_punct
+def format_metrics(metrics: dict, version: str) -> str:
+    """
+    Format metrics into a string.
+    Args:
+        metrics (dict): Dictionary of metric values
+        version (str): String indicating which version of metrics these are
+    Returns:
+        str: Formatted metrics string
+    """
+    output = []
+    output.append(f"\n=== {version} ===")
+    output.append(f"Character Error Rate (CER): {metrics['wer']:.3f}")
+    output.append(f"Total Reference Characters: {metrics['total_ref_chars']}")
+    output.append(f"Total Hypothesis Characters: {metrics['total_hyp_chars']}")
+    output.append("\nDetailed Statistics:")
+    output.append(f"Correct Characters: {metrics['hits']}")
+    output.append(f"Substitutions: {metrics['substitutions']}")
+    output.append(f"Deletions: {metrics['deletions']}")
+    output.append(f"Insertions: {metrics['insertions']}")
+    # Calculate and print percentage stats
+    total_errors = (
+        metrics["substitutions"] + metrics["deletions"] + metrics["insertions"]
+    )
+    total_chars = metrics["total_ref_chars"]
+    output.append(f"\nError Analysis:")
+    output.append(f"Total Errors: {total_errors}")
+    output.append(f"Substitution Rate: {metrics['substitutions']/total_chars:.3f}")
+    output.append(f"Deletion Rate: {metrics['deletions']/total_chars:.3f}")
+    output.append(f"Insertion Rate: {metrics['insertions']/total_chars:.3f}")
+    return "\n".join(output)
+if __name__ == "__main__":
+    args = parse_arguments()
+    try:
+        # Read the original texts
+        reference_text = read_srt_text(args.reference)
+        hypothesis_text = read_srt_text(args.input)
+        # Calculate metrics
+        metrics_no_punct, metrics_with_punct = calculate_cer_both_versions(
+            args.reference, args.input
+        )
+        # Generate and save HTML report if requested
+        if args.html:
+            html_content = generate_html_report(
+                reference_text, hypothesis_text, metrics_no_punct, metrics_with_punct
+            )
+            with open(args.html, "w", encoding="utf-8") as f:
+                f.write(html_content)
+            print(f"\nHTML visualization has been saved to: {args.html}")
+        # Original metrics output
+        output_text = []
+        output_text.append(
+            format_metrics(metrics_no_punct, "Metrics Without Punctuation")
+        )
+        output_text.append(
+            format_metrics(metrics_with_punct, "Metrics With Punctuation")
+        )
+        output_text.append("\n=== Comparison ===")
+        output_text.append(f"CER without punctuation: {metrics_no_punct['wer']:.3f}")
+        output_text.append(f"CER with punctuation: {metrics_with_punct['wer']:.3f}")
+        output_text.append(
+            f"Difference: {abs(metrics_with_punct['wer'] - metrics_no_punct['wer']):.3f}"
+        )
+        final_output = "\n".join(output_text)
+        print(final_output)
+        if args.output:
+            with open(args.output, "w", encoding="utf-8") as f:
+                f.write(final_output)
+            print(f"\nResults have been saved to: {args.output}")
+    except FileNotFoundError as e:
+        print(f"Error: Could not find one of the input files - {str(e)}")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ jiwer==3.0.3
2	+ gradio==3.50.2

utils.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import re
+def read_srt_text(file_path: str) -> str:
+    """
+    Read an SRT file and extract only the text content, ignoring timestamps.
+    Args:
+        file_path (str): Path to the SRT file
+    Returns:
+        str: Concatenated text content from the SRT file
+    """
+    with open(file_path, "r", encoding="utf-8") as f:
+        content = f.read()
+    # Split content into subtitle blocks
+    blocks = content.strip().split("\n\n")
+    # Extract only the text lines (not numbers or timestamps)
+    text_lines = []
+    for block in blocks:
+        lines = block.split("\n")
+        # Skip the subtitle number and timestamp lines
+        text = " ".join(lines[2:])  # Join all lines after timestamp
+        text_lines.append(text)
+    return " ".join(text_lines)
+def preprocess_chinese_text(text: str, include_punctuation: bool = False) -> str:
+    """
+    Preprocess Chinese text for CER calculation.
+    Args:
+        text (str): Input Chinese text
+        include_punctuation (bool): Whether to include punctuation in the calculation
+    Returns:
+        str: Preprocessed text with characters separated by spaces
+    """
+    # Remove any English characters, numbers, and extra spaces
+    text = re.sub(r"[a-zA-Z0-9\s]+", "", text)
+    if not include_punctuation:
+        # Remove both Chinese and English punctuation with properly escaped characters
+        text = re.sub(
+            r'[，。！？：；""' "（）【】《》、,\.!?:;\"'\\(\\)\\[\\]\\{\\}]", "", text
+        )
+    # Convert to list of characters and join with spaces
+    return " ".join(list(text))

visualize.py ADDED Viewed

	@@ -0,0 +1,269 @@

+import difflib
+from dataclasses import dataclass
+from html import escape
+from typing import List, Tuple
+from utils import preprocess_chinese_text
+@dataclass
+class DiffResult:
+    """Result of a character-level comparison between a reference and a hypothesis."""
+    # Reference characters concatenated; mismatches are wrapped in
+    # [DEL]/[INS]/[SUB] markers ("_" stands in for a missing character).
+    reference_display: str
+    # Hypothesis characters concatenated, marked up the same way.
+    hypothesis_display: str
+    # One (reference_char, hypothesis_char) pair per mismatch; "_" marks the
+    # missing side of a deletion or an insertion.
+    error_pairs: List[Tuple[str, str]]
+def visualize_differences(
+    ref_text: str, hyp_text: str, include_punctuation: bool = False
+) -> DiffResult:
+    """
+    Create a visualization of the differences between reference and hypothesis texts.
+    Args:
+        ref_text (str): Reference text
+        hyp_text (str): Hypothesis text
+        include_punctuation (bool): Whether to include punctuation
+    Returns:
+        DiffResult: Containing formatted reference and hypothesis texts with error highlighting
+    """
+    # Preprocess texts
+    ref_processed = preprocess_chinese_text(ref_text, include_punctuation)
+    hyp_processed = preprocess_chinese_text(hyp_text, include_punctuation)
+    # Split into characters
+    ref_chars = ref_processed.split()
+    hyp_chars = hyp_processed.split()
+    # Get sequence matcher
+    matcher = difflib.SequenceMatcher(None, ref_chars, hyp_chars)
+    ref_formatted = []
+    hyp_formatted = []
+    error_pairs = []
+    for op, ref_start, ref_end, hyp_start, hyp_end in matcher.get_opcodes():
+        if op == "equal":
+            ref_formatted.extend(ref_chars[ref_start:ref_end])
+            hyp_formatted.extend(hyp_chars[hyp_start:hyp_end])
+        elif op == "delete":
+            # Deletion - character in reference but not in hypothesis
+            for char in ref_chars[ref_start:ref_end]:
+                ref_formatted.append(f"[DEL]{char}[/DEL]")
+                hyp_formatted.append("[DEL]_[/DEL]")
+                error_pairs.append((char, "_"))
+        elif op == "insert":
+            # Insertion - character in hypothesis but not in reference
+            for char in hyp_chars[hyp_start:hyp_end]:
+                ref_formatted.append("[INS]_[/INS]")
+                hyp_formatted.append(f"[INS]{char}[/INS]")
+                error_pairs.append(("_", char))
+        elif op == "replace":
+            # Substitution - different characters in reference and hypothesis
+            for ref_char, hyp_char in zip(
+                ref_chars[ref_start:ref_end], hyp_chars[hyp_start:hyp_end]
+            ):
+                ref_formatted.append(f"[SUB]{ref_char}[/SUB]")
+                hyp_formatted.append(f"[SUB]{hyp_char}[/SUB]")
+                error_pairs.append((ref_char, hyp_char))
+    return DiffResult(
+        reference_display="".join(ref_formatted),
+        hypothesis_display="".join(hyp_formatted),
+        error_pairs=error_pairs,
+    )
+def generate_html_report(
+    ref_text: str, hyp_text: str, metrics_no_punct: dict, metrics_with_punct: dict
+) -> str:
+    """
+    Generate an HTML report with error visualization and metrics.
+    """
+    # Get visualizations for both versions
+    diff_no_punct = visualize_differences(ref_text, hyp_text, False)
+    diff_with_punct = visualize_differences(ref_text, hyp_text, True)
+    def format_text_for_html(text: str) -> str:
+        """Format text with HTML spans for coloring"""
+        text = escape(text)
+        text = text.replace("[DEL]", '<span class="deletion">')
+        text = text.replace("[/DEL]", "</span>")
+        text = text.replace("[INS]", '<span class="insertion">')
+        text = text.replace("[/INS]", "</span>")
+        text = text.replace("[SUB]", '<span class="substitution">')
+        text = text.replace("[/SUB]", "</span>")
+        return text
+    def format_error_pairs(pairs: List[Tuple[str, str]]) -> str:
+        """Format error pairs into HTML table rows"""
+        rows = []
+        for ref_char, hyp_char in pairs:
+            rows.append(
+                f"<tr><td>{escape(ref_char)}</td><td>{escape(hyp_char)}</td></tr>"
+            )
+        return "\n".join(rows)
+    # Calculate metrics for no punctuation
+    ref_no_punct = preprocess_chinese_text(ref_text, False)
+    total_chars_no_punct = len(ref_no_punct.split())
+    # total_words_no_punct = len([w for w in ref_no_punct.split() if w.strip()])
+    cer_no_punct = metrics_no_punct['wer']
+    total_errors_no_punct = metrics_no_punct['substitutions'] + \
+        metrics_no_punct['deletions'] + metrics_no_punct['insertions']
+    substitutions_no_punct = metrics_no_punct['substitutions']
+    deletions_no_punct = metrics_no_punct['deletions']
+    insertions_no_punct = metrics_no_punct['insertions']
+    # Calculate metrics for with punctuation
+    ref_with_punct = preprocess_chinese_text(ref_text, True)
+    total_chars_punct = len(ref_with_punct.split())
+    # total_words_punct = len([w for w in ref_with_punct.split() if w.strip()])
+    cer_punct = metrics_with_punct['wer']
+    total_errors_punct = metrics_with_punct['substitutions'] + \
+        metrics_with_punct['deletions'] + metrics_with_punct['insertions']
+    substitutions_punct = metrics_with_punct['substitutions']
+    deletions_punct = metrics_with_punct['deletions']
+    insertions_punct = metrics_with_punct['insertions']
+    html_template = """
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <meta charset="UTF-8">
+        <title>CER Analysis Report</title>
+        <style>
+            body {{ font-family: Arial, sans-serif; margin: 20px; }}
+            .container {{ max-width: 100%; margin: 0 auto; }}
+            .metrics {{ margin: 20px 0; padding: 10px; background: #f5f5f5; }}
+            .visualization {{ margin: 20px 0; }}
+            .deletion {{ background-color: #ffd7d7; text-decoration: line-through; }}
+            .insertion {{ background-color: #d7ffd7; }}
+            .substitution {{ background-color: #fff3d7; }}
+            .text-display {{ font-size: 16px; line-height: 1.6; white-space: pre-wrap; }}
+            table {{ border-collapse: collapse; margin: 10px 0; }}
+            th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
+            th {{ background-color: #f5f5f5; }}
+            .legend {{ margin: 20px 0; }}
+            .legend span {{ padding: 2px 5px; margin-right: 10px; }}
+            h2 {{ margin-top: 30px; }}
+            .grid-container {{  display: grid; grid-template-columns: auto auto; column-gap: 24px;}}
+            .grid-item {{  }}
+        </style>
+    </head>
+    <body>
+        <div class="container">
+            <h1>Character Error Rate Analysis Report</h1>
+            <div class="legend">
+                <h3>Legend:</h3>
+                <span class="deletion">Deletion</span>
+                <span class="insertion">Insertion</span>
+                <span class="substitution">Substitution</span>
+            </div>
+            <div class="grid-container">
+              <div class="grid-item">
+                <h2>Without Punctuation</h2>
+                <table class="metrics">
+                <thead>
+                    <tr>
+                    <th>Total Chars</th>
+                    <th>CER</th>
+                    <th>Total Errors</th>
+                    <th>Substitutions</th>
+                    <th>Deletions</th>
+                    <th>Insertions</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <tr>
+                    <td>{total_chars_no_punct}</td>
+                    <td>{cer_no_punct:.3f}</td>
+                    <td>{total_errors_no_punct}</td>
+                    <td>{substitutions_no_punct}</td>
+                    <td>{deletions_no_punct}</td>
+                    <td>{insertions_no_punct}</td>
+                    </tr>
+                </tbody>
+                </table>
+                <div class="visualization">
+                    <h3>Reference Text:</h3>
+                    <div class="text-display">{ref_no_punct}</div>
+                    <h3>Hypothesis Text:</h3>
+                    <div class="text-display">{hyp_no_punct}</div>
+                    <h3>Error Pairs:</h3>
+                    <table>
+                        <tr><th>Reference</th><th>Hypothesis</th></tr>
+                        {pairs_no_punct}
+                    </table>
+                </div>
+              </div>
+              <div class="grid-item">
+                <h2>With Punctuation</h2>
+                <table class="metrics">
+                <thead>
+                    <tr>
+                    <th>Total Chars</th>
+                    <th>CER</th>
+                    <th>Total Errors</th>
+                    <th>Substitutions</th>
+                    <th>Deletions</th>
+                    <th>Insertions</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <tr>
+                    <td>{total_chars_punct}</td>
+                    <td>{cer_punct:.3f}</td>
+                    <td>{total_errors_punct}</td>
+                    <td>{substitutions_punct}</td>
+                    <td>{deletions_punct}</td>
+                    <td>{insertions_punct}</td>
+                    </tr>
+                </tbody>
+                </table>
+                <div class="visualization">
+                  <h3>Reference Text:</h3>
+                  <div class="text-display">{ref_with_punct}</div>
+                  <h3>Hypothesis Text:</h3>
+                  <div class="text-display">{hyp_with_punct}</div>
+                  <h3>Error Pairs:</h3>
+                  <table>
+                      <tr><th>Reference</th><th>Hypothesis</th></tr>
+                      {pairs_with_punct}
+                  </table>
+                </div>
+              </div>
+            </div>
+        </div>
+    </body>
+    </html>
+    """
+    return html_template.format(
+        cer_no_punct=cer_no_punct,
+        total_errors_no_punct=total_errors_no_punct,
+        insertions_no_punct=insertions_no_punct,
+        deletions_no_punct=deletions_no_punct,
+        substitutions_no_punct=substitutions_no_punct,
+        cer_punct=cer_punct,
+        total_errors_punct=total_errors_punct,
+        insertions_punct=insertions_punct,
+        deletions_punct=deletions_punct,
+        substitutions_punct=substitutions_punct,
+        total_chars_no_punct=total_chars_no_punct,
+        total_chars_punct=total_chars_punct,
+        ref_no_punct=format_text_for_html(diff_no_punct.reference_display),
+        hyp_no_punct=format_text_for_html(diff_no_punct.hypothesis_display),
+        pairs_no_punct=format_error_pairs(diff_no_punct.error_pairs),
+        ref_with_punct=format_text_for_html(diff_with_punct.reference_display),
+        hyp_with_punct=format_text_for_html(
+            diff_with_punct.hypothesis_display),
+        pairs_with_punct=format_error_pairs(diff_with_punct.error_pairs),
+    )