olmocr-demo / mathml_utils.py
leonarb's picture
Create mathml_utils.py
758988d verified
raw
history blame contribute delete
704 Bytes
import re
from latex2mathml.converter import convert as latex_to_mathml
def convert_inline_and_block_latex_to_mathml(text):
    """Replace delimited LaTeX fragments in *text* with wrapped MathML.

    Two passes are made, in this order:

    1. Block math written as ``$$ ... $$`` is converted and wrapped in
       ``<div class='math'>``. The body may span several lines.
    2. Inline math written as ``\\( ... \\)`` is converted and wrapped in
       ``<span class='math'>``. The body must sit on a single line, because
       this pass does not let ``.`` match newlines.

    Conversion is best-effort: if converting a fragment raises, that
    fragment (delimiters included) is left in the output unchanged.

    Args:
        text: The string to scan for LaTeX fragments.

    Returns:
        The string with every successfully converted fragment replaced.
    """

    def _substitute(pattern, wrap, source, flags=0):
        # Run one regex pass over `source`, converting capture group 1 of
        # each hit and handing the resulting MathML to `wrap`.
        def _on_match(found):
            try:
                return wrap(latex_to_mathml(found.group(1)))
            except Exception:
                # Keep the untouched source fragment when conversion fails.
                return found.group(0)

        return re.sub(pattern, _on_match, source, flags=flags)

    # Block formulas first; DOTALL lets the body cross line breaks.
    after_blocks = _substitute(
        r"\$\$(.+?)\$\$",
        lambda mathml: f"<div class='math'>{mathml}</div>",
        text,
        flags=re.DOTALL,
    )
    # Inline formulas second, on the output of the block pass.
    return _substitute(
        r"\\\((.+?)\\\)",
        lambda mathml: f"<span class='math'>{mathml}</span>",
        after_blocks,
    )