GameServerZ

Sleeping

App Files Files Community

GameServerZ / MLPY /Lib /site-packages /markdown /extensions /md_in_html.py

Kano001

Upload 376 files

122d3ff verified 12 months ago

raw

history blame

16.5 kB

	# Python-Markdown Markdown in HTML Extension
	# ===============================

	# An implementation of [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/)'s
	# parsing of Markdown syntax in raw HTML.

	# See https://Python-Markdown.github.io/extensions/raw_html
	# for documentation.

	# Copyright The Python Markdown Project

	# License: [BSD](https://opensource.org/licenses/bsd-license.php)

	"""
	An implementation of [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/)'s
	parsing of Markdown syntax in raw HTML.

	See the [documentation](https://Python-Markdown.github.io/extensions/raw_html)
	for details.
	"""

	from __future__ import annotations

	from . import Extension
	from ..blockprocessors import BlockProcessor
	from ..preprocessors import Preprocessor
	from ..postprocessors import RawHtmlPostprocessor
	from .. import util
	from ..htmlparser import HTMLExtractor, blank_line_re
	import xml.etree.ElementTree as etree
	from typing import TYPE_CHECKING, Literal, Mapping

	# Imported for type annotations only; never executed at runtime.
	if TYPE_CHECKING:  # pragma: no cover
	    from markdown import Markdown


	class HTMLExtractorExtra(HTMLExtractor):
	    """
	    Override `HTMLExtractor` and create `etree` `Elements` for any elements which should have content parsed as
	    Markdown.

	    Two parallel stacks track the Markdown-enabled elements that are currently open: `mdstack` holds the tag
	    names and `mdstate` holds the matching parse state (`'block'`, `'span'`, `'off'` or `None`). While
	    `mdstack` is non-empty, tags and data are fed to an `etree.TreeBuilder` instead of the parent extractor.
	    """

	    def __init__(self, md: Markdown, *args, **kwargs):
	        """
	        Build the tag classification sets and initialize the parent extractor.

	        Arguments:
	            md: The `Markdown` instance; its `block_level_elements` seed the set of block-level tags.
	            *args: Extra positional arguments forwarded unchanged to `HTMLExtractor`.
	            **kwargs: Extra keyword arguments forwarded unchanged to `HTMLExtractor`.
	        """
	        # All block-level tags.
	        self.block_level_tags = set(md.block_level_elements.copy())
	        # Block-level tags in which the content only gets span level parsing
	        self.span_tags = set(
	            ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'summary', 'td', 'th']
	        )
	        # Block-level tags which never get their content parsed.
	        self.raw_tags = set(['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea'])

	        super().__init__(md, *args, **kwargs)

	        # Block-level tags in which the content gets parsed as blocks.
	        # `empty_tags` is read here only after the parent constructor has run.
	        self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags)
	        self.span_and_blocks_tags = self.block_tags | self.span_tags

	    def reset(self):
	        """Reset this instance. Loses all unprocessed data."""
	        self.mdstack: list[str] = []  # When markdown=1, stack contains a list of tags
	        self.treebuilder = etree.TreeBuilder()
	        self.mdstate: list[Literal['block', 'span', 'off', None]] = []
	        super().reset()

	    def close(self):
	        """Handle any buffered data."""
	        super().close()
	        # Handle any unclosed tags.
	        if self.mdstack:
	            # Close the outermost parent. `handle_endtag` will close all unclosed children.
	            self.handle_endtag(self.mdstack[0])

	    def get_element(self) -> etree.Element:
	        """ Return element from `treebuilder` and reset `treebuilder` for later use. """
	        element = self.treebuilder.close()
	        self.treebuilder = etree.TreeBuilder()
	        return element

	    def get_state(self, tag, attrs: Mapping[str, str]) -> Literal['block', 'span', 'off', None]:
	        """
	        Return state from tag and `markdown` attribute. One of 'block', 'span', or 'off'.

	        `None` is returned only when `tag` is not a block-level tag and no other rule matched.
	        """
	        md_attr = attrs.get('markdown', '0')
	        if md_attr == 'markdown':
	            # `<tag markdown>` is the same as `<tag markdown='1'>`.
	            md_attr = '1'
	        parent_state = self.mdstate[-1] if self.mdstate else None
	        if parent_state == 'off' or (parent_state == 'span' and md_attr != '0'):
	            # Only use the parent state if it is more restrictive than the markdown attribute.
	            md_attr = parent_state
	        if ((md_attr == '1' and tag in self.block_tags) or
	                (md_attr == 'block' and tag in self.span_and_blocks_tags)):
	            return 'block'
	        elif ((md_attr == '1' and tag in self.span_tags) or
	                (md_attr == 'span' and tag in self.span_and_blocks_tags)):
	            return 'span'
	        elif tag in self.block_level_tags:
	            return 'off'
	        else:  # pragma: no cover
	            return None

	    def handle_starttag(self, tag, attrs):
	        """
	        Dispatch an opening tag.

	        Tags in `empty_tags` are emitted through `handle_empty_tag`. Block-level tags at the start of a line
	        (or in a tail) either fall back to the parent extractor or are opened on the tree builder, depending
	        on the state computed by `get_state`. Everything else is treated as a span-level tag and passed on
	        as text.
	        """
	        # Handle tags that should always be empty and do not specify a closing tag
	        if tag in self.empty_tags and (self.at_line_start() or self.intail):
	            attrs = {key: value if value is not None else key for key, value in attrs}
	            if "markdown" in attrs:
	                # Re-serialize the tag so the `markdown` attribute does not reach the output.
	                attrs.pop('markdown')
	                element = etree.Element(tag, attrs)
	                data = etree.tostring(element, encoding='unicode', method='html')
	            else:
	                data = self.get_starttag_text()
	            self.handle_empty_tag(data, True)
	            return

	        if tag in self.block_level_tags and (self.at_line_start() or self.intail):
	            # Valueless attribute (ex: `<tag checked>`) results in `[('checked', None)]`.
	            # Convert to `{'checked': 'checked'}`.
	            attrs = {key: value if value is not None else key for key, value in attrs}
	            state = self.get_state(tag, attrs)
	            if self.inraw or (state in [None, 'off'] and not self.mdstack):
	                # fall back to default behavior
	                attrs.pop('markdown', None)
	                super().handle_starttag(tag, attrs)
	            else:
	                if 'p' in self.mdstack and tag in self.block_level_tags:
	                    # Close unclosed 'p' tag
	                    self.handle_endtag('p')
	                self.mdstate.append(state)
	                self.mdstack.append(tag)
	                # Record the resolved state on the element under the `markdown` key.
	                attrs['markdown'] = state
	                self.treebuilder.start(tag, attrs)
	        else:
	            # Span level tag
	            if self.inraw:
	                super().handle_starttag(tag, attrs)
	            else:
	                text = self.get_starttag_text()
	                if self.mdstate and self.mdstate[-1] == "off":
	                    self.handle_data(self.md.htmlStash.store(text))
	                else:
	                    self.handle_data(text)
	                if tag in self.CDATA_CONTENT_ELEMENTS:
	                    # This is presumably a standalone tag in a code span (see #1036).
	                    self.clear_cdata_mode()

	    def handle_endtag(self, tag):
	        """
	        Dispatch a closing tag.

	        A block-level tag that is on `mdstack` closes itself and any unclosed children. Once the stack is
	        empty, the finished element is stashed and its placeholder appended to `cleandoc`. Orphan block-level
	        closing tags and span-level closing tags are passed on as text.
	        """
	        if tag in self.block_level_tags:
	            if self.inraw:
	                super().handle_endtag(tag)
	            elif tag in self.mdstack:
	                # Close element and any unclosed children
	                while self.mdstack:
	                    item = self.mdstack.pop()
	                    self.mdstate.pop()
	                    self.treebuilder.end(item)
	                    if item == tag:
	                        break
	                if not self.mdstack:
	                    # Last item in stack is closed. Stash it
	                    element = self.get_element()
	                    # Get last entry to see if it ends in newlines
	                    # If it is an element, assume there is no newlines
	                    item = self.cleandoc[-1] if self.cleandoc else ''
	                    # If we only have one newline before block element, add another
	                    if not item.endswith('\n\n') and item.endswith('\n'):
	                        self.cleandoc.append('\n')
	                    self.cleandoc.append(self.md.htmlStash.store(element))
	                    self.cleandoc.append('\n\n')
	                    self.state = []
	                    # Check if element has a tail
	                    if not blank_line_re.match(
	                            self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]):
	                        # More content exists after `endtag`.
	                        self.intail = True
	            else:
	                # Treat orphan closing tag as a span level tag.
	                text = self.get_endtag_text(tag)
	                if self.mdstate and self.mdstate[-1] == "off":
	                    self.handle_data(self.md.htmlStash.store(text))
	                else:
	                    self.handle_data(text)
	        else:
	            # Span level tag
	            if self.inraw:
	                super().handle_endtag(tag)
	            else:
	                text = self.get_endtag_text(tag)
	                if self.mdstate and self.mdstate[-1] == "off":
	                    self.handle_data(self.md.htmlStash.store(text))
	                else:
	                    self.handle_data(text)

	    def handle_startendtag(self, tag, attrs):
	        """Handle a self-closing tag (`<tag />`), dropping any `markdown` attribute from tags in `empty_tags`."""
	        if tag in self.empty_tags:
	            attrs = {key: value if value is not None else key for key, value in attrs}
	            if "markdown" in attrs:
	                attrs.pop('markdown')
	                element = etree.Element(tag, attrs)
	                data = etree.tostring(element, encoding='unicode', method='html')
	            else:
	                data = self.get_starttag_text()
	        else:
	            data = self.get_starttag_text()
	        self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))

	    def handle_data(self, data):
	        """Send text to the tree builder while a Markdown-enabled element is open, else to the parent."""
	        if self.intail and '\n' in data:
	            # A newline ends the tail of the previously closed element.
	            self.intail = False
	        if self.inraw or not self.mdstack:
	            super().handle_data(data)
	        else:
	            self.treebuilder.data(data)

	    def handle_empty_tag(self, data, is_block):
	        """Stash an empty tag as a placeholder while a Markdown-enabled element is open, else defer to the parent."""
	        if self.inraw or not self.mdstack:
	            super().handle_empty_tag(data, is_block)
	        else:
	            if self.at_line_start() and is_block:
	                # Surround the placeholder with newlines so it forms its own block.
	                self.handle_data('\n' + self.md.htmlStash.store(data) + '\n\n')
	            else:
	                self.handle_data(self.md.htmlStash.store(data))

	    def parse_pi(self, i: int) -> int:
	        """Parse a processing instruction starting at index `i` and return the index to resume from."""
	        if self.at_line_start() or self.intail or self.mdstack:
	            # The same override exists in `HTMLExtractor` without the check
	            # for `mdstack`. Therefore, use parent of `HTMLExtractor` instead.
	            return super(HTMLExtractor, self).parse_pi(i)
	        # This is not the beginning of a raw block so treat as plain data
	        # and avoid consuming any tags which may follow (see #1066).
	        self.handle_data('<?')
	        return i + 2

	    def parse_html_declaration(self, i: int) -> int:
	        """Parse an HTML declaration starting at index `i` and return the index to resume from."""
	        if self.at_line_start() or self.intail or self.mdstack:
	            # The same override exists in `HTMLExtractor` without the check
	            # for `mdstack`. Therefore, use parent of `HTMLExtractor` instead.
	            return super(HTMLExtractor, self).parse_html_declaration(i)
	        # This is not the beginning of a raw block so treat as plain data
	        # and avoid consuming any tags which may follow (see #1066).
	        self.handle_data('<!')
	        return i + 2


	class HtmlBlockPreprocessor(Preprocessor):
	    """Remove html blocks from the text and store them for later retrieval."""

	    def run(self, lines: list[str]) -> list[str]:
	        """
	        Feed the whole document through `HTMLExtractorExtra` and return the cleaned lines.

	        Arguments:
	            lines: The source document split into lines.

	        Returns:
	            The parser's `cleandoc` fragments joined together and split again on newlines.
	        """
	        source = '\n'.join(lines)
	        parser = HTMLExtractorExtra(self.md)
	        parser.feed(source)
	        # `close` flushes buffered data and closes any still-open Markdown-enabled tags.
	        parser.close()
	        return ''.join(parser.cleandoc).split('\n')


	class MarkdownInHtmlProcessor(BlockProcessor):
	    """Process Markdown Inside HTML Blocks which have been stored in the `HtmlStash`."""

	    def test(self, parent: etree.Element, block: str) -> bool:
	        """Accept every block; the real matching is done in `run`."""
	        # Always return True. `run` will return `False` if not a valid match.
	        return True

	    def parse_element_content(self, element: etree.Element) -> None:
	        """
	        Recursively parse the text content of an `etree` Element as Markdown.

	        Any block level elements generated from the Markdown will be inserted as children of the element in place
	        of the text content. All `markdown` attributes are removed. For any elements in which Markdown parsing has
	        been disabled, the text content of it and its children are wrapped in an `AtomicString`.
	        """

	        # A missing `markdown` attribute is treated the same as 'off'.
	        md_attr = element.attrib.pop('markdown', 'off')

	        if md_attr == 'block':
	            # Parse content as block level
	            # The order in which the different parts are parsed (text, children, tails) is important here as the
	            # order of elements needs to be preserved. We can't be inserting items at a later point in the current
	            # iteration as we don't want to do raw processing on elements created from parsing Markdown text (for
	            # example). Therefore, the order of operations is children, tails, text.

	            # Recursively parse existing children from raw HTML
	            for child in list(element):
	                self.parse_element_content(child)

	            # Parse Markdown text in tail of children. Do this separate to avoid raw HTML parsing.
	            # Save the position of each item to be inserted later in reverse.
	            tails = []
	            for pos, child in enumerate(element):
	                if child.tail:
	                    block = child.tail.rstrip('\n')
	                    child.tail = ''
	                    # Use a dummy placeholder element.
	                    dummy = etree.Element('div')
	                    self.parser.parseBlocks(dummy, block.split('\n\n'))
	                    children = list(dummy)
	                    children.reverse()
	                    tails.append((pos + 1, children))

	            # Insert the elements created from the tails in reverse.
	            # Working back to front keeps the saved positions valid while inserting.
	            tails.reverse()
	            for pos, tail in tails:
	                for item in tail:
	                    element.insert(pos, item)

	            # Parse Markdown text content. Do this last to avoid raw HTML parsing.
	            if element.text:
	                block = element.text.rstrip('\n')
	                element.text = ''
	                # Use a dummy placeholder element as the content needs to get inserted before existing children.
	                dummy = etree.Element('div')
	                self.parser.parseBlocks(dummy, block.split('\n\n'))
	                children = list(dummy)
	                children.reverse()
	                for child in children:
	                    element.insert(0, child)

	        elif md_attr == 'span':
	            # Span level parsing will be handled by inline processors.
	            # Walk children here to remove any `markdown` attributes.
	            for child in list(element):
	                self.parse_element_content(child)

	        else:
	            # Disable inline parsing for everything else
	            if element.text is None:
	                element.text = ''
	            element.text = util.AtomicString(element.text)
	            for child in list(element):
	                self.parse_element_content(child)
	                if child.tail:
	                    child.tail = util.AtomicString(child.tail)

	    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
	        """
	        Replace a stash placeholder at the head of `blocks` with its parsed `etree` element.

	        Arguments:
	            parent: The element the processed element is appended to.
	            blocks: The remaining blocks; the first one is consumed on a match.

	        Returns:
	            `True` when the first block was a placeholder for a stashed `etree.Element`, otherwise `False`.
	        """
	        m = util.HTML_PLACEHOLDER_RE.match(blocks[0])
	        if m:
	            index = int(m.group(1))
	            element = self.parser.md.htmlStash.rawHtmlBlocks[index]
	            if isinstance(element, etree.Element):
	                # We have a matched element. Process it.
	                blocks.pop(0)
	                self.parse_element_content(element)
	                parent.append(element)
	                # Cleanup stash. Replace element with empty string to avoid confusing postprocessor.
	                self.parser.md.htmlStash.rawHtmlBlocks[index] = ''
	                # Confirm the match to the `blockparser`.
	                return True
	        # No match found.
	        return False


	class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor):
	    """Raw HTML postprocessor that can also serialize `etree` elements left in the stash."""

	    def stash_to_string(self, text: str | etree.Element) -> str:
	        """ Override default to handle any `etree` elements still in the stash. """
	        if isinstance(text, etree.Element):
	            return self.md.serializer(text)
	        else:
	            return str(text)


	class MarkdownInHtmlExtension(Extension):
	    """Add Markdown parsing in HTML to Markdown class."""

	    def extendMarkdown(self, md):
	        """
	        Register extension instances.

	        Registers a preprocessor as 'html_block', a block processor as 'markdown_block' and a postprocessor
	        as 'raw_html' on the given `md` instance.
	        """

	        # Replace raw HTML preprocessor
	        md.preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20)
	        # Add `blockprocessor` which handles the placeholders for `etree` elements
	        md.parser.blockprocessors.register(
	            MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105
	        )
	        # Replace raw HTML postprocessor
	        md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30)


	def makeExtension(**kwargs):  # pragma: no cover
	    """Return a `MarkdownInHtmlExtension` built from the given keyword arguments."""
	    return MarkdownInHtmlExtension(**kwargs)