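// NOTE: the lines "Spaces:", "Sleeping", "Sleeping" that stood here were
// web-page navigation residue from a scrape; they are not part of this module.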
;(function (sax) { // wrapper for non-node envs | |
// Public factory: builds a parser without requiring `new` at the call site.
sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
// Expose the constructor itself as well.
sax.SAXParser = SAXParser
// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
// since that's the earliest that a buffer overrun could occur. This way, checks are
// as rare as required, but as often as necessary to ensure never crossing this bound.
// Furthermore, buffers are only tested at most once per write(), so passing a very
// large string into write() might have undesirable effects, but this is manageable by
// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
// edge case, result in creating at most one complete copy of the string passed in.
// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 64 * 1024

// Names of the per-parser string buffers. clearBuffers() resets each of them
// to '' and checkBufferLength() measures each of them.
var buffers = [
  'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype',
  'procInstName', 'procInstBody', 'entity', 'attribName',
  'attribValue', 'cdata', 'script'
]
// Event names exposed to consumers. Inside this file the parser dispatches
// each one to a handler property named 'on' + <event> (see emit()).
sax.EVENTS = [
  'text',
  'processinginstruction',
  'sgmldeclaration',
  'doctype',
  'comment',
  'opentagstart',
  'attribute',
  'opentag',
  'closetag',
  'opencdata',
  'cdata',
  'closecdata',
  'error',
  'end',
  'ready',
  'script',
  'opennamespace',
  'closenamespace'
]
// Parser constructor. May be called with or without `new`.
//   strict - truthy to report well-formedness problems through strictFail()
//   opt    - optional settings object; the keys read here are lowercase,
//            lowercasetags, noscript, strictEntities, xmlns and position.
//            NOTE: opt.lowercase is written back onto the caller's object.
// Also used by end() to reset an existing parser (via SAXParser.call).
// Emits 'onready' once the state has been initialised.
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt)
  }
  var parser = this
  clearBuffers(parser)
  parser.q = parser.c = ''
  parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
  parser.opt = opt || {}
  parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
  // name of the String method used to normalise tag/attribute names in loose mode
  parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase'
  parser.tags = []
  parser.closed = parser.closedRoot = parser.sawRoot = false
  parser.tag = parser.error = null
  parser.strict = !!strict
  parser.noscript = !!(strict || parser.opt.noscript)
  parser.state = S.BEGIN
  parser.strictEntities = parser.opt.strictEntities
  // per-parser entity table inheriting from the shared one, so custom
  // entities can be added without touching sax.ENTITIES / sax.XML_ENTITIES
  parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES)
  parser.attribList = []
  // namespaces form a prototype chain.
  // it always points at the current tag,
  // which protos to its parent tag.
  if (parser.opt.xmlns) {
    parser.ns = Object.create(rootNS)
  }
  // mostly just for error reporting
  parser.trackPosition = parser.opt.position !== false
  if (parser.trackPosition) {
    parser.position = parser.line = parser.column = 0
  }
  emit(parser, 'onready')
}
// Minimal Object.create fallback for engines that lack it: returns a new
// object whose prototype is `o` (no property-descriptor argument support).
if (!Object.create) {
  Object.create = function (o) {
    function F () {}
    F.prototype = o
    var newf = new F()
    return newf
  }
}
// Minimal Object.keys fallback for engines that lack it: collects the own
// enumerable property names of `o`.
if (!Object.keys) {
  Object.keys = function (o) {
    var a = []
    // call hasOwnProperty via Object.prototype so objects that shadow it
    // (or have no prototype) are still handled
    for (var i in o) if (Object.prototype.hasOwnProperty.call(o, i)) a.push(i)
    return a
  }
}
// Inspect every buffer named in `buffers`. A buffer longer than the allowed
// maximum is either flushed early (textNode, cdata, script) or reported via
// error(). Afterwards the next check is scheduled in
// parser.bufferCheckPosition.
function checkBufferLength (parser) {
  var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
  var maxActual = 0
  for (var i = 0, l = buffers.length; i < l; i++) {
    var len = parser[buffers[i]].length
    if (len > maxAllowed) {
      // Text/cdata nodes can get big, and since they're buffered,
      // we can get here under normal conditions.
      // Avoid issues by emitting the text node now,
      // so at least it won't get any bigger.
      switch (buffers[i]) {
        case 'textNode':
          closeText(parser)
          break
        case 'cdata':
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
          break
        case 'script':
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
          break
        default:
          error(parser, 'Max buffer length exceeded: ' + buffers[i])
      }
    }
    // uses the length measured before any flush above
    maxActual = Math.max(maxActual, len)
  }
  // schedule the next check for the earliest possible buffer overrun.
  var m = sax.MAX_BUFFER_LENGTH - maxActual
  parser.bufferCheckPosition = m + parser.position
}
// Reset every buffer listed in `buffers` to the empty string on `parser`.
function clearBuffers (parser) {
  for (var i = 0, l = buffers.length; i < l; i++) {
    parser[buffers[i]] = ''
  }
}
// Emit whatever is pending in the text, cdata and script buffers, then
// empty the cdata/script buffers (closeText() empties the text buffer).
function flushBuffers (parser) {
  closeText(parser)
  if (parser.cdata !== '') {
    emitNode(parser, 'oncdata', parser.cdata)
    parser.cdata = ''
  }
  if (parser.script !== '') {
    emitNode(parser, 'onscript', parser.script)
    parser.script = ''
  }
}
// Public parser methods.
//   end()    - finish parsing (see end()); returns undefined
//   write(s) - feed a chunk; write(null) ends the stream
//   resume() - clear a stored error so write() will accept input again
//   close()  - shorthand for write(null)
//   flush()  - emit buffered text/cdata/script immediately
SAXParser.prototype = {
  end: function () { end(this) },
  write: write,
  resume: function () { this.error = null; return this },
  close: function () { return this.write(null) },
  flush: function () { flushBuffers(this) }
}
// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.

// Markers compared (case-insensitively) against the text following '<!'.
var CDATA = '[CDATA['
var DOCTYPE = 'DOCTYPE'
// Namespace URIs that the `xml` and `xmlns` prefixes must be bound to.
var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
// Root of the namespace prototype chain used when opt.xmlns is set.
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
// This implementation works on strings, a single character at a time
// as such, it cannot ever support astral-plane characters (10000-EFFFF)
// without a significant breaking change to either this parser, or the
// JavaScript language. Implementation of an emoji-capable xml parser
// is left as an exercise for the reader.
// nameStart/nameBody match one character of a tag or attribute name;
// entityStart/entityBody are the same classes plus '#'.
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
// True when `c` is a space, line feed, carriage return or tab.
function isWhitespace (c) {
  return c === ' ' || c === '\n' || c === '\r' || c === '\t'
}
// True when `c` is a double or single quote character.
function isQuote (c) {
  return c === '"' || c === '\''
}
// True when `c` terminates an unquoted attribute value: '>' or whitespace.
function isAttribEnd (c) {
  return c === '>' || isWhitespace(c)
}
// True when `regex` matches the string `c`.
function isMatch (regex, c) {
  return regex.test(c)
}
// Negation of isMatch(): true when `regex` does not match `c`.
function notMatch (regex, c) {
  return !isMatch(regex, c)
}
// Tokenizer states. `S` is used here as a running counter so each state
// gets a consecutive integer starting at 0.
var S = 0
sax.STATE = {
  BEGIN: S++, // leading byte order mark or whitespace
  BEGIN_WHITESPACE: S++, // leading whitespace
  TEXT: S++, // general stuff
  TEXT_ENTITY: S++, // &amp and such.
  OPEN_WAKA: S++, // <
  SGML_DECL: S++, // <!BLARG
  SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
  DOCTYPE: S++, // <!DOCTYPE
  DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah
  DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ...
  DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo
  COMMENT_STARTING: S++, // <!-
  COMMENT: S++, // <!--
  COMMENT_ENDING: S++, // <!-- blah -
  COMMENT_ENDED: S++, // <!-- blah --
  CDATA: S++, // <![CDATA[ something
  CDATA_ENDING: S++, // ]
  CDATA_ENDING_2: S++, // ]]
  PROC_INST: S++, // <?hi
  PROC_INST_BODY: S++, // <?hi there
  PROC_INST_ENDING: S++, // <?hi "there" ?
  OPEN_TAG: S++, // <strong
  OPEN_TAG_SLASH: S++, // <strong /
  ATTRIB: S++, // <a
  ATTRIB_NAME: S++, // <a foo
  ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
  ATTRIB_VALUE: S++, // <a foo=
  ATTRIB_VALUE_QUOTED: S++, // <a foo="bar
  ATTRIB_VALUE_CLOSED: S++, // <a foo="bar"
  ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
  ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="&quot;"
  ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=&quot
  CLOSE_TAG: S++, // </a
  CLOSE_TAG_SAW_WHITE: S++, // </a >
  SCRIPT: S++, // <script> ...
  SCRIPT_ENDING: S++ // <script> ... <
}
// The five entities predefined by XML; used as the entity table when
// opt.strictEntities is set.
sax.XML_ENTITIES = {
  'amp': '&',
  'gt': '>',
  'lt': '<',
  'quot': '"',
  'apos': "'"
}
// Default named-entity table. String values are used as-is; numeric values
// are character codes that are converted to one-character strings right
// after this table is defined.
sax.ENTITIES = {
  'amp': '&', 'gt': '>', 'lt': '<', 'quot': '"', 'apos': "'",
  'AElig': 198, 'Aacute': 193, 'Acirc': 194, 'Agrave': 192, 'Aring': 197, 'Atilde': 195,
  'Auml': 196, 'Ccedil': 199, 'ETH': 208, 'Eacute': 201, 'Ecirc': 202, 'Egrave': 200,
  'Euml': 203, 'Iacute': 205, 'Icirc': 206, 'Igrave': 204, 'Iuml': 207, 'Ntilde': 209,
  'Oacute': 211, 'Ocirc': 212, 'Ograve': 210, 'Oslash': 216, 'Otilde': 213, 'Ouml': 214,
  'THORN': 222, 'Uacute': 218, 'Ucirc': 219, 'Ugrave': 217, 'Uuml': 220, 'Yacute': 221,
  'aacute': 225, 'acirc': 226, 'aelig': 230, 'agrave': 224, 'aring': 229, 'atilde': 227,
  'auml': 228, 'ccedil': 231, 'eacute': 233, 'ecirc': 234, 'egrave': 232, 'eth': 240,
  'euml': 235, 'iacute': 237, 'icirc': 238, 'igrave': 236, 'iuml': 239, 'ntilde': 241,
  'oacute': 243, 'ocirc': 244, 'ograve': 242, 'oslash': 248, 'otilde': 245, 'ouml': 246,
  'szlig': 223, 'thorn': 254, 'uacute': 250, 'ucirc': 251, 'ugrave': 249, 'uuml': 252,
  'yacute': 253, 'yuml': 255,
  'copy': 169, 'reg': 174, 'nbsp': 160, 'iexcl': 161, 'cent': 162, 'pound': 163,
  'curren': 164, 'yen': 165, 'brvbar': 166, 'sect': 167, 'uml': 168, 'ordf': 170,
  'laquo': 171, 'not': 172, 'shy': 173, 'macr': 175, 'deg': 176, 'plusmn': 177,
  'sup1': 185, 'sup2': 178, 'sup3': 179, 'acute': 180, 'micro': 181, 'para': 182,
  'middot': 183, 'cedil': 184, 'ordm': 186, 'raquo': 187, 'frac14': 188, 'frac12': 189,
  'frac34': 190, 'iquest': 191, 'times': 215, 'divide': 247,
  'OElig': 338, 'oelig': 339, 'Scaron': 352, 'scaron': 353, 'Yuml': 376, 'fnof': 402,
  'circ': 710, 'tilde': 732,
  'Alpha': 913, 'Beta': 914, 'Gamma': 915, 'Delta': 916, 'Epsilon': 917, 'Zeta': 918,
  'Eta': 919, 'Theta': 920, 'Iota': 921, 'Kappa': 922, 'Lambda': 923, 'Mu': 924,
  'Nu': 925, 'Xi': 926, 'Omicron': 927, 'Pi': 928, 'Rho': 929, 'Sigma': 931,
  'Tau': 932, 'Upsilon': 933, 'Phi': 934, 'Chi': 935, 'Psi': 936, 'Omega': 937,
  'alpha': 945, 'beta': 946, 'gamma': 947, 'delta': 948, 'epsilon': 949, 'zeta': 950,
  'eta': 951, 'theta': 952, 'iota': 953, 'kappa': 954, 'lambda': 955, 'mu': 956,
  'nu': 957, 'xi': 958, 'omicron': 959, 'pi': 960, 'rho': 961, 'sigmaf': 962,
  'sigma': 963, 'tau': 964, 'upsilon': 965, 'phi': 966, 'chi': 967, 'psi': 968,
  'omega': 969, 'thetasym': 977, 'upsih': 978, 'piv': 982,
  'ensp': 8194, 'emsp': 8195, 'thinsp': 8201, 'zwnj': 8204, 'zwj': 8205, 'lrm': 8206,
  'rlm': 8207, 'ndash': 8211, 'mdash': 8212, 'lsquo': 8216, 'rsquo': 8217, 'sbquo': 8218,
  'ldquo': 8220, 'rdquo': 8221, 'bdquo': 8222, 'dagger': 8224, 'Dagger': 8225, 'bull': 8226,
  'hellip': 8230, 'permil': 8240, 'prime': 8242, 'Prime': 8243, 'lsaquo': 8249, 'rsaquo': 8250,
  'oline': 8254, 'frasl': 8260, 'euro': 8364, 'image': 8465, 'weierp': 8472, 'real': 8476,
  'trade': 8482, 'alefsym': 8501,
  'larr': 8592, 'uarr': 8593, 'rarr': 8594, 'darr': 8595, 'harr': 8596, 'crarr': 8629,
  'lArr': 8656, 'uArr': 8657, 'rArr': 8658, 'dArr': 8659, 'hArr': 8660,
  'forall': 8704, 'part': 8706, 'exist': 8707, 'empty': 8709, 'nabla': 8711, 'isin': 8712,
  'notin': 8713, 'ni': 8715, 'prod': 8719, 'sum': 8721, 'minus': 8722, 'lowast': 8727,
  'radic': 8730, 'prop': 8733, 'infin': 8734, 'ang': 8736, 'and': 8743, 'or': 8744,
  'cap': 8745, 'cup': 8746, 'int': 8747, 'there4': 8756, 'sim': 8764, 'cong': 8773,
  'asymp': 8776, 'ne': 8800, 'equiv': 8801, 'le': 8804, 'ge': 8805, 'sub': 8834,
  'sup': 8835, 'nsub': 8836, 'sube': 8838, 'supe': 8839, 'oplus': 8853, 'otimes': 8855,
  'perp': 8869, 'sdot': 8901, 'lceil': 8968, 'rceil': 8969, 'lfloor': 8970, 'rfloor': 8971,
  'lang': 9001, 'rang': 9002, 'loz': 9674, 'spades': 9824, 'clubs': 9827, 'hearts': 9829,
  'diams': 9830
}
// Replace every numeric entry of sax.ENTITIES with the one-character string
// for that character code; string entries are left untouched.
Object.keys(sax.ENTITIES).forEach(function (key) {
  var e = sax.ENTITIES[key]
  var s = typeof e === 'number' ? String.fromCharCode(e) : e
  sax.ENTITIES[key] = s
})
// Add the reverse mapping (number -> state name) to sax.STATE so a numeric
// state can be turned back into its name. The key list is snapshotted with
// Object.keys() so only the original own names are visited.
Object.keys(sax.STATE).forEach(function (s) {
  sax.STATE[sax.STATE[s]] = s
})
// shorthand
S = sax.STATE
// Call the handler stored at parser[event] with `data`, if one is set.
// `event` is the full property name, e.g. 'ontext'.
function emit (parser, event, data) {
  parser[event] && parser[event](data)
}
// Emit a node event, first flushing any pending text so that the text
// event is delivered before the node that follows it.
function emitNode (parser, nodeType, data) {
  if (parser.textNode) closeText(parser)
  emit(parser, nodeType, data)
}
// Apply the trim/normalize options to the buffered text, emit it as
// 'ontext' if anything is left, and reset the text buffer.
function closeText (parser) {
  parser.textNode = textopts(parser.opt, parser.textNode)
  if (parser.textNode) emit(parser, 'ontext', parser.textNode)
  parser.textNode = ''
}
// Return `text` after applying the text options:
//   opt.trim      - strip leading/trailing whitespace
//   opt.normalize - collapse each whitespace run to a single space
function textopts (opt, text) {
  if (opt.trim) text = text.trim()
  if (opt.normalize) text = text.replace(/\s+/g, ' ')
  return text
}
// Record and report a parse error.
// Flushes pending text, builds an Error whose message carries the current
// line, column and character, stores it on parser.error and emits
// 'onerror'. The Error also exposes `reason`, `line` and `column`.
// Returns the parser.
function error (parser, reason) {
  closeText(parser)
  const message = reason +
    '\nLine: ' + parser.line +
    '\nColumn: ' + parser.column +
    '\nChar: ' + parser.c
  const er = new Error(message)
  er.reason = reason
  er.line = parser.line
  er.column = parser.column
  parser.error = er
  emit(parser, 'onerror', er)
  return parser
}
// Finish parsing: report an unclosed root (strict mode) or an unexpected
// end when the tokenizer is mid-construct, flush pending text, mark the
// parser closed and emit 'onend'. The parser is then re-initialised in
// place by re-running the constructor, so it can be reused.
// Returns the parser.
function end (parser) {
  if (parser.sawRoot && !parser.closedRoot) strictFail(parser, 'Unclosed root tag')
  if ((parser.state !== S.BEGIN) &&
    (parser.state !== S.BEGIN_WHITESPACE) &&
    (parser.state !== S.TEXT)) {
    error(parser, 'Unexpected end')
  }
  closeText(parser)
  parser.c = ''
  parser.closed = true
  emit(parser, 'onend')
  SAXParser.call(parser, parser.strict, parser.opt)
  return parser
}
// Report `message` through error() only when the parser is in strict mode.
// Throws if `parser` is not a SAXParser instance (guards against internal
// misuse of this helper).
function strictFail (parser, message) {
  if (typeof parser !== 'object' || !(parser instanceof SAXParser)) {
    throw new Error('bad call to strictFail')
  }
  if (parser.strict) {
    error(parser, message)
  }
}
// Start a new tag from the buffered tag name: normalise the name's case in
// loose mode, create parser.tag, inherit the parent's namespace scope when
// opt.xmlns is set, reset the deferred attribute list and emit
// 'onopentagstart'.
function newTag (parser) {
  if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
  var parent = parser.tags[parser.tags.length - 1] || parser
  var tag = parser.tag = { name: parser.tagName, attributes: {} }
  // will be overridden if tag contains an xmlns="foo" or xmlns:foo="bar"
  if (parser.opt.xmlns) {
    tag.ns = parent.ns
  }
  parser.attribList.length = 0
  emitNode(parser, 'onopentagstart', tag)
}
// Split a qualified name into { prefix, local }.
// A name without ':' gets an empty prefix. When `attribute` is truthy, the
// bare name 'xmlns' is reported as prefix 'xmlns' with an empty local part.
function qname (name, attribute) {
  var i = name.indexOf(':')
  var qualName = i < 0 ? [ '', name ] : name.split(':')
  var prefix = qualName[0]
  var local = qualName[1]
  // <x "xmlns"="http://foo">
  if (attribute && name === 'xmlns') {
    prefix = 'xmlns'
    local = ''
  }
  return { prefix: prefix, local: local }
}
// Commit the buffered attribute (parser.attribName / parser.attribValue) to
// the current tag. Duplicate attribute names are dropped silently.
// In xmlns mode, namespace bindings are applied to the tag's scope and the
// attribute is queued in parser.attribList so 'onattribute' can be emitted
// later by openTag(); otherwise the attribute is stored and emitted now.
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]()
  }
  // Call hasOwnProperty through Object.prototype: the attributes object is
  // keyed by document-supplied names, so an attribute literally named
  // "hasOwnProperty" would otherwise shadow the method and make this throw.
  if (parser.attribList.indexOf(parser.attribName) !== -1 ||
    Object.prototype.hasOwnProperty.call(parser.tag.attributes, parser.attribName)) {
    parser.attribName = parser.attribValue = ''
    return
  }
  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true)
    var prefix = qn.prefix
    var local = qn.local
    if (prefix === 'xmlns') {
      // namespace binding attribute. push the binding into scope
      if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else {
        var tag = parser.tag
        var parent = parser.tags[parser.tags.length - 1] || parser
        // give the tag its own scope object the first time it adds a binding
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns)
        }
        tag.ns[local] = parser.attribValue
      }
    }
    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect. preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue])
  } else {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue
    emitNode(parser, 'onattribute', {
      name: parser.attribName,
      value: parser.attribValue
    })
  }
  parser.attribName = parser.attribValue = ''
}
// Finish the current open tag and emit 'onopentag'.
// In xmlns mode this first resolves the tag's prefix/local/uri, emits
// 'onopennamespace' for bindings introduced by the tag, and emits the
// deferred 'onattribute' events with namespace info attached.
//   selfClosing - truthy for <tag/>; the caller is then expected to close
//                 the tag, so parser.tag / parser.tagName are kept.
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    // emit namespace binding events
    var tag = parser.tag
    // add namespace info to tag
    var qn = qname(parser.tagName)
    tag.prefix = qn.prefix
    tag.local = qn.local
    tag.uri = tag.ns[qn.prefix] || ''
    if (tag.prefix && !tag.uri) {
      strictFail(parser, 'Unbound namespace prefix: ' +
        JSON.stringify(parser.tagName))
      tag.uri = qn.prefix
    }
    var parent = parser.tags[parser.tags.length - 1] || parser
    if (tag.ns && parent.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (p) {
        emitNode(parser, 'onopennamespace', {
          prefix: p,
          uri: tag.ns[p]
        })
      })
    }
    // handle deferred onattribute events
    // Note: do not apply default ns to attributes:
    // http://www.w3.org/TR/REC-xml-names/#defaulting
    for (var i = 0, l = parser.attribList.length; i < l; i++) {
      var nv = parser.attribList[i]
      var name = nv[0]
      var value = nv[1]
      var qualName = qname(name, true)
      var prefix = qualName.prefix
      var local = qualName.local
      var uri = prefix === '' ? '' : (tag.ns[prefix] || '')
      var a = {
        name: name,
        value: value,
        prefix: prefix,
        local: local,
        uri: uri
      }
      // if there's any attributes with an undefined namespace,
      // then fail on them now.
      if (prefix && prefix !== 'xmlns' && !uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(prefix))
        a.uri = prefix
      }
      parser.tag.attributes[name] = a
      emitNode(parser, 'onattribute', a)
    }
    parser.attribList.length = 0
  }
  parser.tag.isSelfClosing = !!selfClosing
  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, 'onopentag', parser.tag)
  if (!selfClosing) {
    // special case for <script> in non-strict mode.
    if (!parser.noscript && parser.tagName.toLowerCase() === 'script') {
      parser.state = S.SCRIPT
    } else {
      parser.state = S.TEXT
    }
    parser.tag = null
    parser.tagName = ''
  }
  parser.attribName = parser.attribValue = ''
  parser.attribList.length = 0
}
// Handle a completed closing tag whose name is in parser.tagName.
// Finds the nearest open tag with that name and pops every tag above and
// including it, emitting 'onclosetag' (and 'onclosenamespace' in xmlns
// mode) for each. An empty or unmatched closing tag is appended to the
// text buffer as literal text. While script content is buffered, any
// closing tag other than 'script' is appended to the script buffer.
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, 'Weird empty close tag.')
    parser.textNode += '</>'
    parser.state = S.TEXT
    return
  }
  if (parser.script) {
    if (parser.tagName !== 'script') {
      parser.script += '</' + parser.tagName + '>'
      parser.tagName = ''
      parser.state = S.SCRIPT
      return
    }
    emitNode(parser, 'onscript', parser.script)
    parser.script = ''
  }
  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) {
    tagName = tagName[parser.looseCase]()
  }
  var closeTo = tagName
  while (t--) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // fail the first time in strict mode
      strictFail(parser, 'Unexpected close tag')
    } else {
      break
    }
  }
  // didn't find it. we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
    parser.textNode += '</' + parser.tagName + '>'
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName
  var s = parser.tags.length
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, 'onclosetag', parser.tagName)
    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
      })
    }
  }
  // closing the outermost tag closes the root
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ''
  parser.attribList.length = 0
  parser.state = S.TEXT
}
// Resolve the entity name buffered in parser.entity to its replacement.
// Named entities are looked up in parser.ENTITIES (exact case first, then
// lowercase). '#N' and '#xN' are decoded as decimal / hexadecimal character
// references. Anything unresolvable is reported via strictFail() and
// returned verbatim as '&name;'.
function parseEntity (parser) {
  var entity = parser.entity
  var entityLC = entity.toLowerCase()
  var num
  var numStr = ''
  // Only accept real table values. Names such as "constructor" or
  // "__proto__" would otherwise resolve to members inherited from
  // Object.prototype and be spliced into the output.
  var exact = parser.ENTITIES[entity]
  if (exact && (typeof exact === 'string' || typeof exact === 'number')) {
    return exact
  }
  var lower = parser.ENTITIES[entityLC]
  if (lower && (typeof lower === 'string' || typeof lower === 'number')) {
    return lower
  }
  entity = entityLC
  if (entity.charAt(0) === '#') {
    if (entity.charAt(1) === 'x') {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }
  // leading zeros are allowed in the reference but not in the round-trip check
  entity = entity.replace(/^0+/, '')
  // String.fromCodePoint throws a RangeError outside 0..0x10FFFF, so treat
  // out-of-range references as invalid instead of crashing the parser.
  if (isNaN(num) || numStr.toLowerCase() !== entity || num < 0 || num > 0x10FFFF) {
    strictFail(parser, 'Invalid character entity')
    return '&' + parser.entity + ';'
  }
  return String.fromCodePoint(num)
}
// Handle one character before the first tag: '<' starts a tag, whitespace
// is skipped, and anything else begins a text node (a strict-mode failure).
function beginWhiteSpace (parser, c) {
  if (c === '<') {
    parser.state = S.OPEN_WAKA
    parser.startTagPosition = parser.position
  } else if (!isWhitespace(c)) {
    // have to process this as a text node.
    // weird, but happens.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
}
// Return the character of `chunk` at index `i`, or '' when `i` is at or
// past the end of the chunk.
function charAt (chunk, i) {
  var result = ''
  if (i < chunk.length) {
    result = chunk.charAt(i)
  }
  return result
}
// Feed a chunk of input to the parser (installed as SAXParser#write).
//   chunk - string to parse; null ends the stream (see end()); any other
//           object is converted with toString().
// Throws the stored error if a previous error has not been cleared with
// resume(). Writing to a closed parser reports an error instead.
// The chunk is consumed one character at a time by a state machine keyed
// on parser.state; after the chunk is consumed the buffer lengths are
// checked if the scheduled check position has been reached.
// Returns the parser.
function write (chunk) {
  var parser = this
  if (this.error) {
    throw this.error
  }
  if (parser.closed) {
    return error(parser,
      'Cannot write after close. Assign an onready handler.')
  }
  if (chunk === null) {
    return end(parser)
  }
  if (typeof chunk === 'object') {
    chunk = chunk.toString()
  }
  var i = 0
  var c = ''
  while (true) {
    c = charAt(chunk, i++)
    parser.c = c
    if (!c) {
      break
    }
    if (parser.trackPosition) {
      parser.position++
      if (c === '\n') {
        parser.line++
        parser.column = 0
      } else {
        parser.column++
      }
    }
    switch (parser.state) {
      case S.BEGIN:
        parser.state = S.BEGIN_WHITESPACE
        if (c === '\uFEFF') {
          continue
        }
        beginWhiteSpace(parser, c)
        continue
      case S.BEGIN_WHITESPACE:
        beginWhiteSpace(parser, c)
        continue
      case S.TEXT:
        if (parser.sawRoot && !parser.closedRoot) {
          // fast path: consume a run of plain text in one go
          var starti = i - 1
          while (c && c !== '<' && c !== '&') {
            c = charAt(chunk, i++)
            if (c && parser.trackPosition) {
              parser.position++
              if (c === '\n') {
                parser.line++
                parser.column = 0
              } else {
                parser.column++
              }
            }
          }
          parser.textNode += chunk.substring(starti, i - 1)
        }
        if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
          parser.state = S.OPEN_WAKA
          parser.startTagPosition = parser.position
        } else {
          if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
            strictFail(parser, 'Text data outside of root node.')
          }
          if (c === '&') {
            parser.state = S.TEXT_ENTITY
          } else {
            parser.textNode += c
          }
        }
        continue
      case S.SCRIPT:
        // only non-strict
        if (c === '<') {
          parser.state = S.SCRIPT_ENDING
        } else {
          parser.script += c
        }
        continue
      case S.SCRIPT_ENDING:
        if (c === '/') {
          parser.state = S.CLOSE_TAG
        } else {
          parser.script += '<' + c
          parser.state = S.SCRIPT
        }
        continue
      case S.OPEN_WAKA:
        // either a /, ?, !, or text is coming next.
        if (c === '!') {
          parser.state = S.SGML_DECL
          parser.sgmlDecl = ''
        } else if (isWhitespace(c)) {
          // wait for it...
        } else if (isMatch(nameStart, c)) {
          parser.state = S.OPEN_TAG
          parser.tagName = c
        } else if (c === '/') {
          parser.state = S.CLOSE_TAG
          parser.tagName = ''
        } else if (c === '?') {
          parser.state = S.PROC_INST
          parser.procInstName = parser.procInstBody = ''
        } else {
          strictFail(parser, 'Unencoded <')
          // if there was some whitespace, then add that in.
          if (parser.startTagPosition + 1 < parser.position) {
            var pad = parser.position - parser.startTagPosition
            c = new Array(pad).join(' ') + c
          }
          parser.textNode += '<' + c
          parser.state = S.TEXT
        }
        continue
      case S.SGML_DECL:
        if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
          emitNode(parser, 'onopencdata')
          parser.state = S.CDATA
          parser.sgmlDecl = ''
          parser.cdata = ''
        } else if (parser.sgmlDecl + c === '--') {
          parser.state = S.COMMENT
          parser.comment = ''
          parser.sgmlDecl = ''
        } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
          parser.state = S.DOCTYPE
          if (parser.doctype || parser.sawRoot) {
            strictFail(parser,
              'Inappropriately located doctype declaration')
          }
          parser.doctype = ''
          parser.sgmlDecl = ''
        } else if (c === '>') {
          emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
          parser.sgmlDecl = ''
          parser.state = S.TEXT
        } else if (isQuote(c)) {
          parser.state = S.SGML_DECL_QUOTED
          // remember which quote opened the string; SGML_DECL_QUOTED
          // compares against parser.q to find the matching close quote
          parser.q = c
          parser.sgmlDecl += c
        } else {
          parser.sgmlDecl += c
        }
        continue
      case S.SGML_DECL_QUOTED:
        if (c === parser.q) {
          parser.state = S.SGML_DECL
          parser.q = ''
        }
        parser.sgmlDecl += c
        continue
      case S.DOCTYPE:
        if (c === '>') {
          parser.state = S.TEXT
          emitNode(parser, 'ondoctype', parser.doctype)
          parser.doctype = true // just remember that we saw it.
        } else {
          parser.doctype += c
          if (c === '[') {
            parser.state = S.DOCTYPE_DTD
          } else if (isQuote(c)) {
            parser.state = S.DOCTYPE_QUOTED
            parser.q = c
          }
        }
        continue
      case S.DOCTYPE_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.q = ''
          parser.state = S.DOCTYPE
        }
        continue
      case S.DOCTYPE_DTD:
        parser.doctype += c
        if (c === ']') {
          parser.state = S.DOCTYPE
        } else if (isQuote(c)) {
          parser.state = S.DOCTYPE_DTD_QUOTED
          parser.q = c
        }
        continue
      case S.DOCTYPE_DTD_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.state = S.DOCTYPE_DTD
          parser.q = ''
        }
        continue
      case S.COMMENT:
        if (c === '-') {
          parser.state = S.COMMENT_ENDING
        } else {
          parser.comment += c
        }
        continue
      case S.COMMENT_ENDING:
        if (c === '-') {
          parser.state = S.COMMENT_ENDED
          parser.comment = textopts(parser.opt, parser.comment)
          if (parser.comment) {
            emitNode(parser, 'oncomment', parser.comment)
          }
          parser.comment = ''
        } else {
          parser.comment += '-' + c
          parser.state = S.COMMENT
        }
        continue
      case S.COMMENT_ENDED:
        if (c !== '>') {
          strictFail(parser, 'Malformed comment')
          // allow <!-- blah -- bloo --> in non-strict mode,
          // which is a comment of " blah -- bloo "
          parser.comment += '--' + c
          parser.state = S.COMMENT
        } else {
          parser.state = S.TEXT
        }
        continue
      case S.CDATA:
        if (c === ']') {
          parser.state = S.CDATA_ENDING
        } else {
          parser.cdata += c
        }
        continue
      case S.CDATA_ENDING:
        if (c === ']') {
          parser.state = S.CDATA_ENDING_2
        } else {
          parser.cdata += ']' + c
          parser.state = S.CDATA
        }
        continue
      case S.CDATA_ENDING_2:
        if (c === '>') {
          if (parser.cdata) {
            emitNode(parser, 'oncdata', parser.cdata)
          }
          emitNode(parser, 'onclosecdata')
          parser.cdata = ''
          parser.state = S.TEXT
        } else if (c === ']') {
          parser.cdata += ']'
        } else {
          parser.cdata += ']]' + c
          parser.state = S.CDATA
        }
        continue
      case S.PROC_INST:
        if (c === '?') {
          parser.state = S.PROC_INST_ENDING
        } else if (isWhitespace(c)) {
          parser.state = S.PROC_INST_BODY
        } else {
          parser.procInstName += c
        }
        continue
      case S.PROC_INST_BODY:
        if (!parser.procInstBody && isWhitespace(c)) {
          continue
        } else if (c === '?') {
          parser.state = S.PROC_INST_ENDING
        } else {
          parser.procInstBody += c
        }
        continue
      case S.PROC_INST_ENDING:
        if (c === '>') {
          emitNode(parser, 'onprocessinginstruction', {
            name: parser.procInstName,
            body: parser.procInstBody
          })
          parser.procInstName = parser.procInstBody = ''
          parser.state = S.TEXT
        } else {
          parser.procInstBody += '?' + c
          parser.state = S.PROC_INST_BODY
        }
        continue
      case S.OPEN_TAG:
        if (isMatch(nameBody, c)) {
          parser.tagName += c
        } else {
          newTag(parser)
          if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid character in tag name')
            }
            parser.state = S.ATTRIB
          }
        }
        continue
      case S.OPEN_TAG_SLASH:
        if (c === '>') {
          openTag(parser, true)
          closeTag(parser)
        } else {
          strictFail(parser, 'Forward-slash in opening tag not followed by >')
          parser.state = S.ATTRIB
        }
        continue
      case S.ATTRIB:
        // haven't read the attribute name yet.
        if (isWhitespace(c)) {
          continue
        } else if (c === '>') {
          openTag(parser)
        } else if (c === '/') {
          parser.state = S.OPEN_TAG_SLASH
        } else if (isMatch(nameStart, c)) {
          parser.attribName = c
          parser.attribValue = ''
          parser.state = S.ATTRIB_NAME
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue
      case S.ATTRIB_NAME:
        if (c === '=') {
          parser.state = S.ATTRIB_VALUE
        } else if (c === '>') {
          strictFail(parser, 'Attribute without value')
          parser.attribValue = parser.attribName
          attrib(parser)
          openTag(parser)
        } else if (isWhitespace(c)) {
          parser.state = S.ATTRIB_NAME_SAW_WHITE
        } else if (isMatch(nameBody, c)) {
          parser.attribName += c
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue
      case S.ATTRIB_NAME_SAW_WHITE:
        if (c === '=') {
          parser.state = S.ATTRIB_VALUE
        } else if (isWhitespace(c)) {
          continue
        } else {
          strictFail(parser, 'Attribute without value')
          parser.tag.attributes[parser.attribName] = ''
          parser.attribValue = ''
          emitNode(parser, 'onattribute', {
            name: parser.attribName,
            value: ''
          })
          parser.attribName = ''
          if (c === '>') {
            openTag(parser)
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
            parser.state = S.ATTRIB
          }
        }
        continue
      case S.ATTRIB_VALUE:
        if (isWhitespace(c)) {
          continue
        } else if (isQuote(c)) {
          parser.q = c
          parser.state = S.ATTRIB_VALUE_QUOTED
        } else {
          strictFail(parser, 'Unquoted attribute value')
          parser.state = S.ATTRIB_VALUE_UNQUOTED
          parser.attribValue = c
        }
        continue
      case S.ATTRIB_VALUE_QUOTED:
        if (c !== parser.q) {
          if (c === '&') {
            parser.state = S.ATTRIB_VALUE_ENTITY_Q
          } else {
            parser.attribValue += c
          }
          continue
        }
        attrib(parser)
        parser.q = ''
        parser.state = S.ATTRIB_VALUE_CLOSED
        continue
      case S.ATTRIB_VALUE_CLOSED:
        if (isWhitespace(c)) {
          parser.state = S.ATTRIB
        } else if (c === '>') {
          openTag(parser)
        } else if (c === '/') {
          parser.state = S.OPEN_TAG_SLASH
        } else if (isMatch(nameStart, c)) {
          strictFail(parser, 'No whitespace between attributes')
          parser.attribName = c
          parser.attribValue = ''
          parser.state = S.ATTRIB_NAME
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue
      case S.ATTRIB_VALUE_UNQUOTED:
        if (!isAttribEnd(c)) {
          if (c === '&') {
            parser.state = S.ATTRIB_VALUE_ENTITY_U
          } else {
            parser.attribValue += c
          }
          continue
        }
        attrib(parser)
        if (c === '>') {
          openTag(parser)
        } else {
          parser.state = S.ATTRIB
        }
        continue
      case S.CLOSE_TAG:
        if (!parser.tagName) {
          if (isWhitespace(c)) {
            continue
          } else if (notMatch(nameStart, c)) {
            if (parser.script) {
              parser.script += '</' + c
              parser.state = S.SCRIPT
            } else {
              strictFail(parser, 'Invalid tagname in closing tag.')
            }
          } else {
            parser.tagName = c
          }
        } else if (c === '>') {
          closeTag(parser)
        } else if (isMatch(nameBody, c)) {
          parser.tagName += c
        } else if (parser.script) {
          parser.script += '</' + parser.tagName
          parser.tagName = ''
          parser.state = S.SCRIPT
        } else {
          if (!isWhitespace(c)) {
            strictFail(parser, 'Invalid tagname in closing tag')
          }
          parser.state = S.CLOSE_TAG_SAW_WHITE
        }
        continue
      case S.CLOSE_TAG_SAW_WHITE:
        if (isWhitespace(c)) {
          continue
        }
        if (c === '>') {
          closeTag(parser)
        } else {
          strictFail(parser, 'Invalid characters in closing tag')
        }
        continue
      case S.TEXT_ENTITY:
      case S.ATTRIB_VALUE_ENTITY_Q:
      case S.ATTRIB_VALUE_ENTITY_U:
        // pick the state to return to and the buffer the entity lands in
        var returnState
        var buffer
        switch (parser.state) {
          case S.TEXT_ENTITY:
            returnState = S.TEXT
            buffer = 'textNode'
            break
          case S.ATTRIB_VALUE_ENTITY_Q:
            returnState = S.ATTRIB_VALUE_QUOTED
            buffer = 'attribValue'
            break
          case S.ATTRIB_VALUE_ENTITY_U:
            returnState = S.ATTRIB_VALUE_UNQUOTED
            buffer = 'attribValue'
            break
        }
        if (c === ';') {
          var parsedEntity = parseEntity(parser)
          // Custom entities can contain tags, so we potentially need to parse the result
          if (parser.state === S.TEXT_ENTITY && !sax.ENTITIES[parser.entity] && parsedEntity !== '&' + parser.entity + ';') {
            // splice the replacement into the input so it is parsed next
            chunk = chunk.slice(0, i) + parsedEntity + chunk.slice(i)
          } else {
            parser[buffer] += parsedEntity
          }
          parser.entity = ''
          parser.state = returnState
        } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
          parser.entity += c
        } else {
          strictFail(parser, 'Invalid character in entity name')
          parser[buffer] += '&' + parser.entity + c
          parser.entity = ''
          parser.state = returnState
        }
        continue
      default:
        // Error takes the message as its first argument
        throw new Error('Unknown state: ' + parser.state)
    }
  } // while
  if (parser.position >= parser.bufferCheckPosition) {
    checkBufferLength(parser)
  }
  return parser
}
})(typeof exports === 'undefined' ? this.sax = {} : exports)