import { startsWith, endsWith } from 'lodash'
import type { Token } from './types'
import { childlessTags } from './tags'

/** Mutable lexer state: the input string, the read offset into it, and the tokens emitted so far. */
interface State {
  str: string
  position: number
  tokens: Token[]
}

/** Character that may directly follow `<` (besides `/` and `!`) for the `<` to count as markup. */
const TAG_OPENER_CHAR = /[A-Za-z0-9]/

/** Whitespace matcher, hoisted so the scanning loops do not rebuild it per character. */
const WHITESPACE = /\s/

/** True for the characters that terminate a tag name: whitespace, `/` or `>`. */
const isTagNameDelimiter = (char: string): boolean =>
  WHITESPACE.test(char) || char === '/' || char === '>'

/** Advances `state.position` by `len` characters. */
const movePosition = (state: State, len: number): void => {
  state.position = state.position + len
}

/** Moves `state.position` to the absolute offset `end`. */
const jumpPosition = (state: State, end: number): void => {
  movePosition(state, end - state.position)
}

/**
 * Finds where a run of plain text ends.
 *
 * @param str - The full input.
 * @param index - Offset to start searching from.
 * @returns The offset of the first `<` at or after `index` that is immediately
 *   followed by `/`, `!` or an ASCII letter/digit, or `-1` when there is none.
 *   A `<` followed by anything else is treated as text and skipped.
 */
const findTextEnd = (str: string, index: number): number => {
  let from = index
  for (;;) {
    const textEnd = str.indexOf('<', from)
    if (textEnd === -1) return -1
    const char = str.charAt(textEnd + 1)
    if (char === '/' || char === '!' || TAG_OPENER_CHAR.test(char)) return textEnd
    from = textEnd + 1
  }
}

/**
 * Emits a `text` token for everything between the current position and the
 * next piece of markup (or the end of input), and advances past it.
 * Does nothing when markup starts exactly at the current position.
 */
const lexText = (state: State): void => {
  const { str } = state
  let textEnd = findTextEnd(str, state.position)
  if (textEnd === state.position) return
  if (textEnd === -1) textEnd = str.length

  const content = str.slice(state.position, textEnd)
  jumpPosition(state, textEnd)
  state.tokens.push({ type: 'text', content })
}

/**
 * Emits a `comment` token. Expects `state.position` to be on the `<` of `<!--`.
 * An unterminated comment runs to the end of the input.
 */
const lexComment = (state: State): void => {
  const { str } = state
  movePosition(state, 4) // skip the "<!--" opener

  let contentEnd = str.indexOf('-->', state.position)
  let commentEnd = contentEnd + 3 // just past "-->"
  if (contentEnd === -1) {
    contentEnd = commentEnd = str.length
  }

  const content = str.slice(state.position, contentEnd)
  jumpPosition(state, commentEnd)
  state.tokens.push({ type: 'comment', content })
}

/**
 * Reads a tag name: skips any leading delimiter characters, then consumes
 * characters up to the next delimiter. Emits a `tag` token with the name.
 *
 * @returns The tag name exactly as written in the input (case preserved).
 */
const lexTagName = (state: State): string => {
  const { str } = state
  const len = str.length

  let start = state.position
  while (start < len && isTagNameDelimiter(str.charAt(start))) start++

  // The character at `start` is already known to belong to the name.
  let end = start + 1
  while (end < len && !isTagNameDelimiter(str.charAt(end))) end++

  jumpPosition(state, end)
  const tagName = str.slice(start, end)
  state.tokens.push({ type: 'tag', content: tagName })
  return tagName
}

/**
 * Reads everything between the tag name and the tag's closing `/` or `>` and
 * emits one `attribute` token per attribute. Leaves `state.position` on the
 * terminating `/` or `>` (or at the end of input if the tag is unterminated).
 *
 * The input is first split into whitespace-separated words (whitespace inside
 * quotes does not split), then words are re-joined so that `a = b`, `a= b`
 * and `a =b` each become the single token `a=b`.
 */
const lexTagAttributes = (state: State): void => {
  const { str, tokens } = state
  const len = str.length
  const words: string[] = []

  let cursor = state.position
  let quote: string | null = null
  let wordBegin = cursor

  while (cursor < len) {
    const char = str.charAt(cursor)

    if (quote) {
      // Inside a quoted value: only the matching quote character is special.
      if (char === quote) quote = null
      cursor++
      continue
    }

    if (char === '/' || char === '>') {
      if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor))
      break
    }

    if (WHITESPACE.test(char)) {
      if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor))
      wordBegin = cursor + 1
      cursor++
      continue
    }

    if (char === '\'' || char === '"') quote = char
    cursor++
  }
  jumpPosition(state, cursor)

  const type = 'attribute'
  for (let i = 0; i < words.length; i++) {
    const word = words[i]

    if (word.indexOf('=') === -1) {
      const secondWord = words[i + 1]
      if (secondWord && startsWith(secondWord, '=')) {
        if (secondWord.length > 1) {
          // `name =value`
          tokens.push({ type, content: word + secondWord })
          i += 1
          continue
        }
        // `name = value`: the lone `=` is consumed whether or not a value follows.
        const thirdWord = words[i + 2]
        i += 1
        if (thirdWord) {
          tokens.push({ type, content: word + '=' + thirdWord })
          i += 1
          continue
        }
      }
    }

    if (endsWith(word, '=')) {
      const secondWord = words[i + 1]
      if (secondWord && secondWord.indexOf('=') === -1) {
        // `name= value`
        tokens.push({ type, content: word + secondWord })
        i += 1
        continue
      }
      // Dangling `name=` with no usable value: drop the trailing `=`.
      tokens.push({ type, content: word.slice(0, -1) })
      continue
    }

    tokens.push({ type, content: word })
  }
}

/**
 * Lexes one complete tag starting at the `<` under `state.position`: emits
 * `tag-start`, the `tag` name, any `attribute` tokens, and `tag-end`.
 *
 * `tag-start.close` is true for `</name ...`; `tag-end.close` is true when the
 * tag ends with `/` (as in `<br/>`).
 *
 * @returns The tag name exactly as written in the input.
 */
const lexTag = (state: State): string => {
  const { str } = state

  const secondChar = str.charAt(state.position + 1)
  const tagStartClose = secondChar === '/'
  movePosition(state, tagStartClose ? 2 : 1) // skip "</" or "<"
  state.tokens.push({ type: 'tag-start', close: tagStartClose })

  const tagName = lexTagName(state)
  lexTagAttributes(state)

  const firstChar = str.charAt(state.position)
  const tagEndClose = firstChar === '/'
  movePosition(state, tagEndClose ? 2 : 1) // skip "/>" or ">"
  state.tokens.push({ type: 'tag-end', close: tagEndClose })
  return tagName
}

/**
 * Consumes the raw content of a childless tag (one listed in `childlessTags`)
 * up to and including its matching closing tag, emitting the content as a
 * single `text` token rather than lexing it as markup.
 *
 * NOTE(review): in the reviewed copy of this file everything after
 * `str.indexOf('` was missing (the text from `</` up to the `=>` of the
 * following `lexTag` definition had been stripped). The loop body below is a
 * reconstruction built from the surviving header and the helpers in this
 * module — confirm against version control before relying on it.
 *
 * @param tagName - Name of the opening tag that was just lexed.
 * @param state - Lexer state positioned just after that opening tag.
 */
const lexSkipTag = (tagName: string, state: State): void => {
  const { str, tokens } = state
  const safeTagName = tagName.toLowerCase()
  const len = str.length
  let index = state.position

  while (index < len) {
    const nextTag = str.indexOf('</', index)
    if (nextTag === -1) {
      // No closing tag anywhere: fall back to ordinary text lexing.
      lexText(state)
      break
    }

    // Lex the candidate closing tag into a scratch state so it can be discarded
    // if it turns out to close a different element.
    const tagState: State = { str, position: nextTag, tokens: [] }
    const name = lexTag(tagState)
    if (safeTagName !== name.toLowerCase()) {
      index = tagState.position
      continue
    }

    if (nextTag !== state.position) {
      const textStart = state.position
      jumpPosition(state, nextTag)
      tokens.push({ type: 'text', content: str.slice(textStart, nextTag) })
    }

    tokens.push(...tagState.tokens)
    jumpPosition(state, tagState.position)
    break
  }
}

/**
 * Main loop: alternates between text and markup until the input is exhausted.
 * When `lexText` makes no progress the position is on a `<`, which starts
 * either a comment (`<!--`) or a tag.
 */
const lex = (state: State): void => {
  const { str } = state
  const len = str.length

  while (state.position < len) {
    const start = state.position
    lexText(state)
    if (state.position !== start) continue

    if (startsWith(str, '!--', start + 1)) {
      lexComment(state)
      continue
    }

    const tagName = lexTag(state)
    if (childlessTags.includes(tagName.toLowerCase())) {
      lexSkipTag(tagName, state)
    }
  }
}

/**
 * Tokenizes an HTML string.
 *
 * @param str - The markup to tokenize.
 * @returns The tokens in source order (`text`, `comment`, `tag-start`, `tag`,
 *   `attribute`, `tag-end`).
 */
export const lexer = (str: string): Token[] => {
  const state: State = { str, position: 0, tokens: [] }
  lex(state)
  return state.tokens
}