|
import { startsWith, endsWith } from 'lodash' |
|
import type { Token } from './types' |
|
import { childlessTags } from './tags' |
|
|
|
/**
 * Mutable cursor state shared by every lexing routine in this module.
 */
interface State {
  /** The complete input being tokenized. */
  str: string
  /** Index into `str` of the next character to be consumed. */
  position: number
  /** Tokens produced so far, in the order they were emitted. */
  tokens: Token[]
}
|
|
|
/**
 * Moves the cursor of `state` to the absolute index `end`.
 *
 * The move is expressed as a relative step (the distance from the current
 * position to `end`) and delegated to `movePositopn`.
 *
 * @param state - Lexer state whose `position` is updated in place.
 * @param end - Absolute index the cursor should land on.
 */
const jumpPosition = (state: State, end: number) => {
  movePositopn(state, end - state.position)
}
|
|
|
/**
 * Shifts the cursor of `state` by `len` characters relative to where it is.
 *
 * @param state - Lexer state whose `position` is updated in place.
 * @param len - Number of characters to advance (a negative value moves back).
 */
const movePositopn = (state: State, len: number) => {
  state.position += len
}
|
|
|
/** Characters that, directly after `<`, mark the start of a tag name. */
const TAG_NAME_START = /[A-Za-z0-9]/

/**
 * Finds where a run of plain text ends, i.e. the index of the next `<` that
 * actually opens markup.
 *
 * A `<` counts as markup only when the character right after it is `/`, `!`
 * or an ASCII letter or digit. Any other `<` (for example the one in
 * `a < b`, or a `<` at the very end of the input) is treated as text and
 * skipped.
 *
 * @param str - The input being scanned.
 * @param index - Index at which the search starts.
 * @returns The index of the markup-opening `<`, or `-1` when there is none.
 */
const findTextEnd = (str: string, index: number): number => {
  let textEnd = str.indexOf('<', index)
  while (textEnd !== -1) {
    const char = str.charAt(textEnd + 1)
    if (char === '/' || char === '!' || TAG_NAME_START.test(char)) {
      return textEnd
    }
    // This "<" is ordinary text; continue the search just past it.
    textEnd = str.indexOf('<', textEnd + 1)
  }
  return -1
}
|
|
|
/**
 * Emits a `text` token for the characters between the cursor and the next
 * markup-opening `<` (as located by `findTextEnd`), then moves the cursor
 * there.
 *
 * When markup starts exactly at the cursor nothing is emitted and the state
 * is left untouched. When no further markup exists, the text runs to the end
 * of the input.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lexText = (state: State) => {
  const source = state.str
  const begin = state.position
  const found = findTextEnd(source, begin)

  // Markup begins right here, so there is no text to collect.
  if (found === begin) return

  // No more markup: everything up to the end of the input is text.
  const stop = found === -1 ? source.length : found
  const content = source.slice(begin, stop)
  jumpPosition(state, stop)

  state.tokens.push({ type: 'text', content })
}
|
|
|
/**
 * Emits a `comment` token for the comment starting at the cursor and moves
 * the cursor past it.
 *
 * The first four characters at the cursor are skipped unconditionally (the
 * caller in this file only invokes this when they are `<!--`). The comment
 * body runs up to the next `-->`; if the input has no `-->`, the body runs to
 * the end of the input and the cursor ends there as well.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lexComment = (state: State) => {
  const source = state.str

  movePositopn(state, 4)
  const bodyStart = state.position

  const terminator = source.indexOf('-->', bodyStart)
  const unterminated = terminator === -1
  const bodyEnd = unterminated ? source.length : terminator
  // Resume after the three characters of "-->" when it is present.
  const resumeAt = unterminated ? source.length : terminator + 3

  const content = source.slice(bodyStart, bodyEnd)
  jumpPosition(state, resumeAt)

  state.tokens.push({ type: 'comment', content })
}
|
|
|
/**
 * Reads a tag name starting at the cursor, emits it as a `tag` token and
 * moves the cursor to just after the name.
 *
 * Leading whitespace, `/` and `>` characters are skipped first. The name then
 * consists of the first remaining character plus every following character
 * up to (not including) the next whitespace, `/` or `>`.
 *
 * Note: if no name character is found before the end of the input, the name
 * is the empty string and the cursor ends one past the end of the input.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 * @returns The tag name exactly as written in the input (case preserved).
 */
const lexTagName = (state: State) => {
  const { str } = state
  const total = str.length

  // A name character is anything except whitespace, "/" and ">".
  const isNameChar = (ch: string) => ch !== '/' && ch !== '>' && !/\s/.test(ch)

  let nameStart = state.position
  while (nameStart < total && !isNameChar(str.charAt(nameStart))) {
    nameStart++
  }

  // The character at nameStart is accepted as-is; scanning resumes after it.
  let nameEnd = nameStart + 1
  while (nameEnd < total && isNameChar(str.charAt(nameEnd))) {
    nameEnd++
  }

  jumpPosition(state, nameEnd)
  const tagName = str.slice(nameStart, nameEnd)
  state.tokens.push({ type: 'tag', content: tagName })
  return tagName
}
|
|
|
/**
 * Reads the attribute section of a tag, emitting one `attribute` token per
 * attribute, and leaves the cursor on the `/` or `>` that ends the section
 * (or at the end of the input if neither is found).
 *
 * The work happens in two passes:
 *
 * 1. Scan: the text is split into whitespace-separated words. Whitespace,
 *    `/` and `>` inside a single- or double-quoted run do not split or end
 *    anything. Outside quotes, `/` or `>` stops the scan.
 * 2. Merge: words that were split around an `=` are glued back together, so
 *    `a = b`, `a =b` and `a= b` each become `a=b`. A word ending in `=` with
 *    no usable value loses the trailing `=`. A bare word followed by a lone
 *    `=` with nothing after it is emitted alone and the `=` is dropped.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lexTagAttributes = (state: State) => {
  const { str, tokens } = state

  // ---- Pass 1: split into words -------------------------------------------
  let cursor = state.position
  let wordBegin = cursor
  let openQuote: string | null = null
  const words: string[] = []

  // Stores the word that ends just before the cursor, if it is non-empty.
  const flushWord = () => {
    if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor))
  }

  for (const len = str.length; cursor < len; cursor++) {
    const char = str.charAt(cursor)

    if (openQuote) {
      // Inside quotes only the matching quote character is significant.
      if (char === openQuote) openQuote = null
    } else if (char === '/' || char === '>') {
      flushWord()
      break // leave the cursor on the terminator
    } else if (/\s/.test(char)) {
      flushWord()
      wordBegin = cursor + 1
    } else if (char === '\'' || char === '"') {
      openQuote = char
    }
  }
  jumpPosition(state, cursor)

  // ---- Pass 2: merge words around "=" and emit tokens ---------------------
  const type = 'attribute'
  let i = 0
  while (i < words.length) {
    const word = words[i]
    const next = words[i + 1]
    let content = word
    let consumed = 1

    if (word.indexOf('=') === -1) {
      if (next && startsWith(next, '=')) {
        if (next.length > 1) {
          // `key` `=value`
          content = word + next
          consumed = 2
        } else {
          // `key` `=` `value` -- or `key` `=` with nothing after it, in which
          // case the key is emitted alone and the lone "=" is swallowed.
          const afterEquals = words[i + 2]
          if (afterEquals) {
            content = word + '=' + afterEquals
            consumed = 3
          } else {
            consumed = 2
          }
        }
      }
    } else if (endsWith(word, '=')) {
      if (next && next.indexOf('=') === -1) {
        // `key=` `value`
        content = word + next
        consumed = 2
      } else {
        // `key=` without a value: drop the dangling "=".
        content = word.slice(0, -1)
      }
    }

    tokens.push({ type, content })
    i += consumed
  }
}
|
|
|
/**
 * Consumes the raw content of an element up to its matching closing tag,
 * without lexing any markup found inside that content.
 *
 * Each `</` from the cursor onwards is lexed as a tag into a scratch state.
 * The first one whose name equals `tagName` (compared case-insensitively)
 * wins: everything before it is emitted as a single `text` token (omitted
 * when empty), followed by the closing tag's own tokens, and the cursor moves
 * past the closing tag. Candidates with a different name are discarded and
 * the search continues after them.
 *
 * If no `</` remains, `lexText` is called once on `state` and the function
 * returns.
 *
 * @param tagName - Name of the element whose closing tag is being sought.
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lexSkipTag = (tagName: string, state: State) => {
  const { str, tokens } = state
  const wantedName = tagName.toLowerCase()
  const total = str.length

  let searchFrom = state.position
  while (searchFrom < total) {
    const closeAt = str.indexOf('</', searchFrom)
    if (closeAt === -1) {
      lexText(state)
      return
    }

    // Lex the candidate into a throwaway state so a mismatch leaves the real
    // token list and cursor untouched.
    const probe: State = { str, position: closeAt, tokens: [] }
    const foundName = lexTag(probe)

    if (foundName.toLowerCase() === wantedName) {
      if (closeAt !== state.position) {
        const textStart = state.position
        jumpPosition(state, closeAt)
        tokens.push({
          type: 'text',
          content: str.slice(textStart, closeAt),
        })
      }

      tokens.push(...probe.tokens)
      jumpPosition(state, probe.position)
      return
    }

    searchFrom = probe.position
  }
}
|
|
|
/**
 * Lexes one complete tag beginning at the cursor.
 *
 * Emits, in order: a `tag-start` token, the `tag` name token (via
 * `lexTagName`), any `attribute` tokens (via `lexTagAttributes`), and a
 * `tag-end` token.
 *
 * - `tag-start.close` is true when the character after the cursor is `/`;
 *   the cursor then skips two characters instead of one.
 * - `tag-end.close` is true when the character at the cursor after the
 *   attributes is `/`; again two characters are skipped instead of one.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 * @returns The tag name as returned by `lexTagName`.
 */
const lexTag = (state: State) => {
  const { str, tokens } = state

  const startsWithSlash = str.charAt(state.position + 1) === '/'
  movePositopn(state, startsWithSlash ? 2 : 1)
  tokens.push({ type: 'tag-start', close: startsWithSlash })

  const tagName = lexTagName(state)
  lexTagAttributes(state)

  const endsWithSlash = str.charAt(state.position) === '/'
  movePositopn(state, endsWithSlash ? 2 : 1)
  tokens.push({ type: 'tag-end', close: endsWithSlash })

  return tagName
}
|
|
|
/**
 * Main lexing loop: repeatedly consumes input from the cursor until the end
 * of `state.str` is reached.
 *
 * Each round first tries to read text. If the cursor did not move, markup
 * starts at the cursor: a `!--` right after the first character selects
 * comment lexing, anything else is lexed as a tag. When the tag's lowercased
 * name is listed in `childlessTags`, `lexSkipTag` is run for it immediately
 * afterwards.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lex = (state: State) => {
  const { str } = state
  const total = str.length

  while (state.position < total) {
    const before = state.position
    lexText(state)

    // Text was consumed; start the next round from the new position.
    if (state.position !== before) continue

    if (startsWith(str, '!--', before + 1)) {
      lexComment(state)
      continue
    }

    const tagName = lexTag(state)
    if (childlessTags.includes(tagName.toLowerCase())) {
      lexSkipTag(tagName, state)
    }
  }
}
|
|
|
/**
 * Tokenizes a string of markup.
 *
 * Starts a fresh lexer state at position 0 with an empty token list, runs
 * `lex` over it and returns the collected tokens.
 *
 * @param str - The markup to tokenize.
 * @returns The tokens in the order they were produced.
 */
export const lexer = (str: string): Token[] => {
  const tokens: Token[] = []
  lex({ str, position: 0, tokens })
  return tokens
}