CatPtain's picture
Upload 339 files
89ce340 verified
raw
history blame
6.29 kB
import { startsWith, endsWith } from 'lodash'
import type { Token } from './types'
import { childlessTags } from './tags'
/** Mutable cursor state that is threaded through every lexing routine in this file. */
interface State {
/** The complete input text being tokenized. */
str: string
/** Index into `str` of the next character to be consumed. */
position: number
/** Tokens emitted so far, appended in the order they are produced. */
tokens: Token[]
}
/**
 * Moves the cursor to the absolute index `end`.
 *
 * Expressed as a relative move so that all cursor changes go through
 * `movePositopn`.
 *
 * @param state - Lexer state whose `position` is updated in place.
 * @param end - Absolute index in `state.str` to move to.
 */
const jumpPosition = (state: State, end: number) => {
  movePositopn(state, end - state.position)
}
/**
 * Shifts the cursor by `len` characters relative to its current position.
 *
 * NOTE(review): the name is a misspelling of "movePosition"; it is kept
 * because the other functions in this file call it by this name.
 *
 * @param state - Lexer state whose `position` is updated in place.
 * @param len - Signed number of characters to move by.
 */
const movePositopn = (state: State, len: number) => {
  state.position += len
}
/**
 * Finds where a run of plain text ends, i.e. the index of the next `<` that
 * actually opens markup.
 *
 * A `<` only counts when it is immediately followed by `/`, `!`, or an ASCII
 * letter or digit. Any other `<` (for example in `a < b`, or a `<` that is the
 * last character) is treated as literal text and skipped.
 *
 * @param str - The input being lexed.
 * @param index - Index at which to start searching.
 * @returns Index of the `<` that starts markup, or `-1` if none exists from
 *          `index` onwards.
 */
const findTextEnd = (str: string, index: number) => {
  let searchFrom = index
  for (;;) {
    const candidate = str.indexOf('<', searchFrom)
    if (candidate === -1) {
      return -1
    }
    const following = str.charAt(candidate + 1)
    if (following === '/' || following === '!' || /[A-Za-z0-9]/.test(following)) {
      return candidate
    }
    // Literal '<' inside text: resume the search just past it.
    searchFrom = candidate + 1
  }
}
/**
 * Emits a `text` token for the characters between the cursor and the next
 * piece of markup, and advances the cursor past them.
 *
 * Does nothing when markup begins exactly at the cursor. When no further
 * markup exists, the rest of the input becomes the text.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lexText = (state: State) => {
  const source = state.str
  const begin = state.position
  const markupAt = findTextEnd(source, begin)
  // Markup starts right here, so there is no text to emit.
  if (markupAt === begin) return

  const stop = markupAt === -1 ? source.length : markupAt
  const content = source.slice(begin, stop)
  jumpPosition(state, stop)
  state.tokens.push({ type: 'text', content })
}
/**
 * Lexes a comment whose `<!--` opener starts at the cursor and emits a
 * `comment` token holding the text between the opener and `-->`.
 *
 * If the closing `-->` is missing, the remainder of the input is taken as the
 * comment body and the cursor ends at the end of the input.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lexComment = (state: State) => {
  // Step over the four characters of "<!--".
  movePositopn(state, 4)
  const { str } = state
  const bodyStart = state.position
  const closerAt = str.indexOf('-->', bodyStart)
  const unterminated = closerAt === -1
  const bodyEnd = unterminated ? str.length : closerAt
  // Resume after the three characters of "-->" when it is present.
  const resumeAt = unterminated ? str.length : closerAt + 3

  const content = str.slice(bodyStart, bodyEnd)
  jumpPosition(state, resumeAt)
  state.tokens.push({ type: 'comment', content })
}
/**
 * Reads a tag name starting at the cursor, emits a `tag` token for it and
 * leaves the cursor on the first character after the name.
 *
 * Leading whitespace, `/` and `>` characters are skipped before the name; the
 * name then extends until the next whitespace, `/` or `>` (or end of input).
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 * @returns The tag name exactly as written in the input (case preserved).
 */
const lexTagName = (state: State) => {
  const { str } = state
  const total = str.length
  const isDelimiter = (ch: string) => ch === '/' || ch === '>' || /\s/.test(ch)

  // Skip anything that cannot begin a name.
  let nameStart = state.position
  while (nameStart < total && isDelimiter(str.charAt(nameStart))) {
    nameStart++
  }

  // The character at nameStart is already known to belong to the name.
  let nameEnd = nameStart + 1
  while (nameEnd < total && !isDelimiter(str.charAt(nameEnd))) {
    nameEnd++
  }

  jumpPosition(state, nameEnd)
  const tagName = str.slice(nameStart, nameEnd)
  state.tokens.push({ type: 'tag', content: tagName })
  return tagName
}
/**
 * Lexes the attribute section of a tag and emits one `attribute` token per
 * attribute, leaving the cursor on the `/` or `>` that ends the tag (or at the
 * end of the input if neither is found).
 *
 * Works in two passes:
 *  1. Split the text up to the tag end into whitespace-separated words,
 *     treating anything between matching `'` or `"` quotes as part of the
 *     current word.
 *  2. Re-join words that were split around an `=` sign (`a = b`, `a =b`,
 *     `a= b`) into a single `name=value` token.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lexTagAttributes = (state: State) => {
  const { str, tokens } = state
  const words: string[] = []
  let openQuote: string | null = null
  let wordStart = state.position
  let pos = state.position

  // Pass 1: collect raw words.
  for (; pos < str.length; pos++) {
    const ch = str.charAt(pos)
    if (openQuote !== null) {
      // Inside a quoted value everything is literal until the matching quote.
      if (ch === openQuote) openQuote = null
      continue
    }
    if (ch === '/' || ch === '>') {
      if (pos !== wordStart) words.push(str.slice(wordStart, pos))
      break
    }
    if (/\s/.test(ch)) {
      if (pos !== wordStart) words.push(str.slice(wordStart, pos))
      wordStart = pos + 1
    } else if (ch === '\'' || ch === '"') {
      openQuote = ch
    }
  }
  jumpPosition(state, pos)

  const emit = (content: string) => {
    tokens.push({ type: 'attribute', content })
  }

  // Pass 2: stitch words separated around '=' back together.
  let i = 0
  while (i < words.length) {
    const word = words[i]
    const next = words[i + 1]

    if (!word.includes('=')) {
      if (next && startsWith(next, '=')) {
        if (next.length > 1) {
          // `name =value`
          emit(word + next)
          i += 2
          continue
        }
        const value = words[i + 2]
        if (value) {
          // `name = value`
          emit(word + '=' + value)
          i += 3
          continue
        }
        // `name =` with nothing after it: drop the dangling '='.
        emit(word)
        i += 2
        continue
      }
      emit(word)
      i += 1
      continue
    }

    if (endsWith(word, '=')) {
      if (next && !next.includes('=')) {
        // `name= value`
        emit(word + next)
        i += 2
        continue
      }
      // `name=` with no usable value: emit the bare name.
      emit(word.slice(0, -1))
      i += 1
      continue
    }

    emit(word)
    i += 1
  }
}
/**
 * Consumes the raw body of a tag whose content must not be lexed as markup,
 * up to and including its matching closing tag.
 *
 * Each `</` after the cursor is lexed on a scratch state. The first one whose
 * name matches `tagName` (case-insensitively) ends the body: everything before
 * it is emitted as a single `text` token, followed by the closing tag's own
 * tokens. If the input contains no further `</`, the function falls back to
 * `lexText`. If closing tags exist but none matches, nothing is consumed.
 *
 * @param tagName - Name of the tag that was just opened.
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lexSkipTag = (tagName: string, state: State) => {
  const { str, tokens } = state
  const wanted = tagName.toLowerCase()
  let searchFrom = state.position

  while (searchFrom < str.length) {
    const closeAt = str.indexOf('</', searchFrom)
    if (closeAt === -1) {
      lexText(state)
      return
    }

    // Lex the candidate closing tag on a scratch state so that the real state
    // is only touched once a match is confirmed.
    const probe: State = { str, position: closeAt, tokens: [] }
    const closeName = lexTag(probe)
    if (closeName.toLowerCase() !== wanted) {
      searchFrom = probe.position
      continue
    }

    const bodyStart = state.position
    if (closeAt !== bodyStart) {
      jumpPosition(state, closeAt)
      tokens.push({
        type: 'text',
        content: str.slice(bodyStart, closeAt),
      })
    }
    tokens.push(...probe.tokens)
    jumpPosition(state, probe.position)
    return
  }
}
/**
 * Lexes one complete tag starting at the `<` under the cursor.
 *
 * Emits, in order: a `tag-start` token (`close` is true for `</…`), the `tag`
 * name token, any `attribute` tokens, and a `tag-end` token (`close` is true
 * when the tag ends with `/`, i.e. `/>`).
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 * @returns The tag name as written in the input.
 */
const lexTag = (state: State) => {
  const { str } = state

  const isClosingTag = str.charAt(state.position + 1) === '/'
  // Step over "</" or "<".
  const openerWidth = isClosingTag ? 2 : 1
  movePositopn(state, openerWidth)
  state.tokens.push({ type: 'tag-start', close: isClosingTag })

  const tagName = lexTagName(state)
  lexTagAttributes(state)

  const isSelfClosing = str.charAt(state.position) === '/'
  // Step over "/>" or ">".
  const terminatorWidth = isSelfClosing ? 2 : 1
  movePositopn(state, terminatorWidth)
  state.tokens.push({ type: 'tag-end', close: isSelfClosing })

  return tagName
}
/**
 * Main lexing loop: repeatedly consumes text, comments and tags until the
 * cursor reaches the end of the input.
 *
 * On each iteration text is tried first. If the cursor did not move, markup
 * starts at the cursor and is lexed as a comment (`<!--`) or a tag. After a
 * tag listed in `childlessTags`, the tag body is consumed by `lexSkipTag`
 * instead of being lexed as markup.
 *
 * @param state - Lexer state; `position` and `tokens` are updated in place.
 */
const lex = (state: State) => {
  const { str } = state
  const total = str.length

  while (state.position < total) {
    const before = state.position
    lexText(state)
    // Text was consumed; go round again.
    if (state.position !== before) continue

    if (startsWith(str, '!--', before + 1)) {
      lexComment(state)
      continue
    }

    const tagName = lexTag(state)
    if (childlessTags.includes(tagName.toLowerCase())) {
      lexSkipTag(tagName, state)
    }
  }
}
/**
 * Tokenizes a markup string.
 *
 * @param str - The source text to lex.
 * @returns The tokens produced by `lex`, in source order.
 */
export const lexer = (str: string): Token[] => {
  const tokens: Token[] = []
  const state: State = { str, position: 0, tokens }
  lex(state)
  return tokens
}