Spaces:

CatPtain
/

web_ppt

Running

File size: 6,288 Bytes

89ce340

import { startsWith, endsWith } from 'lodash'
import type { Token } from './types'
import { childlessTags } from './tags'

/** Mutable cursor state threaded through every lexing function in this file. */
interface State {
  // The complete input string being tokenized.
  str: string
  // Index into `str` of the next character that has not been lexed yet.
  position: number
  // Tokens emitted so far, appended in the order they are encountered.
  tokens: Token[]
}

/**
 * Moves the cursor to the absolute index `end`.
 *
 * Implemented as a relative move so that every position change goes through
 * `movePositopn`.
 *
 * @param state - lexer state whose `position` is updated in place
 * @param end - absolute index in `state.str` to move to
 */
const jumpPosition = (state: State, end: number) => {
  movePositopn(state, end - state.position)
}

/**
 * Advances the cursor by `len` characters relative to its current position.
 *
 * @param state - lexer state whose `position` is updated in place
 * @param len - number of characters to move by
 */
const movePositopn = (state: State, len: number) => {
  state.position += len
}

/**
 * Finds where a run of plain text ends, i.e. the index of the next `<` that
 * actually looks like the start of markup.
 *
 * A `<` only counts when it is immediately followed by `/` (closing tag),
 * `!` (comment / declaration) or an ASCII letter or digit (opening tag).
 * Any other `<` (for example in `a < b`) is treated as ordinary text and
 * skipped.
 *
 * @param str - the full input string
 * @param index - index at which to start searching
 * @returns the index of the qualifying `<`, or `-1` if there is none
 */
const findTextEnd = (str: string, index: number) => {
  // Built once per call instead of once per loop iteration.
  const tagStartChar = /[A-Za-z0-9]/
  let searchFrom = index

  // Every path inside the loop either returns or advances `searchFrom`,
  // so the loop always terminates via one of the two returns.
  for (;;) {
    const candidate = str.indexOf('<', searchFrom)
    if (candidate === -1) return -1

    const following = str.charAt(candidate + 1)
    if (following === '/' || following === '!' || tagStartChar.test(following)) {
      return candidate
    }

    // A lone '<' that does not start markup: keep looking after it.
    searchFrom = candidate + 1
  }
}

/**
 * Emits a `text` token for the run of plain text starting at the cursor.
 *
 * Does nothing when markup starts exactly at the cursor. When no further
 * markup is found, the text extends to the end of the input.
 *
 * @param state - lexer state; `position` is advanced past the text and the
 *   token is appended to `tokens`
 */
const lexText = (state: State) => {
  const source = state.str
  const textStart = state.position

  const markupStart = findTextEnd(source, textStart)
  // Markup begins right here, so there is no text to emit.
  if (markupStart === textStart) return

  const textStop = markupStart === -1 ? source.length : markupStart
  const content = source.slice(textStart, textStop)
  jumpPosition(state, textStop)

  state.tokens.push({ type: 'text', content })
}

/**
 * Emits a `comment` token for the comment starting at the cursor.
 *
 * The cursor is expected to sit on the `<` of `<!--`. The token content is
 * the text between `<!--` and `-->`; an unterminated comment runs to the end
 * of the input.
 *
 * @param state - lexer state; `position` is advanced past the closing `-->`
 *   (or to the end of input) and the token is appended to `tokens`
 */
const lexComment = (state: State) => {
  const source = state.str

  // Skip the 4-character opener `<!--`.
  movePositopn(state, 4)
  const bodyStart = state.position

  const closer = source.indexOf('-->', bodyStart)
  const isTerminated = closer !== -1
  const bodyStop = isTerminated ? closer : source.length
  // Resume after the 3-character `-->`, or at end of input if it is missing.
  const resumeAt = isTerminated ? closer + 3 : source.length

  const content = source.slice(bodyStart, bodyStop)
  jumpPosition(state, resumeAt)

  state.tokens.push({ type: 'comment', content })
}

/**
 * Reads the tag name at the cursor and emits it as a `tag` token.
 *
 * Leading whitespace, `/` and `>` characters are skipped first; the name then
 * extends up to (not including) the next whitespace, `/` or `>`.
 *
 * @param state - lexer state; `position` is advanced to just past the name
 * @returns the tag name exactly as written in the source (case preserved)
 */
const lexTagName = (state: State) => {
  const source = state.str
  const limit = source.length

  // Characters that can never be part of a tag name.
  const isDelimiter = (ch: string) => /\s/.test(ch) || ch === '/' || ch === '>'

  let nameStart = state.position
  while (nameStart < limit && isDelimiter(source.charAt(nameStart))) {
    nameStart++
  }

  // The character at `nameStart` is always taken as part of the name, so the
  // scan for the end starts one past it.
  let nameStop = nameStart + 1
  while (nameStop < limit && !isDelimiter(source.charAt(nameStop))) {
    nameStop++
  }

  const tagName = source.slice(nameStart, nameStop)
  jumpPosition(state, nameStop)
  state.tokens.push({
    type: 'tag',
    content: tagName
  })
  return tagName
}

/**
 * Tokenizes the attribute section of a tag, from the cursor up to (but not
 * including) the `/` or `>` that ends the tag, emitting one `attribute`
 * token per attribute.
 *
 * Works in two passes:
 *  1. Split the section into whitespace-separated words, ignoring whitespace,
 *     `/` and `>` that appear inside single or double quotes. Quote
 *     characters are kept as part of the word.
 *  2. Re-join words that belong to the same attribute because the author put
 *     whitespace around `=`:
 *       `key` `=value`    -> `key=value`
 *       `key` `=` `value` -> `key=value`
 *       `key=` `value`    -> `key=value`
 *     A trailing `key=` with no usable value is emitted as `key`.
 *
 * If the input ends before a `/` or `>` is seen, the word still being read at
 * that point is not emitted.
 *
 * @param state - lexer state; `position` is advanced to the terminating `/`
 *   or `>` (or to the end of input) and tokens are appended to `tokens`
 */
const lexTagAttributes = (state: State) => {
  const { str, tokens } = state
  const total = str.length
  const parts: string[] = []
  let openQuote: string | null = null
  let partStart = state.position
  let at = state.position

  // Pass 1: split into words on unquoted whitespace.
  for (; at < total; at++) {
    const ch = str.charAt(at)

    if (openQuote !== null) {
      // Inside a quoted value only the matching quote is significant.
      if (ch === openQuote) openQuote = null
      continue
    }

    if (ch === '/' || ch === '>') {
      if (at !== partStart) parts.push(str.slice(partStart, at))
      break
    }

    if (/\s/.test(ch)) {
      if (at !== partStart) parts.push(str.slice(partStart, at))
      partStart = at + 1
      continue
    }

    if (ch === '\'' || ch === '"') openQuote = ch
  }
  jumpPosition(state, at)

  const emit = (content: string) => {
    tokens.push({ type: 'attribute', content })
  }

  // Pass 2: merge words split around '=' and emit the tokens.
  let i = 0
  while (i < parts.length) {
    const part = parts[i]

    if (!part.includes('=')) {
      const next = parts[i + 1]
      if (next && startsWith(next, '=')) {
        if (next.length > 1) {
          // `key` `=value`
          emit(part + next)
          i += 2
          continue
        }
        // `next` is a bare `=`: it is consumed whether or not a value follows.
        const afterNext = parts[i + 2]
        i += 1
        if (afterNext) {
          // `key` `=` `value`
          emit(part + '=' + afterNext)
          i += 2
          continue
        }
      }
    }

    if (endsWith(part, '=')) {
      const next = parts[i + 1]
      if (next && !next.includes('=')) {
        // `key=` `value`
        emit(part + next)
        i += 2
        continue
      }

      // Dangling `key=`: drop the trailing '='.
      emit(part.slice(0, -1))
      i += 1
      continue
    }

    emit(part)
    i += 1
  }
}

/**
 * Lexes the body of a tag whose content must not be tokenized as markup
 * (called by `lex` for tags listed in `childlessTags`).
 *
 * Scans forward for a closing tag whose name matches `tagName`
 * (case-insensitively). Everything before it is emitted as a single `text`
 * token, followed by the tokens of the closing tag itself.
 *
 * If the input contains no further `</` at all, the remainder is handed to
 * `lexText`, which emits text only up to the next `<` that looks like markup.
 *
 * @param tagName - name of the opening tag that was just lexed
 * @param state - lexer state positioned right after the opening tag;
 *   `position` is advanced and tokens are appended to `tokens`
 */
const lexSkipTag = (tagName: string, state: State) => {
  const { str, tokens } = state
  const safeTagName = tagName.toLowerCase()
  const len = str.length
  let index = state.position

  while (index < len) {
    const nextTag = str.indexOf('</', index)
    if (nextTag === -1) {
      lexText(state)
      break
    }

    // Speculatively lex the candidate closing tag into a scratch state so the
    // real state is only touched once the name is known to match.
    const tagState: State = {
      str,
      position: nextTag,
      tokens: [],
    }
    const name = lexTag(tagState)
    if (safeTagName !== name.toLowerCase()) {
      // Resume right after this `</`, NOT after everything the speculative
      // parse consumed: a non-matching candidate such as `</b </script>` can
      // swallow the `</` of the real closing tag, which would then never be
      // found.
      index = nextTag + 2
      continue
    }

    if (nextTag !== state.position) {
      const textStart = state.position
      jumpPosition(state, nextTag)
      tokens.push({
        type: 'text',
        content: str.slice(textStart, nextTag),
      })
    }

    tokens.push(...tagState.tokens)
    jumpPosition(state, tagState.position)
    break
  }
}

/**
 * Lexes one complete tag starting at the `<` under the cursor.
 *
 * Emits, in order: a `tag-start` token (`close` is true for `</…`), the
 * `tag` name token, any `attribute` tokens, and a `tag-end` token (`close`
 * is true when the tag ends with `/>`).
 *
 * @param state - lexer state; `position` is advanced past the tag
 * @returns the tag name as written in the source
 */
const lexTag = (state: State) => {
  const source = state.str

  // `</` is two characters to skip, a plain `<` is one.
  const isClosingTag = source.charAt(state.position + 1) === '/'
  if (isClosingTag) movePositopn(state, 2)
  else movePositopn(state, 1)
  state.tokens.push({ type: 'tag-start', close: isClosingTag })

  const tagName = lexTagName(state)
  lexTagAttributes(state)

  // `/>` is two characters to skip, a plain `>` is one.
  const isSelfClosing = source.charAt(state.position) === '/'
  if (isSelfClosing) movePositopn(state, 2)
  else movePositopn(state, 1)
  state.tokens.push({ type: 'tag-end', close: isSelfClosing })

  return tagName
}

/**
 * Main lexing loop: consumes `state.str` from the current position to the
 * end, appending tokens to `state.tokens`.
 *
 * Each iteration first tries to lex plain text. If the cursor did not move,
 * markup starts here: either a comment (`<!--`) or a tag. After a tag listed
 * in `childlessTags`, its body is lexed by `lexSkipTag` instead of the
 * regular loop.
 *
 * @param state - lexer state, mutated in place
 */
const lex = (state: State) => {
  const { str } = state
  const total = str.length

  while (state.position < total) {
    const before = state.position
    lexText(state)

    // Text was consumed; go round again from the new position.
    if (state.position !== before) continue

    if (startsWith(str, '!--', before + 1)) {
      lexComment(state)
      continue
    }

    const tagName = lexTag(state)
    if (childlessTags.includes(tagName.toLowerCase())) {
      lexSkipTag(tagName, state)
    }
  }
}

/**
 * Tokenizes an HTML string.
 *
 * @param str - the HTML source to tokenize
 * @returns the tokens found in `str`, in source order
 */
export const lexer = (str: string): Token[] => {
  const tokens: Token[] = []
  lex({ str, position: 0, tokens })
  return tokens
}