// Partial port of Python's textwrap module, version 3.9.0 (wrap, fill and dedent only):
// https://github.com/python/cpython/blob/v3.9.0b4/Lib/textwrap.py

/*
 * Text wrapping and filling.
 */

// Copyright (C) 1999-2001 Gregory P. Ward.
// Copyright (C) 2002, 2003 Python Software Foundation.
// Copyright (C) 2020 argparse.js authors
// Originally written by Greg Ward <gward@python.net>
// Hardcode the recognized whitespace characters to the US-ASCII
// whitespace characters.  The main reason for doing this is that
// some Unicode spaces (like \u00a0) are non-breaking whitespaces.
//
// This less funky little regex just splits on recognized spaces. E.g.
//   "Hello there -- you goof-ball, use the -b option!"
// splits into
//   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
const wordsep_simple_re = /([\t\n\x0b\x0c\r ]+)/

class TextWrapper {
    /*
     * Object for wrapping/filling text.  The public interface consists of
     * the wrap() and fill() methods; the other methods are just there for
     * subclasses to override in order to tweak the default behaviour.
     * If you want to completely replace the main wrapping algorithm,
     * you'll probably have to override _wrap_chunks().
     *
     * Several instance attributes control various aspects of wrapping:
     *   width (default: 70)
     *     the maximum width of wrapped lines (unless break_long_words
     *     is false)
     *   initial_indent (default: "")
     *     string that will be prepended to the first line of wrapped
     *     output.  Counts towards the line's width.
     *   subsequent_indent (default: "")
     *     string that will be prepended to all lines save the first
     *     of wrapped output; also counts towards each line's width.
     *   expand_tabs (default: true)
     *     Expand tabs in input text to spaces before further processing.
     *     Each tab will become 0 .. 'tabsize' spaces, depending on its position
     *     in its line.  If false, each tab is treated as a single character.
     *   tabsize (default: 8)
     *     Expand tabs in input text to 0 .. 'tabsize' spaces, unless
     *     'expand_tabs' is false.
     *   replace_whitespace (default: true)
     *     Replace all whitespace characters in the input text by spaces
     *     after tab expansion.  Note that if expand_tabs is false and
     *     replace_whitespace is true, every tab will be converted to a
     *     single space!
     *   fix_sentence_endings (default: false)
     *     Stored for compatibility with Python; this port does not
     *     implement it, so the value has no effect.
     *   break_long_words (default: true)
     *     Break words longer than 'width'.  If false, those words will not
     *     be broken, and some lines might be longer than 'width'.
     *   break_on_hyphens (default: true)
     *     Stored for compatibility with Python; this port only ever
     *     splits on whitespace (see _split()), so the value has no effect.
     *   drop_whitespace (default: true)
     *     Drop leading and trailing whitespace from lines.
     *   max_lines (default: undefined)
     *     Truncate wrapped lines.  undefined or null means "no limit".
     *   placeholder (default: ' [...]')
     *     Append to the last line of truncated text.
     *
     * All widths are measured with String.prototype.length (UTF-16 units).
     */

    constructor(options = {}) {
        let {
            width = 70,
            initial_indent = '',
            subsequent_indent = '',
            expand_tabs = true,
            replace_whitespace = true,
            fix_sentence_endings = false,
            break_long_words = true,
            drop_whitespace = true,
            break_on_hyphens = true,
            tabsize = 8,
            max_lines = undefined,
            placeholder = ' [...]'
        } = options

        this.width = width
        this.initial_indent = initial_indent
        this.subsequent_indent = subsequent_indent
        this.expand_tabs = expand_tabs
        this.replace_whitespace = replace_whitespace
        this.fix_sentence_endings = fix_sentence_endings
        this.break_long_words = break_long_words
        this.drop_whitespace = drop_whitespace
        this.break_on_hyphens = break_on_hyphens
        this.tabsize = tabsize
        this.max_lines = max_lines
        this.placeholder = placeholder
    }

    // -- Private methods -----------------------------------------------
    // (possibly useful for subclasses to override)

    _expand_tabs(text) {
        /*
         * _expand_tabs(text : string) -> string
         *
         * Column-aware tab expansion modelled on Python's str.expandtabs():
         * each tab is replaced by enough spaces to reach the next multiple
         * of 'tabsize'; the column counter restarts after "\n" and "\r".
         * With tabsize <= 0 tabs are simply removed.  Columns are counted
         * per code point.
         */
        let tabsize = this.tabsize
        let result = ''
        let column = 0
        for (let ch of text) {
            if (ch === '\t') {
                if (tabsize > 0) {
                    let pad = tabsize - (column % tabsize)
                    result += ' '.repeat(pad)
                    column += pad
                }
            } else {
                result += ch
                column = (ch === '\n' || ch === '\r') ? 0 : column + 1
            }
        }
        return result
    }

    _munge_whitespace(text) {
        /*
         * _munge_whitespace(text : string) -> string
         *
         * Munge whitespace in text: expand tabs and convert all other
         * whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
         * becomes " foo    bar  baz".
         */
        if (this.expand_tabs) {
            text = this._expand_tabs(text)
        }
        if (this.replace_whitespace) {
            text = text.replace(/[\t\n\x0b\x0c\r]/g, ' ')
        }
        return text
    }

    _split(text) {
        /*
         * _split(text : string) -> [string]
         *
         * Split the text to wrap into indivisible chunks.  Chunks are
         * not quite the same as words; see _wrap_chunks() for full
         * details.  This port always splits on runs of ASCII whitespace
         * only (break_on_hyphens is not implemented), so the text
         *   Look, goof-ball -- use the -b option!
         * breaks into the following chunks:
         *   'Look,', ' ', 'goof-ball', ' ', '--', ' ',
         *   'use', ' ', 'the', ' ', '-b', ' ', 'option!'
         */
        // The capture group keeps the separators; filter() drops the empty
        // strings split() yields at the edges of the text.
        return text.split(wordsep_simple_re).filter(Boolean)
    }

    _handle_long_word(reversed_chunks, cur_line, cur_len, width) {
        /*
         * _handle_long_word(chunks : [string],
         *                   cur_line : [string],
         *                   cur_len : int, width : int)
         *
         * Handle a chunk of text (most likely a word, not whitespace) that
         * is too long to fit in any line.  Mutates both 'reversed_chunks'
         * and 'cur_line'; returns nothing.
         */
        // Figure out when indent is larger than the specified width, and make
        // sure at least one character is stripped off on every pass
        let space_left = width < 1 ? 1 : width - cur_len

        if (this.break_long_words) {
            // If we're allowed to break long words, then do so: put as much
            // of the next chunk onto the current line as will fit.
            let last = reversed_chunks.length - 1
            let chunk = reversed_chunks[last]
            let rest = chunk.slice(space_left)
            cur_line.push(chunk.slice(0, space_left))
            if (rest !== '') {
                reversed_chunks[last] = rest
            } else {
                // Only reachable when width < 1: never leave a zero-length
                // chunk behind, it could be re-processed forever.
                reversed_chunks.pop()
            }
        } else if (cur_line.length === 0) {
            // Otherwise, we have to preserve the long word intact.  Only add
            // it to the current line if there's nothing already there --
            // that minimizes how much we violate the width constraint.
            cur_line.push(reversed_chunks.pop())
        }

        // If we're not allowed to break long words, and there's already
        // text on the current line, do nothing.  Next time through the
        // main loop of _wrap_chunks(), we'll wind up here again, but
        // cur_len will be zero, so the next line will be entirely
        // devoted to the long word that we can't handle right now.
    }

    _append_truncated_line(lines, cur_line, cur_len, indent, width) {
        /*
         * Emit the final line once 'max_lines' has been reached and text
         * is still pending.  Chunks are removed from the end of 'cur_line'
         * until the placeholder fits after a non-blank chunk.  If nothing
         * fits, the placeholder is attached to the previous line when there
         * is room, otherwise it gets a line of its own.  Mutates 'lines'.
         */
        while (cur_line.length > 0) {
            let last = cur_line[cur_line.length - 1]
            if (last.trim() && cur_len + this.placeholder.length <= width) {
                cur_line.push(this.placeholder)
                lines.push(indent + cur_line.join(''))
                return
            }
            cur_len -= last.length
            cur_line.pop()
        }

        if (lines.length > 0) {
            let prev_line = lines[lines.length - 1].trimEnd()
            if (prev_line.length + this.placeholder.length <= this.width) {
                lines[lines.length - 1] = prev_line + this.placeholder
                return
            }
        }
        lines.push(indent + this.placeholder.trimStart())
    }

    _wrap_chunks(chunks) {
        /*
         * _wrap_chunks(chunks : [string]) -> [string]
         *
         * Wrap a sequence of text chunks and return a list of lines of
         * length 'this.width' or less.  (If 'break_long_words' is false,
         * some lines may be longer than this.)  Chunks correspond roughly
         * to words and the whitespace between them: each chunk is
         * indivisible (modulo 'break_long_words'), but a line break can
         * come between any two chunks.  Chunks should not have internal
         * whitespace; ie. a chunk is either all whitespace or a "word".
         * Whitespace chunks will be removed from the beginning and end of
         * lines, but apart from that whitespace is preserved.
         *
         * 'chunks' is reversed in place and consumed.
         * Throws Error if 'width' is <= 0 or the placeholder cannot fit.
         */
        let lines = []
        let has_max_lines = this.max_lines !== undefined && this.max_lines !== null

        if (this.width <= 0) {
            throw new Error(`invalid width ${this.width} (must be > 0)`)
        }
        if (has_max_lines) {
            let last_indent = this.max_lines > 1 ? this.subsequent_indent : this.initial_indent
            if (last_indent.length + this.placeholder.trimStart().length > this.width) {
                throw new Error('placeholder too large for max width')
            }
        }

        // Arrange in reverse order so items can be efficiently popped
        // from a stack of chunks.
        chunks.reverse()

        while (chunks.length > 0) {
            // Start the list of chunks that will make up the current line.
            // cur_len is just the length of all the chunks in cur_line.
            let cur_line = []
            let cur_len = 0

            // Figure out which static string will prefix this line.
            let indent = lines.length > 0 ? this.subsequent_indent : this.initial_indent

            // Maximum width for this line.
            let width = this.width - indent.length

            // First chunk on line is whitespace -- drop it, unless this
            // is the very beginning of the text (ie. no lines started yet).
            if (this.drop_whitespace && lines.length > 0 &&
                chunks[chunks.length - 1].trim() === '') {
                chunks.pop()
            }

            // Squeeze as many chunks as possible onto the current line.
            while (chunks.length > 0) {
                let l = chunks[chunks.length - 1].length
                if (cur_len + l > width) {
                    break
                }
                cur_line.push(chunks.pop())
                cur_len += l
            }

            // The current line is full, and the next chunk is too big to
            // fit on *any* line (not just this one).
            if (chunks.length > 0 && chunks[chunks.length - 1].length > width) {
                this._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = cur_line.reduce((total, chunk) => total + chunk.length, 0)
            }

            // If the last chunk on this line is all whitespace, drop it.
            if (this.drop_whitespace && cur_line.length > 0 &&
                cur_line[cur_line.length - 1].trim() === '') {
                cur_len -= cur_line[cur_line.length - 1].length
                cur_line.pop()
            }

            // Nothing printable ended up on this line: emit no line.
            if (cur_line.length === 0) {
                continue
            }

            // True when whatever is still pending would produce no further
            // line (nothing left, or one droppable whitespace chunk).
            let nothing_more_to_print = chunks.length === 0 ||
                (this.drop_whitespace && chunks.length === 1 && !chunks[0].trim())

            if (!has_max_lines ||
                lines.length + 1 < this.max_lines ||
                (nothing_more_to_print && cur_len <= width)) {
                // Convert current line back to a string and store it in
                // list of all lines (return value).
                lines.push(indent + cur_line.join(''))
                continue
            }

            // Line limit reached with text remaining: truncate and stop.
            this._append_truncated_line(lines, cur_line, cur_len, indent, width)
            break
        }

        return lines
    }

    _split_chunks(text) {
        /*
         * _split_chunks(text : string) -> [string]
         *
         * Normalize whitespace, then split the text into chunks.
         */
        text = this._munge_whitespace(text)
        return this._split(text)
    }

    // -- Public interface ----------------------------------------------

    wrap(text) {
        /*
         * wrap(text : string) -> [string]
         *
         * Reformat the single paragraph in 'text' so it fits in lines of
         * no more than 'this.width' columns, and return a list of wrapped
         * lines.  Tabs in 'text' are expanded (see _expand_tabs()), and
         * all other whitespace characters (including newline) are
         * converted to space.
         */
        let chunks = this._split_chunks(text)
        // NOTE: fix_sentence_endings is not implemented in this port.
        return this._wrap_chunks(chunks)
    }

    fill(text) {
        /*
         * fill(text : string) -> string
         *
         * Reformat the single paragraph in 'text' to fit in lines of no
         * more than 'this.width' columns, and return a new string
         * containing the entire wrapped paragraph.
         */
        return this.wrap(text).join('\n')
    }
}
// -- Convenience interface ---------------------------------------------
function wrap(text, options = {}) {
    /*
     * Wrap a single paragraph of text, returning a list of wrapped lines.
     *
     * Reformat the single paragraph in 'text' so it fits in lines of no
     * more than 'width' columns (default 70), and return a list of wrapped
     * lines.  By default, tabs in 'text' are expanded and all other
     * whitespace characters (including newline) are converted to space.
     * See the TextWrapper class for the other keys accepted in 'options'.
     */
    let { width = 70, ...kwargs } = options
    let wrapper = new TextWrapper({ width, ...kwargs })
    return wrapper.wrap(text)
}
function fill(text, options = {}) {
    /*
     * Fill a single paragraph of text, returning a new string.
     *
     * Reformat the single paragraph in 'text' to fit in lines of no more
     * than 'width' columns (default 70), and return a new string containing
     * the entire wrapped paragraph, lines joined by "\n".  As with wrap(),
     * tabs are expanded and other whitespace characters converted to space.
     * See the TextWrapper class for the other keys accepted in 'options'.
     */
    let { width = 70, ...kwargs } = options
    let wrapper = new TextWrapper({ width, ...kwargs })
    return wrapper.fill(text)
}
// -- Loosely related functionality -------------------------------------
// Lines consisting solely of spaces/tabs.
const _whitespace_only_re = /^[ \t]+$/mg
// Leading spaces/tabs of a line plus its first non-blank character.
const _leading_whitespace_re = /(^[ \t]*)(?:[^ \t\n])/mg

function dedent(text) {
    /*
     * Remove any common leading whitespace from every line in `text`.
     *
     * This can be used to make multi-line strings line up with the left
     * edge of the display, while still presenting them in the source code
     * in indented form.
     *
     * Note that tabs and spaces are both treated as whitespace, but they
     * are not equal: the lines "  hello" and "\\thello" are
     * considered to have no common leading whitespace.
     *
     * Lines made up only of spaces and tabs are emptied before the common
     * margin is computed, so they never influence it.
     */
    // Look for the longest leading string of spaces and tabs common to
    // all lines.
    let margin = undefined
    text = text.replace(_whitespace_only_re, '')
    // With the 'g' flag match() returns whole matches, so every entry
    // still ends with the first non-blank character of its line.
    let indents = text.match(_leading_whitespace_re) || []

    for (let indent of indents) {
        // Strip that trailing non-blank character to get the pure indent.
        indent = indent.slice(0, -1)

        if (margin === undefined) {
            margin = indent
        } else if (indent.startsWith(margin)) {
            // Current line more deeply indented than previous winner:
            // no change (previous winner is still on top).
        } else if (margin.startsWith(indent)) {
            // Current line consistent with and no deeper than previous winner:
            // it's the new winner.
            margin = indent
        } else {
            // Find the largest common whitespace between current line and
            // previous winner.
            for (let i = 0; i < margin.length && i < indent.length; i++) {
                if (margin[i] !== indent[i]) {
                    margin = margin.slice(0, i)
                    break
                }
            }
        }
    }

    // margin only ever contains spaces and tabs, so it needs no escaping.
    if (margin) {
        text = text.replace(new RegExp('^' + margin, 'mg'), '')
    }
    return text
}
// Public API of this module (CommonJS).
module.exports = { wrap, fill, dedent }