Blame - linux-x64/clang/python3/lib/python3.9/textwrap.py - hafnium/prebuilts

blob: 30e693c8de03548344a038d0dc946e1d3a3ca8d0 [file] [log] [blame]

Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	1	"""Text wrapping and filling.
				2	"""
				3
				4	# Copyright (C) 1999-2001 Gregory P. Ward.
				5	# Copyright (C) 2002, 2003 Python Software Foundation.
				6	# Written by Greg Ward <gward@python.net>
				7
				8	import re
				9
				10	__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']
				11
				12	# Hardcode the recognized whitespace characters to the US-ASCII
				13	# whitespace characters. The main reason for doing this is that
				14	# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
				15	_whitespace = '\t\n\x0b\x0c\r '
				16
				17	class TextWrapper:
				18	"""
				19	Object for wrapping/filling text. The public interface consists of
				20	the wrap() and fill() methods; the other methods are just there for
				21	subclasses to override in order to tweak the default behaviour.
				22	If you want to completely replace the main wrapping algorithm,
				23	you'll probably have to override _wrap_chunks().
				24
				25	Several instance attributes control various aspects of wrapping:
				26	width (default: 70)
				27	the maximum width of wrapped lines (unless break_long_words
				28	is false)
				29	initial_indent (default: "")
				30	string that will be prepended to the first line of wrapped
				31	output. Counts towards the line's width.
				32	subsequent_indent (default: "")
				33	string that will be prepended to all lines save the first
				34	of wrapped output; also counts towards each line's width.
				35	expand_tabs (default: true)
				36	Expand tabs in input text to spaces before further processing.
				37	Each tab will become 0 .. 'tabsize' spaces, depending on its position
				38	in its line. If false, each tab is treated as a single character.
				39	tabsize (default: 8)
				40	Expand tabs in input text to 0 .. 'tabsize' spaces, unless
				41	'expand_tabs' is false.
				42	replace_whitespace (default: true)
				43	Replace all whitespace characters in the input text by spaces
				44	after tab expansion. Note that if expand_tabs is false and
				45	replace_whitespace is true, every tab will be converted to a
				46	single space!
				47	fix_sentence_endings (default: false)
				48	Ensure that sentence-ending punctuation is always followed
				49	by two spaces. Off by default because the algorithm is
				50	(unavoidably) imperfect.
				51	break_long_words (default: true)
				52	Break words longer than 'width'. If false, those words will not
				53	be broken, and some lines might be longer than 'width'.
				54	break_on_hyphens (default: true)
				55	Allow breaking hyphenated words. If true, wrapping will occur
				56	preferably on whitespaces and right after hyphens part of
				57	compound words.
				58	drop_whitespace (default: true)
				59	Drop leading and trailing whitespace from lines.
				60	max_lines (default: None)
				61	Truncate wrapped lines.
				62	placeholder (default: ' [...]')
				63	Append to the last line of truncated text.
				64	"""
				65
				66	unicode_whitespace_trans = {}
				67	uspace = ord(' ')
				68	for x in _whitespace:
				69	unicode_whitespace_trans[ord(x)] = uspace
				70
				71	# This funky little regex is just the trick for splitting
				72	# text up into word-wrappable chunks. E.g.
				73	# "Hello there -- you goof-ball, use the -b option!"
				74	# splits into
				75	# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
				76	# (after stripping out empty strings).
				77	word_punct = r'[\w!"\'&.,?]'
				78	letter = r'[^\d\W]'
				79	whitespace = r'[%s]' % re.escape(_whitespace)
				80	nowhitespace = '[^' + whitespace[1:]
				81	wordsep_re = re.compile(r'''
				82	( # any whitespace
				83	%(ws)s+
				84	\| # em-dash between words
				85	(?<=%(wp)s) -{2,} (?=\w)
				86	\| # word, possibly hyphenated
				87	%(nws)s+? (?:
				88	# hyphenated word
				89	-(?: (?<=%(lt)s{2}-) \| (?<=%(lt)s-%(lt)s-))
				90	(?= %(lt)s -? %(lt)s)
				91	\| # end of word
				92	(?=%(ws)s\|\Z)
				93	\| # em-dash
				94	(?<=%(wp)s) (?=-{2,}\w)
				95	)
				96	)''' % {'wp': word_punct, 'lt': letter,
				97	'ws': whitespace, 'nws': nowhitespace},
				98	re.VERBOSE)
				99	del word_punct, letter, nowhitespace
				100
				101	# This less funky little regex just split on recognized spaces. E.g.
				102	# "Hello there -- you goof-ball, use the -b option!"
				103	# splits into
				104	# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
				105	wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
				106	del whitespace
				107
				108	# XXX this is not locale- or charset-aware -- string.lowercase
				109	# is US-ASCII only (and therefore English-only)
				110	sentence_end_re = re.compile(r'[a-z]' # lowercase letter
				111	r'[\.\!\?]' # sentence-ending punct.
				112	r'[\"\']?' # optional end-of-quote
				113	r'\Z') # end of chunk
				114
				115	def __init__(self,
				116	width=70,
				117	initial_indent="",
				118	subsequent_indent="",
				119	expand_tabs=True,
				120	replace_whitespace=True,
				121	fix_sentence_endings=False,
				122	break_long_words=True,
				123	drop_whitespace=True,
				124	break_on_hyphens=True,
				125	tabsize=8,
				126	*,
				127	max_lines=None,
				128	placeholder=' [...]'):
				129	self.width = width
				130	self.initial_indent = initial_indent
				131	self.subsequent_indent = subsequent_indent
				132	self.expand_tabs = expand_tabs
				133	self.replace_whitespace = replace_whitespace
				134	self.fix_sentence_endings = fix_sentence_endings
				135	self.break_long_words = break_long_words
				136	self.drop_whitespace = drop_whitespace
				137	self.break_on_hyphens = break_on_hyphens
				138	self.tabsize = tabsize
				139	self.max_lines = max_lines
				140	self.placeholder = placeholder
				141
				142
				143	# -- Private methods -----------------------------------------------
				144	# (possibly useful for subclasses to override)
				145
				146	def _munge_whitespace(self, text):
				147	"""_munge_whitespace(text : string) -> string
				148
				149	Munge whitespace in text: expand tabs and convert all other
				150	whitespace characters to spaces. Eg. " foo\\tbar\\n\\nbaz"
				151	becomes " foo bar baz".
				152	"""
				153	if self.expand_tabs:
				154	text = text.expandtabs(self.tabsize)
				155	if self.replace_whitespace:
				156	text = text.translate(self.unicode_whitespace_trans)
				157	return text
				158
				159
				160	def _split(self, text):
				161	"""_split(text : string) -> [string]
				162
				163	Split the text to wrap into indivisible chunks. Chunks are
				164	not quite the same as words; see _wrap_chunks() for full
				165	details. As an example, the text
				166	Look, goof-ball -- use the -b option!
				167	breaks into the following chunks:
				168	'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
				169	'use', ' ', 'the', ' ', '-b', ' ', 'option!'
				170	if break_on_hyphens is True, or in:
				171	'Look,', ' ', 'goof-ball', ' ', '--', ' ',
				172	'use', ' ', 'the', ' ', '-b', ' ', option!'
				173	otherwise.
				174	"""
				175	if self.break_on_hyphens is True:
				176	chunks = self.wordsep_re.split(text)
				177	else:
				178	chunks = self.wordsep_simple_re.split(text)
				179	chunks = [c for c in chunks if c]
				180	return chunks
				181
				182	def _fix_sentence_endings(self, chunks):
				183	"""_fix_sentence_endings(chunks : [string])
				184
				185	Correct for sentence endings buried in 'chunks'. Eg. when the
				186	original text contains "... foo.\\nBar ...", munge_whitespace()
				187	and split() will convert that to [..., "foo.", " ", "Bar", ...]
				188	which has one too few spaces; this method simply changes the one
				189	space to two.
				190	"""
				191	i = 0
				192	patsearch = self.sentence_end_re.search
				193	while i < len(chunks)-1:
				194	if chunks[i+1] == " " and patsearch(chunks[i]):
				195	chunks[i+1] = " "
				196	i += 2
				197	else:
				198	i += 1
				199
				200	def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
				201	"""_handle_long_word(chunks : [string],
				202	cur_line : [string],
				203	cur_len : int, width : int)
				204
				205	Handle a chunk of text (most likely a word, not whitespace) that
				206	is too long to fit in any line.
				207	"""
				208	# Figure out when indent is larger than the specified width, and make
				209	# sure at least one character is stripped off on every pass
				210	if width < 1:
				211	space_left = 1
				212	else:
				213	space_left = width - cur_len
				214
				215	# If we're allowed to break long words, then do so: put as much
				216	# of the next chunk onto the current line as will fit.
				217	if self.break_long_words:
				218	cur_line.append(reversed_chunks[-1][:space_left])
				219	reversed_chunks[-1] = reversed_chunks[-1][space_left:]
				220
				221	# Otherwise, we have to preserve the long word intact. Only add
				222	# it to the current line if there's nothing already there --
				223	# that minimizes how much we violate the width constraint.
				224	elif not cur_line:
				225	cur_line.append(reversed_chunks.pop())
				226
				227	# If we're not allowed to break long words, and there's already
				228	# text on the current line, do nothing. Next time through the
				229	# main loop of _wrap_chunks(), we'll wind up here again, but
				230	# cur_len will be zero, so the next line will be entirely
				231	# devoted to the long word that we can't handle right now.
				232
				233	def _wrap_chunks(self, chunks):
				234	"""_wrap_chunks(chunks : [string]) -> [string]
				235
				236	Wrap a sequence of text chunks and return a list of lines of
				237	length 'self.width' or less. (If 'break_long_words' is false,
				238	some lines may be longer than this.) Chunks correspond roughly
				239	to words and the whitespace between them: each chunk is
				240	indivisible (modulo 'break_long_words'), but a line break can
				241	come between any two chunks. Chunks should not have internal
				242	whitespace; ie. a chunk is either all whitespace or a "word".
				243	Whitespace chunks will be removed from the beginning and end of
				244	lines, but apart from that whitespace is preserved.
				245	"""
				246	lines = []
				247	if self.width <= 0:
				248	raise ValueError("invalid width %r (must be > 0)" % self.width)
				249	if self.max_lines is not None:
				250	if self.max_lines > 1:
				251	indent = self.subsequent_indent
				252	else:
				253	indent = self.initial_indent
				254	if len(indent) + len(self.placeholder.lstrip()) > self.width:
				255	raise ValueError("placeholder too large for max width")
				256
				257	# Arrange in reverse order so items can be efficiently popped
				258	# from a stack of chucks.
				259	chunks.reverse()
				260
				261	while chunks:
				262
				263	# Start the list of chunks that will make up the current line.
				264	# cur_len is just the length of all the chunks in cur_line.
				265	cur_line = []
				266	cur_len = 0
				267
				268	# Figure out which static string will prefix this line.
				269	if lines:
				270	indent = self.subsequent_indent
				271	else:
				272	indent = self.initial_indent
				273
				274	# Maximum width for this line.
				275	width = self.width - len(indent)
				276
				277	# First chunk on line is whitespace -- drop it, unless this
				278	# is the very beginning of the text (ie. no lines started yet).
				279	if self.drop_whitespace and chunks[-1].strip() == '' and lines:
				280	del chunks[-1]
				281
				282	while chunks:
				283	l = len(chunks[-1])
				284
				285	# Can at least squeeze this chunk onto the current line.
				286	if cur_len + l <= width:
				287	cur_line.append(chunks.pop())
				288	cur_len += l
				289
				290	# Nope, this line is full.
				291	else:
				292	break
				293
				294	# The current line is full, and the next chunk is too big to
				295	# fit on any line (not just this one).
				296	if chunks and len(chunks[-1]) > width:
				297	self._handle_long_word(chunks, cur_line, cur_len, width)
				298	cur_len = sum(map(len, cur_line))
				299
				300	# If the last chunk on this line is all whitespace, drop it.
				301	if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
				302	cur_len -= len(cur_line[-1])
				303	del cur_line[-1]
				304
				305	if cur_line:
				306	if (self.max_lines is None or
				307	len(lines) + 1 < self.max_lines or
				308	(not chunks or
				309	self.drop_whitespace and
				310	len(chunks) == 1 and
				311	not chunks[0].strip()) and cur_len <= width):
				312	# Convert current line back to a string and store it in
				313	# list of all lines (return value).
				314	lines.append(indent + ''.join(cur_line))
				315	else:
				316	while cur_line:
				317	if (cur_line[-1].strip() and
				318	cur_len + len(self.placeholder) <= width):
				319	cur_line.append(self.placeholder)
				320	lines.append(indent + ''.join(cur_line))
				321	break
				322	cur_len -= len(cur_line[-1])
				323	del cur_line[-1]
				324	else:
				325	if lines:
				326	prev_line = lines[-1].rstrip()
				327	if (len(prev_line) + len(self.placeholder) <=
				328	self.width):
				329	lines[-1] = prev_line + self.placeholder
				330	break
				331	lines.append(indent + self.placeholder.lstrip())
				332	break
				333
				334	return lines
				335
				336	def _split_chunks(self, text):
				337	text = self._munge_whitespace(text)
				338	return self._split(text)
				339
				340	# -- Public interface ----------------------------------------------
				341
				342	def wrap(self, text):
				343	"""wrap(text : string) -> [string]
				344
				345	Reformat the single paragraph in 'text' so it fits in lines of
				346	no more than 'self.width' columns, and return a list of wrapped
				347	lines. Tabs in 'text' are expanded with string.expandtabs(),
				348	and all other whitespace characters (including newline) are
				349	converted to space.
				350	"""
				351	chunks = self._split_chunks(text)
				352	if self.fix_sentence_endings:
				353	self._fix_sentence_endings(chunks)
				354	return self._wrap_chunks(chunks)
				355
				356	def fill(self, text):
				357	"""fill(text : string) -> string
				358
				359	Reformat the single paragraph in 'text' to fit in lines of no
				360	more than 'self.width' columns, and return a new string
				361	containing the entire wrapped paragraph.
				362	"""
				363	return "\n".join(self.wrap(text))
				364
				365
				366	# -- Convenience interface ---------------------------------------------
				367
				368	def wrap(text, width=70, **kwargs):
				369	"""Wrap a single paragraph of text, returning a list of wrapped lines.
				370
				371	Reformat the single paragraph in 'text' so it fits in lines of no
				372	more than 'width' columns, and return a list of wrapped lines. By
				373	default, tabs in 'text' are expanded with string.expandtabs(), and
				374	all other whitespace characters (including newline) are converted to
				375	space. See TextWrapper class for available keyword args to customize
				376	wrapping behaviour.
				377	"""
				378	w = TextWrapper(width=width, **kwargs)
				379	return w.wrap(text)
				380
				381	def fill(text, width=70, **kwargs):
				382	"""Fill a single paragraph of text, returning a new string.
				383
				384	Reformat the single paragraph in 'text' to fit in lines of no more
				385	than 'width' columns, and return a new string containing the entire
				386	wrapped paragraph. As with wrap(), tabs are expanded and other
				387	whitespace characters converted to space. See TextWrapper class for
				388	available keyword args to customize wrapping behaviour.
				389	"""
				390	w = TextWrapper(width=width, **kwargs)
				391	return w.fill(text)
				392
				393	def shorten(text, width, **kwargs):
				394	"""Collapse and truncate the given text to fit in the given width.
				395
				396	The text first has its whitespace collapsed. If it then fits in
				397	the width, it is returned as is. Otherwise, as many words
				398	as possible are joined and then the placeholder is appended::
				399
				400	>>> textwrap.shorten("Hello world!", width=12)
				401	'Hello world!'
				402	>>> textwrap.shorten("Hello world!", width=11)
				403	'Hello [...]'
				404	"""
				405	w = TextWrapper(width=width, max_lines=1, **kwargs)
				406	return w.fill(' '.join(text.strip().split()))
				407
				408
				409	# -- Loosely related functionality -------------------------------------
				410
				411	_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
				412	_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
				413
				414	def dedent(text):
				415	"""Remove any common leading whitespace from every line in `text`.
				416
				417	This can be used to make triple-quoted strings line up with the left
				418	edge of the display, while still presenting them in the source code
				419	in indented form.
				420
				421	Note that tabs and spaces are both treated as whitespace, but they
				422	are not equal: the lines " hello" and "\\thello" are
				423	considered to have no common leading whitespace.
				424
				425	Entirely blank lines are normalized to a newline character.
				426	"""
				427	# Look for the longest leading string of spaces and tabs common to
				428	# all lines.
				429	margin = None
				430	text = _whitespace_only_re.sub('', text)
				431	indents = _leading_whitespace_re.findall(text)
				432	for indent in indents:
				433	if margin is None:
				434	margin = indent
				435
				436	# Current line more deeply indented than previous winner:
				437	# no change (previous winner is still on top).
				438	elif indent.startswith(margin):
				439	pass
				440
				441	# Current line consistent with and no deeper than previous winner:
				442	# it's the new winner.
				443	elif margin.startswith(indent):
				444	margin = indent
				445
				446	# Find the largest common whitespace between current line and previous
				447	# winner.
				448	else:
				449	for i, (x, y) in enumerate(zip(margin, indent)):
				450	if x != y:
				451	margin = margin[:i]
				452	break
				453
				454	# sanity check (testing/debugging only)
				455	if 0 and margin:
				456	for line in text.split("\n"):
				457	assert not line or line.startswith(margin), \
				458	"line = %r, margin = %r" % (line, margin)
				459
				460	if margin:
				461	text = re.sub(r'(?m)^' + margin, '', text)
				462	return text
				463
				464
				465	def indent(text, prefix, predicate=None):
				466	"""Adds 'prefix' to the beginning of selected lines in 'text'.
				467
				468	If 'predicate' is provided, 'prefix' will only be added to the lines
				469	where 'predicate(line)' is True. If 'predicate' is not provided,
				470	it will default to adding 'prefix' to all non-empty lines that do not
				471	consist solely of whitespace characters.
				472	"""
				473	if predicate is None:
				474	def predicate(line):
				475	return line.strip()
				476
				477	def prefixed_lines():
				478	for line in text.splitlines(True):
				479	yield (prefix + line if predicate(line) else line)
				480	return ''.join(prefixed_lines())
				481
				482
				483	if __name__ == "__main__":
				484	#print dedent("\tfoo\n\tbar")
				485	#print dedent(" \thello there\n \t how are you?")
				486	print(dedent("Hello there.\n This is indented."))