Breaking a line along a space after exceeding a certain width (Python)
I am drawing text on top of a base image.
One of the basic requirements is that the text wraps onto the next line(s) when the total width of the characters exceeds the width of the base image. I am accomplishing this with the following working snippet:
# Split `text` into pieces that are flushed as soon as their accumulated
# rendered width exceeds the width of the base image.
base_width, base_height = base_img.size
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)
lines = []
line_count = 1
current, current_width = "", 0
for char in text:
    current_width += font.getsize(char)[0]
    current += str(char)
    if current_width <= base_width:
        continue
    # The character just added pushed the line past the limit: flush the
    # line (including that character) and start a fresh one.
    lines.append(current)
    current, current_width = "", 0
    line_count += 1
# Whatever is left over after the last flush forms the final line.
if current:
    lines.append(current)
The result lines
is a list of substrings obtained by splitting the original string.
Now I need to improve this algorithm.
The problem is that it breaks lines in the middle of a word. For example, the string lorem ipsum
may end like lines = ['lorem ip','sum']
. Instead, the ideal breakup for me is much more human-readable lines = ['lorem ','ipsum']
, or lines = ['lorem',' ipsum']
.
In other words, I want to split the lines along the white spaces, not in the middle of a word. Can anyone give me an illustrative example of how I can do this? I can't seem to wrap my head around it.
source to share
Here's an attempt to get your code to work with minimal changes and a lot of debug output:
#!python3
#coding=utf-8
""" Line break demo 2 """
text = "lorem ipsum dolor sit amet blablah"
# Fixed stand-in for font.getsize(c)[0]; replace with a real measurement.
char_width = 5
for wmax in [10,25,55,80,100,120]:
    print(wmax)
    # Stand-in for base_img.size; only the width is used.
    base_width, base_height = (wmax, None)
    line_width = 0
    line_count = 1
    lines = []
    string = ""
    for c in text:
        line_width += char_width
        string += c
        if line_width > base_width:
            print("text ", text)
            print("string", string)
            # Break at the last space seen on the current line, if any.
            s = string.rsplit(" ", 1)
            print("split ", s)
            string = s[0]
            lines.append(string)
            if len(s) > 1:
                # Carry the partial word after the space over to the next
                # line and account for the width it already occupies.
                string = s[1]
                line_width = len(string) * char_width
            else:
                # No space on this line: the word is cut where it overflowed.
                string = ""
                line_width = 0
            print("lines ", lines)
            print("string", string)
            line_count += 1
            print()
    # Remainder after the last break.
    if string:
        lines.append(string)
    print(lines)
    print()
Output:
10
text lorem ipsum dolor sit amet blablah
string lor
split ['lor']
lines ['lor']
string
text lorem ipsum dolor sit amet blablah
string em
split ['em', '']
lines ['lor', 'em']
string
text lorem ipsum dolor sit amet blablah
string ips
split ['ips']
lines ['lor', 'em', 'ips']
string
text lorem ipsum dolor sit amet blablah
string um
split ['um', '']
lines ['lor', 'em', 'ips', 'um']
string
text lorem ipsum dolor sit amet blablah
string dol
split ['dol']
lines ['lor', 'em', 'ips', 'um', 'dol']
string
text lorem ipsum dolor sit amet blablah
string or
split ['or', '']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or']
string
text lorem ipsum dolor sit amet blablah
string sit
split ['sit']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit']
string
text lorem ipsum dolor sit amet blablah
string am
split ['', 'am']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '']
string am
text lorem ipsum dolor sit amet blablah
string ame
split ['ame']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame']
string
text lorem ipsum dolor sit amet blablah
string t b
split ['t', 'b']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't']
string b
text lorem ipsum dolor sit amet blablah
string bla
split ['bla']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla']
string
text lorem ipsum dolor sit amet blablah
string bla
split ['bla']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla', 'bla']
string
['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla', 'bla', 'h']
25
text lorem ipsum dolor sit amet blablah
string lorem
split ['lorem', '']
lines ['lorem']
string
text lorem ipsum dolor sit amet blablah
string ipsum
split ['ipsum', '']
lines ['lorem', 'ipsum']
string
text lorem ipsum dolor sit amet blablah
string dolor
split ['dolor', '']
lines ['lorem', 'ipsum', 'dolor']
string
text lorem ipsum dolor sit amet blablah
string sit am
split ['sit', 'am']
lines ['lorem', 'ipsum', 'dolor', 'sit']
string am
text lorem ipsum dolor sit amet blablah
string amet b
split ['amet', 'b']
lines ['lorem', 'ipsum', 'dolor', 'sit', 'amet']
string b
text lorem ipsum dolor sit amet blablah
string blabla
split ['blabla']
lines ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blabla']
string
['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blabla', 'h']
55
text lorem ipsum dolor sit amet blablah
string lorem ipsum
split ['lorem ipsum', '']
lines ['lorem ipsum']
string
text lorem ipsum dolor sit amet blablah
string dolor sit am
split ['dolor sit', 'am']
lines ['lorem ipsum', 'dolor sit']
string am
text lorem ipsum dolor sit amet blablah
string amet blablah
split ['amet', 'blablah']
lines ['lorem ipsum', 'dolor sit', 'amet']
string blablah
['lorem ipsum', 'dolor sit', 'amet', 'blablah']
80
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor
split ['lorem ipsum', 'dolor']
lines ['lorem ipsum']
string dolor
text lorem ipsum dolor sit amet blablah
string dolor sit amet bl
split ['dolor sit amet', 'bl']
lines ['lorem ipsum', 'dolor sit amet']
string bl
['lorem ipsum', 'dolor sit amet', 'blablah']
100
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor sit
split ['lorem ipsum dolor', 'sit']
lines ['lorem ipsum dolor']
string sit
['lorem ipsum dolor', 'sit amet blablah']
120
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor sit ame
split ['lorem ipsum dolor sit', 'ame']
lines ['lorem ipsum dolor sit']
string ame
['lorem ipsum dolor sit', 'amet blablah']
source to share
Python has a textwrap module for this:
In [1]: import textwrap
In [2]: textwrap.wrap('x lorem ipsum', width=5)
Out[2]: ['x', 'lorem', 'ipsum']
Edit:
I misunderstood the author's intent. The problem is that the line width is not defined as a number of characters, but as the rendered width of the text in the image. I came up with a hacky method: implement my own string class with the correct width definition and slightly modify the TextWrapper class:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import textwrap
class MyTextWrapper(textwrap.TextWrapper):
    """TextWrapper that wraps StringWithWidth text.

    Chunks are kept as StringWithWidth objects, so every ``len(chunk)`` in
    the wrapping loop uses that class's ``__len__`` instead of a character
    count. The returned lines are StringWithWidth objects as well.

    NOTE(review): ``indent`` and ``placeholder`` are measured with plain
    ``len()`` here, and long-word breaking is left to the inherited
    ``_handle_long_word`` -- presumably both work in characters rather
    than rendered width; confirm before relying on them.
    """

    def _split(self, text):
        """Split *text* into chunks and re-wrap each one as StringWithWidth."""
        assert isinstance(text, StringWithWidth)
        return [StringWithWidth(i) for i in super()._split(text._str)]

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Compared with the original code this only changes how a finished
        line is assembled: chunks are unwrapped to ``str`` before joining
        and every stored line is wrapped in StringWithWidth.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                        len(lines) + 1 < self.max_lines or
                        (not chunks or
                         self.drop_whitespace and
                         len(chunks) == 1 and
                         not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(StringWithWidth(
                        indent + ''.join(map(_as_str, cur_line))))
                else:
                    # Last permitted line: drop trailing chunks until the
                    # placeholder fits after a non-whitespace chunk.
                    while cur_line:
                        if (cur_line[-1].strip() and
                                cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(StringWithWidth(
                                indent + ''.join(map(_as_str, cur_line))))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        if lines:
                            # Strip the underlying str and re-wrap it, so
                            # this path does not depend on the line object
                            # offering an rstrip() method of its own.
                            prev_line = StringWithWidth(
                                _as_str(lines[-1]).rstrip())
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        # Keep the element type of the result uniform.
                        lines.append(StringWithWidth(
                            indent + self.placeholder.lstrip()))
                    break

        return lines
def _make_str_fwd(name):
    """Build a method that forwards the call *name* to the wrapped ``str``.

    The generated method looks up *name* on ``self._str``, calls it with
    the given arguments and wraps the result in a new StringWithWidth.
    """
    def func(self, *args, **kwargs):
        return StringWithWidth(getattr(self._str, name)(*args, **kwargs))
    func.__name__ = name
    return func


def _as_str(val):
    """Return *val* as a plain ``str``, unwrapping a StringWithWidth.

    Raises:
        TypeError: if *val* is neither a ``str`` nor a StringWithWidth
            holding one.
    """
    if isinstance(val, StringWithWidth):
        val = val._str
    if not isinstance(val, str):
        # Explicit raise instead of assert so the check survives ``-O``.
        raise TypeError(
            "expected str or StringWithWidth, got %s" % type(val).__name__)
    return val


class StringWithWidth:
    """String wrapper whose ``len()`` is the sum of per-character widths."""

    # Demo width table (character -> width units). ``len()`` raises
    # KeyError for characters that are missing from this table.
    char_width = {
        'x': 1,
        's': 2,
        ' ': 1
    }

    def __init__(self, s):
        self._str = s

    # str methods whose results are re-wrapped as StringWithWidth.
    expandtabs = _make_str_fwd('expandtabs')
    translate = _make_str_fwd('translate')
    strip = _make_str_fwd('strip')
    # lstrip/rstrip are forwarded as well so that code treating lines
    # like str (e.g. calling rstrip() on a finished line) keeps working.
    lstrip = _make_str_fwd('lstrip')
    rstrip = _make_str_fwd('rstrip')
    __getitem__ = _make_str_fwd('__getitem__')

    def __eq__(self, rhs):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing inside _as_str().
        if not isinstance(rhs, (str, StringWithWidth)):
            return NotImplemented
        return self._str == _as_str(rhs)

    # Defining __eq__ already disables hashing; spell that out.
    __hash__ = None

    def __add__(self, rhs):
        return StringWithWidth(self._str + _as_str(rhs))

    def __len__(self):
        return sum(self.char_width[ch] for ch in self._str)

    def __repr__(self):
        return repr(self._str)
def main():
    """Wrap a small sample string to width 8 and print the resulting lines."""
    wrapper = MyTextWrapper(width=8)
    sample = StringWithWidth('x ss s')
    print(wrapper.wrap(sample))


if __name__ == '__main__':
    main()
source to share
Perhaps this simple approach helps, although I suppose the recursion could be more elegant. Note that the fixed character width must be replaced with an appropriate function call.
#!python3
#coding=utf-8
""" Line break demo """
text = "lorem ipsum dolor sit amet blablah"
print("breaking", text)
words = text.split()
# Every character counts as 5 units wide; swap in a real measurement here.
widths = [sum(5 for _ in word) for word in words]
print(len(words), "words")
for wmax in [10,25,55,80,100,120]:
    print("\nmax line width:", wmax)
    lines = []
    start = 0  # index of the first word on the line being built
    for idx, (word, word_width) in enumerate(zip(words, widths)):
        run_width = sum(widths[start:idx + 1])
        if idx > 0 and run_width >= wmax:
            # The words before this one form a finished line; the current
            # word opens the next line.
            lines.append(" ".join(words[start:idx]))
            start = idx
            print(" ---")
            run_width = sum(widths[start:idx + 1])
        print(" ", idx, word, word_width, run_width)
    # Words left after the last break make up the final line.
    lines.append(" ".join(words[start:]))
    print(lines)
Output:
breaking lorem ipsum dolor sit amet blablah 6 words max line width: 10 0 lorem 25 25 --- 1 ipsum 25 25 --- 2 dolor 25 25 --- 3 sit 15 15 --- 4 amet 20 20 --- 5 blablah 35 35 ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blablah'] max line width: 25 0 lorem 25 25 --- 1 ipsum 25 25 --- 2 dolor 25 25 --- 3 sit 15 15 --- 4 amet 20 20 --- 5 blablah 35 35 ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blablah'] max line width: 55 0 lorem 25 25 1 ipsum 25 50 --- 2 dolor 25 25 3 sit 15 40 --- 4 amet 20 20 --- 5 blablah 35 35 ['lorem ipsum', 'dolor sit', 'amet', 'blablah'] max line width: 80 0 lorem 25 25 1 ipsum 25 50 2 dolor 25 75 --- 3 sit 15 15 4 amet 20 35 5 blablah 35 70 ['lorem ipsum dolor', 'sit amet blablah'] max line width: 100 0 lorem 25 25 1 ipsum 25 50 2 dolor 25 75 3 sit 15 90 --- 4 amet 20 20 5 blablah 35 55 ['lorem ipsum dolor sit', 'amet blablah'] max line width: 120 0 lorem 25 25 1 ipsum 25 50 2 dolor 25 75 3 sit 15 90 4 amet 20 110 --- 5 blablah 35 35 ['lorem ipsum dolor sit amet', 'blablah']
source to share
This is how I went about solving the problem (rewrote my source code):
def break_lines(img, text, font_size, font=None,
                font_path="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"):
    """Break *text* at whitespace into lines that fit the width of *img*.

    Args:
        img: object whose ``size[0]`` is the available width.
        text: the string to wrap; it is split on whitespace.
        font_size: size passed to ``ImageFont.truetype`` when *font* is
            not supplied.
        font: optional ready-made font object offering ``getsize(text)``
            or ``getlength(text)``; when given, *font_size* and
            *font_path* are ignored.
        font_path: TrueType file to load when *font* is not supplied.

    Returns:
        A ``(lines, number_of_lines)`` tuple. Every line keeps a trailing
        space after each of its words. A single word wider than the image
        is placed on a line of its own and is not split.
    """
    base_width = img.size[0]
    if font is None:
        font = ImageFont.truetype(font_path, font_size)

    # Prefer getsize() to keep the original measurements; newer Pillow
    # releases dropped it in favour of getlength().
    if hasattr(font, "getsize"):
        def measure(s):
            return font.getsize(s)[0]
    else:
        measure = font.getlength

    lines = []
    line = ""
    width_of_line = 0
    for token in text.split():
        token += ' '
        token_width = measure(token)
        # Flush only a non-empty line: an over-wide first token must not
        # leave an empty line behind.
        if line and width_of_line + token_width >= base_width:
            lines.append(line)
            line = ""
            width_of_line = 0
        line += token
        width_of_line += token_width
    if line:
        lines.append(line)
    return lines, len(lines)
source to share
I couldn't get it out of my head, so it's a little more compact here:
#!python3
#coding=utf-8
""" Line break demo 3 """


def charwidth(char):
    """Return the width of one character (fixed demo value)."""
    return 5


def stringwidth(string):
    """Return the width of *string* as the sum of its character widths."""
    return sum(charwidth(char) for char in string)


def wrap_words(text, limit):
    """Wrap *text* at whitespace so that no line is wider than *limit*.

    The candidate line is measured as a whole, so the separating space is
    counted at its real width. A single word wider than *limit* is placed
    on a line of its own and is not split.

    Returns:
        The list of wrapped lines; an empty list for empty text.
    """
    lines = []
    for word in text.split():
        # `lines and ...` keeps the first word from opening an empty line.
        if lines and stringwidth(" ".join(lines[-1] + [word])) <= limit:
            lines[-1].append(word)
        else:
            lines.append([word])
    return [" ".join(line) for line in lines]


text = "lorem ipsum dolor sit amet blablah"
limit = 60
print(wrap_words(text, limit))
source to share
Here's a modified version of @handle's third answer that also allows long words to be split.
This function also allows you to specify the maximum number of lines and truncate with "..." when over the limit.
def text_width(text, font):
    """Return the rendered width of *text* for *font*.

    Uses ``font.getsize(text)[0]`` when the font offers it and falls back
    to ``font.getlength(text)`` otherwise (newer Pillow releases dropped
    ``getsize``).
    """
    getsize = getattr(font, "getsize", None)
    if getsize is not None:
        return getsize(text)[0]
    return font.getlength(text)


# Set max_lines to 0 for no limit
def wrap_text(text, font, max_width, max_lines=0):
    """Wrap *text* at whitespace to *max_width*, splitting over-long words.

    Args:
        text: the string to wrap; it is split on whitespace.
        font: font object accepted by ``text_width``.
        max_width: maximum rendered width of a line.
        max_lines: maximum number of lines; 0 means no limit. When the
            text needs more lines, the last kept line loses its final
            character and gets "..." appended.

    Returns:
        The wrapped lines joined with newline characters.
    """
    lines = []
    for word in text.split():
        # Measure the whole candidate line so the space between words is
        # counted at its rendered width.
        if lines and text_width(lines[-1] + " " + word, font) <= max_width:
            lines[-1] += " " + word
            continue
        # The word opens a new line. Brute-force: peel off the longest
        # prefix that fits (at least one character) until nothing is left.
        while word:
            chunk = len(word)
            while chunk > 1 and text_width(word[:chunk], font) > max_width:
                chunk -= 1
            lines.append(word[:chunk])
            word = word[chunk:]
    if max_lines and len(lines) > max_lines:
        # Truncate only when a limit is set; slicing unconditionally with
        # max_lines == 0 would discard every line.
        lines = lines[:max_lines]
        lines[-1] = lines[-1][:-1] + "..."
    return "\n".join(lines)
source to share