Skip to content

Commit 027dd21

Browse files
Preserve the input space characters in the output
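We now split each line (on line 107) on any whitespace character (`\s`) rather than just a tab or a space, and re-insert the original whitespace characters when the words are joined back together, so the input's spacing is preserved in the output. To make sure that a user can still choose the older behaviour of joining the words with a plain space, we keep it behind a new boolean parameter called `normalise_space_characters`.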
1 parent fba0551 commit 027dd21

2 files changed

Lines changed: 27 additions & 3 deletions

File tree

titlecase/__init__.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,18 @@ def set_small_word_list(small=SMALL):
7777
SUBPHRASE = regex.compile(r'([:.;?!][ ])(%s)' % small)
7878

7979

80-
def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False):
80+
def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False, normalise_space_characters=False):
8181
"""
8282
:param text: Titlecases input text
8383
:param callback: Callback function that returns the titlecase version of a specific word
8484
:param small_first_last: Capitalize small words (e.g. 'A') at the beginning; disabled when recursing
8585
:param preserve_blank_lines: Preserve blank lines in the output
86+
:param normalise_space_characters: Convert all original spaces to normal space characters
8687
:type text: str
8788
:type callback: function
8889
:type small_first_last: bool
8990
:type preserve_blank_lines: bool
91+
:type normalise_space_characters: bool
9092
9193
This filter changes all words to Title Caps, and attempts to be clever
9294
about *un*capitalizing SMALL words like a/an/the in the input.
@@ -102,7 +104,9 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
102104
processed = []
103105
for line in lines:
104106
all_caps = line.upper() == line
105-
words = regex.split('[\t ]', line)
107+
split_line = regex.split(r'(\s)', line)
108+
words = split_line[::2]
109+
spaces = split_line[1::2]
106110
tc_line = []
107111
for word in words:
108112
if callback:
@@ -190,7 +194,13 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
190194
lambda m: m.group(0).capitalize(), tc_line[-1]
191195
)
192196

193-
result = " ".join(tc_line)
197+
if normalise_space_characters:
198+
result = " ".join(tc_line)
199+
else:
200+
line_to_be_joined = tc_line + spaces
201+
line_to_be_joined[::2] = tc_line
202+
line_to_be_joined[1::2] = spaces
203+
result = "".join(line_to_be_joined)
194204

195205
result = SUBPHRASE.sub(lambda m: '%s%s' % (
196206
m.group(1),

titlecase/tests.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,10 @@
307307
"Mr mr Mrs Ms Mss Dr dr , Mr. and Mrs. Person",
308308
"Mr Mr Mrs Ms MSS Dr Dr , Mr. And Mrs. Person",
309309
),
310+
(
311+
"a mix of\tdifferent\u200aspace\u2006characters",
312+
"A Mix of\tDifferent\u200aSpace\u2006Characters",
313+
),
310314
)
311315

312316

@@ -429,6 +433,16 @@ def test_complex_blanks(self):
429433
self.assertEqual(titlecase(s, preserve_blank_lines=True),
430434
'\n\nLeading Blank\n\n\nMulti-Blank\n\n\n\n\nTrailing Blank\n\n')
431435

436+
class TestNormaliseSpaceCharacters(unittest.TestCase):
437+
def test_tabs(self):
438+
s = 'text\twith\ttabs'
439+
self.assertEqual(titlecase(s), 'Text\tWith\tTabs')
440+
self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Tabs')
441+
442+
def test_nbsps(self):
443+
s = 'text with nonbreaking spaces'
444+
self.assertEqual(titlecase(s), 'Text With Nonbreaking Spaces')
445+
self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Nonbreaking Spaces')
432446

433447
if __name__ == '__main__':
434448
unittest.main()

0 commit comments

Comments
 (0)