Skip to content

Commit 42f43e2

Browse files
authored
Merge pull request #97 from robinwhittleton/nbsp-support
Add an option to preserve the input space characters
2 parents 418c57c + 027dd21 commit 42f43e2

2 files changed

Lines changed: 30 additions & 4 deletions

File tree

titlecase/__init__.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -77,14 +77,18 @@ def set_small_word_list(small=SMALL):
7777
SUBPHRASE = regex.compile(r'([:.;?!][ ])(%s)' % small)
7878

7979

80-
def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False):
80+
def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False, normalise_space_characters=False):
8181
"""
8282
:param text: Titlecases input text
8383
:param callback: Callback function that returns the titlecase version of a specific word
8484
:param small_first_last: Capitalize small words (e.g. 'A') at the beginning; disabled when recursing
85+
:param preserve_blank_lines: Preserve blank lines in the output
86+
:param normalise_space_characters: Convert all original spaces to normal space characters
8587
:type text: str
8688
:type callback: function
8789
:type small_first_last: bool
90+
:type preserve_blank_lines: bool
91+
:type normalise_space_characters: bool
8892
8993
This filter changes all words to Title Caps, and attempts to be clever
9094
about *un*capitalizing SMALL words like a/an/the in the input.
@@ -100,7 +104,9 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
100104
processed = []
101105
for line in lines:
102106
all_caps = line.upper() == line
103-
words = regex.split('[\t ]', line)
107+
split_line = regex.split(r'(\s)', line)
108+
words = split_line[::2]
109+
spaces = split_line[1::2]
104110
tc_line = []
105111
for word in words:
106112
if callback:
@@ -188,7 +194,13 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
188194
lambda m: m.group(0).capitalize(), tc_line[-1]
189195
)
190196

191-
result = " ".join(tc_line)
197+
if normalise_space_characters:
198+
result = " ".join(tc_line)
199+
else:
200+
line_to_be_joined = tc_line + spaces
201+
line_to_be_joined[::2] = tc_line
202+
line_to_be_joined[1::2] = spaces
203+
result = "".join(line_to_be_joined)
192204

193205
result = SUBPHRASE.sub(lambda m: '%s%s' % (
194206
m.group(1),

titlecase/tests.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
from titlecase import titlecase, set_small_word_list, create_wordlist_filter_from_file
1313

1414

15-
# (executed by `test_input_output` below)
15+
# (executed by `test_specific_string` below)
1616
TEST_DATA = (
1717
(
1818
"",
@@ -307,6 +307,10 @@
307307
"Mr mr Mrs Ms Mss Dr dr , Mr. and Mrs. Person",
308308
"Mr Mr Mrs Ms MSS Dr Dr , Mr. And Mrs. Person",
309309
),
310+
(
311+
"a mix of\tdifferent\u200aspace\u2006characters",
312+
"A Mix of\tDifferent\u200aSpace\u2006Characters",
313+
),
310314
)
311315

312316

@@ -429,6 +433,16 @@ def test_complex_blanks(self):
429433
self.assertEqual(titlecase(s, preserve_blank_lines=True),
430434
'\n\nLeading Blank\n\n\nMulti-Blank\n\n\n\n\nTrailing Blank\n\n')
431435

436+
class TestNormaliseSpaceCharacters(unittest.TestCase):
437+
def test_tabs(self):
438+
s = 'text\twith\ttabs'
439+
self.assertEqual(titlecase(s), 'Text\tWith\tTabs')
440+
self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Tabs')
441+
442+
def test_nbsps(self):
443+
s = 'text with nonbreaking spaces'
444+
self.assertEqual(titlecase(s), 'Text With Nonbreaking Spaces')
445+
self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Nonbreaking Spaces')
432446

433447
if __name__ == '__main__':
434448
unittest.main()

0 commit comments

Comments
 (0)