Skip to content
Snippets Groups Projects
Commit 20b389c3 authored by Daniel Ecer
Browse files

treat asterisk as junk; treat repeating characters as junk

parent 6bbdc0df
No related branches found
No related tags found
No related merge requests found
@@ -30,7 +30,9 @@ def len_index_range(index_range):
# Junk predicate: ``s`` is the sequence being scanned and ``i`` the index
# under test.  The element at ``i`` is treated as junk when it is
#   * a space or comma directly after a dot,
#   * a dot directly after a letter,
#   * a repeat of the element directly before it, or
#   * an asterisk (at any index, including 0).
# The first three rules look back one position, so they share a single
# ``i > 0`` guard; without it ``s[i - 1]`` would wrap around to ``s[-1]``.
DEFAULT_ISJUNK = lambda s, i: (
    (i > 0 and (
        (s[i - 1] == '.' and s[i] in (' ', ',')) or
        (s[i - 1].isalpha() and s[i] == '.') or
        s[i - 1] == s[i]
    )) or
    s[i] == '*'
)
# Junk predicate that flags every dot in ``s``, regardless of its neighbours.
DOT_IS_JUNK = lambda s, i: s[i] == '.'
......
@@ -20,6 +20,12 @@ class TestRemoveJunk(object):
def test_should_remove_dots_after_capitals(self):
    """With DOT_IS_JUNK as the predicate, 'P.O. Box' is expected to become 'PO Box'."""
    assert remove_junk('P.O. Box', DOT_IS_JUNK) == 'PO Box'
def test_should_remove_asterisk_after_capitals(self):
    """Without an explicit junk predicate, the trailing asterisk is expected to be dropped."""
    assert remove_junk('Mr Beam*') == 'Mr Beam'
def test_should_remove_repeating_characters(self):
    """Without an explicit junk predicate, the run 'eee' is expected to collapse to one 'e'."""
    assert remove_junk('Mr Beeeam') == 'Mr Beam'
class TestInvertIndexRanges(object): class TestInvertIndexRanges(object):
def test_should_return_empty_for_empty_range(self): def test_should_return_empty_for_empty_range(self):
assert list(invert_index_ranges([], 0, 0)) == list([]) assert list(invert_index_ranges([], 0, 0)) == list([])
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment