Skip to content

Commit 60e84ce

Browse files
committed
random_one for imperceptible word swaps
1 parent dabb8f6 commit 60e84ce

3 files changed

Lines changed: 68 additions & 16 deletions

File tree

textattack/transformations/word_swaps/word_swap_deletions.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .word_swap_differential_evolution import WordSwapDifferentialEvolution
77
from typing import List, Tuple
88
from textattack.shared import AttackedText
9+
import numpy as np
910

1011
class WordSwapDeletions(WordSwapDifferentialEvolution):
1112
"""
@@ -16,11 +17,12 @@ class WordSwapDeletions(WordSwapDifferentialEvolution):
1617
https://arxiv.org/abs/2106.09898
1718
"""
1819

19-
def __init__(self, **kwargs):
20+
def __init__(self, random_one=False, **kwargs):
2021
super().__init__(**kwargs)
2122
self.del_chr = chr(0x8)
2223
self.ins_chr_min = '!'
2324
self.ins_chr_max = '~'
25+
self.random_one = random_one
2426

2527
def _get_bounds(self, current_text: AttackedText, max_perturbs: int, _) -> List[Tuple[int, int]]:
2628
return [(-1, len(current_text.text) - 1), (ord(self.ins_chr_min), ord(self.ins_chr_max))] * max_perturbs
@@ -41,13 +43,26 @@ def apply_perturbation(self, current_text: AttackedText, perturbation_vector: Li
4143

4244
def _get_replacement_words(self, word: str) -> List[str]:
4345
candidate_words = []
44-
for i in range(len(word) + 1): # +1 to allow insertions at the end too
45-
for code_point in range(ord(self.ins_chr_min), ord(self.ins_chr_max) + 1):
46-
insert_char = chr(code_point)
47-
perturbed = (
48-
word[:i] + insert_char + self.del_chr + word[i:]
49-
)
50-
candidate_words.append(perturbed)
46+
if self.random_one:
47+
if len(word) == 0:
48+
return []
49+
i = np.random.randint(0, len(word) + 1)
50+
rand_char = chr(np.random.randint(ord(self.ins_chr_min), ord(self.ins_chr_max) + 1))
51+
perturbed = word[:i] + rand_char + self.del_chr + word[i:]
52+
candidate_words.append(perturbed)
53+
else:
54+
for i in range(len(word) + 1): # +1 to allow insertions at the end
55+
for code_point in range(ord(self.ins_chr_min), ord(self.ins_chr_max) + 1):
56+
insert_char = chr(code_point)
57+
perturbed = word[:i] + insert_char + self.del_chr + word[i:]
58+
candidate_words.append(perturbed)
5159
return candidate_words
5260

61+
@property
62+
def deterministic(self):
63+
return not self.random_one
64+
65+
def extra_repr_keys(self):
66+
return super().extra_repr_keys()
67+
5368

textattack/transformations/word_swaps/word_swap_invisible_characters.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import List, Tuple
88
from textattack.shared import AttackedText
99
import random
10+
import numpy as np
1011

1112
class WordSwapInvisibleCharacters(WordSwapDifferentialEvolution):
1213
"""
@@ -17,9 +18,10 @@ class WordSwapInvisibleCharacters(WordSwapDifferentialEvolution):
1718
https://arxiv.org/abs/2106.09898
1819
"""
1920

20-
def __init__(self, **kwargs):
21+
def __init__(self, random_one=False, **kwargs):
2122
super().__init__(**kwargs)
2223
self.invisible_chars = ["\u200B", "\u200C", "\u200D"]
24+
self.random_one = random_one
2325

2426
def _get_bounds(self, current_text: AttackedText, max_perturbs: int, _) -> List[Tuple[int, int]]:
2527
return [(0, len(self.invisible_chars) - 1), (-1, len(current_text.text) - 1)] * max_perturbs
@@ -39,8 +41,23 @@ def apply_perturbation(self, current_text: AttackedText, perturbation_vector: Li
3941

4042
def _get_replacement_words(self, word: str) -> List[str]:
4143
candidate_words = []
42-
for i in range(1, len(word)):
43-
for inv_char in self.invisible_chars:
44-
new_word = word[:i] + inv_char + word[i:]
45-
candidate_words.append(new_word)
44+
if self.random_one:
45+
if len(word) <= 1:
46+
return []
47+
i = np.random.randint(1, len(word)) # insert between characters
48+
inv_char = np.random.choice(self.invisible_chars)
49+
new_word = word[:i] + inv_char + word[i:]
50+
candidate_words.append(new_word)
51+
else:
52+
for i in range(1, len(word)): # start at 1 to avoid invisible prefix
53+
for inv_char in self.invisible_chars:
54+
new_word = word[:i] + inv_char + word[i:]
55+
candidate_words.append(new_word)
4656
return candidate_words
57+
58+
@property
59+
def deterministic(self):
60+
return not self.random_one
61+
62+
def extra_repr_keys(self):
63+
return super().extra_repr_keys()

textattack/transformations/word_swaps/word_swap_reorderings.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import List, Tuple, Union
99
from textattack.shared import AttackedText
1010
from dataclasses import dataclass
11+
import numpy as np
1112

1213
class WordSwapReorderings(WordSwapDifferentialEvolution):
1314
"""
@@ -17,7 +18,7 @@ class WordSwapReorderings(WordSwapDifferentialEvolution):
1718
https://arxiv.org/abs/2106.09898
1819
"""
1920

20-
def __init__(self, **kwargs):
21+
def __init__(self, random_one=False, **kwargs):
2122
super().__init__(**kwargs)
2223
self.PDF = chr(0x202C)
2324
self.LRE = chr(0x202A)
@@ -27,6 +28,7 @@ def __init__(self, **kwargs):
2728
self.PDI = chr(0x2069)
2829
self.LRI = chr(0x2066)
2930
self.RLI = chr(0x2067)
31+
self.random_one = random_one
3032

3133
@dataclass(eq=True, repr=True)
3234
class _Swap:
@@ -68,9 +70,27 @@ def apply_perturbation(self, current_text: AttackedText, perturbation_vector: Li
6870
def _get_replacement_words(self, word: str) -> List[str]:
6971
candidate_words = []
7072
chars = list(word)
71-
for i in range(len(chars) - 1):
73+
74+
if self.random_one:
75+
if len(chars) < 2:
76+
return []
77+
i = np.random.randint(0, len(chars) - 1)
7278
perturbed = chars[:]
7379
perturbed[i:i+2] = [self._Swap(chars[i+1], chars[i])]
7480
transformed = self._apply_swaps(perturbed)
7581
candidate_words.append(transformed)
76-
return candidate_words
82+
else:
83+
for i in range(len(chars) - 1):
84+
perturbed = chars[:]
85+
perturbed[i:i+2] = [self._Swap(chars[i+1], chars[i])]
86+
transformed = self._apply_swaps(perturbed)
87+
candidate_words.append(transformed)
88+
89+
return candidate_words
90+
91+
@property
92+
def deterministic(self):
93+
return not self.random_one
94+
95+
def extra_repr_keys(self):
96+
return super().extra_repr_keys()

0 commit comments

Comments
 (0)