Skip to content

Commit d35e1e3

Browse files
committed
docs
1 parent 7126f22 commit d35e1e3

5 files changed

Lines changed: 83 additions & 44 deletions

File tree

textattack/attack_recipes/bad_characters_2021.py

Lines changed: 59 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -15,65 +15,80 @@ class BadCharacters2021(AttackRecipe):
1515

1616
"""
1717
Imperceptible Perturbations Attack Recipe
18-
=======================================================
18+
=========================================
1919
20-
Implements imperceptible adversarial attacks on NLP models as outlined in the Bad Characters paper
21-
https://arxiv.org/abs/2106.09898.
20+
Implements imperceptible adversarial attacks on NLP models as outlined in the
21+
`Bad Characters paper <https://arxiv.org/abs/2106.09898>`_.
2222
23-
This recipe combines imperceptible transformations with the Differential Evolution
24-
search method. It supports a variety of goal functions (targeted, untargeted,
23+
This recipe combines imperceptible transformations with the Differential Evolution
24+
search method. It supports a variety of goal functions (targeted, untargeted,
2525
NER, translation) and several types of character-level perturbations.
2626
27-
Transformations supported:
28-
- WordSwapInvisibleCharacters: injects invisible Unicode characters
29-
- WordSwapHomoglyphSwap: replaces characters with homoglyphs
30-
- WordSwapDeletions: inserts deletion control characters
31-
- WordSwapReorderings: inserts reordering control characters
27+
**Transformations supported:**
3228
33-
Goal functions supported:
34-
- Targeted classification (probability output)
35-
- Strict targeted classification (probability output)
36-
- Named Entity Recognition (list of entity dicts output)
37-
- Logit sum (for logits-based classifiers like toxic comment detection)
38-
- Translation BLEU score minimization
39-
- Translation Levenshtein distance maximization
29+
- ``WordSwapInvisibleCharacters``: injects invisible Unicode characters
30+
- ``WordSwapHomoglyphSwap``: replaces characters with homoglyphs
31+
- ``WordSwapDeletions``: inserts deletion control characters
32+
- ``WordSwapReorderings``: inserts reordering control characters
33+
34+
**Goal functions supported:**
35+
36+
- ``TargetedClassification``
37+
- ``TargetedStrict``
38+
- ``TargetedBonus``
- ``NamedEntityRecognition`` (model wrapper outputs a list of entity dictionaries per input)
39+
- ``LogitSum`` (for logits-based classifiers like toxic comment detection)
40+
- ``MinimizeBleu`` (translation BLEU score minimization)
41+
- ``MaximizeLevenshtein`` (translation Levenshtein distance maximization)
4042
4143
All transformations are compatible with all goal functions.
4244
43-
Note: This recipe assumes the model wrapper is compatible with the goal function
44-
chosen. For example, a Named Entity Recognition goal function expects a model wrapper
45-
that outputs a list of dictionaries per input, while classification goals expect
46-
probability or logit arrays.
45+
Note:
46+
This recipe assumes the model wrapper is compatible with the goal function chosen.
47+
For example, a ``NamedEntityRecognition`` goal function expects a model wrapper
48+
that outputs a list of dictionaries per input, while ``LogitSum`` expects an array of logits.
4749
"""
4850

4951
@staticmethod
5052
def build(model_wrapper, goal_function_type: str, perturbation_type: str = None, allow_skip: bool = False, perturbs=1, popsize=32, maxiter=10, **goal_function_kwargs):
5153
"""
5254
Builds an imperceptible attack instance.
5355
54-
Args:
55-
model_wrapper: A TextAttack model wrapper compatible with the selected goal function.
56-
goal_function_type (str, optional): One of:
57-
- "targeted_classification": targeted attack on a classification model (default).
58-
- "targeted_strict": stricter targeted attack on a classification model.
59-
- "targeted_bonus": targeted attack on a classification model that gives a bonus score of 1 if the prediction for the target class is the max of all classes.
60-
- "named_entity_recognition": token-level targeted attack on a NER model.
61-
- "logit_sum": untargeted attack minimizing total logits.
62-
- "minimize_bleu": attack minimizing BLEU score between original and perturbed translations.
63-
- "maximize_levenshtein": attack maximizing Levenshtein distance between original and perturbed translations.
64-
perturbation_type (str, optional): One of:
65-
- "homoglyphs" (default)
66-
- "invisible"
67-
- "deletions"
68-
- "reorderings"
69-
allow_skip (bool): If set to False, the attack will continue even if attacking the unperturbed input string already completes the goal. Set to False in the paper.
70-
perturbs (int): Maximum number of perturbations allowed per input string. Values from 1 to 5 were used in the paper.
71-
popsize (int): Population size for differential evolution. Set to 32 in the paper.
72-
maxiter (int): Maximum number of generations for differential evolution. Set to 10 in the paper.
73-
**goal_function_kwargs: Additional arguments passed to the goal function.
74-
75-
Returns:
76-
textattack.Attack: Configured Attack instance.
56+
Parameters
57+
----------
58+
model_wrapper : ModelWrapper
59+
A TextAttack model wrapper compatible with the selected goal function.
60+
goal_function_type : str
61+
Goal function type. One of:
62+
63+
- ``"targeted_classification"``: targeted attack on a classification model.
64+
- ``"targeted_strict"``: stricter targeted attack.
65+
- ``"targeted_bonus"``: bonus if prediction for target class is highest.
66+
- ``"named_entity_recognition"``: token-level NER attack.
67+
- ``"logit_sum"``: untargeted attack minimizing total logits.
68+
- ``"minimize_bleu"``: translation attack minimizing BLEU.
69+
- ``"maximize_levenshtein"``: translation attack maximizing Levenshtein distance.
70+
perturbation_type : str, optional
71+
Type of character-level perturbation. One of:
72+
73+
- ``"homoglyphs"`` (default)
74+
- ``"invisible"``
75+
- ``"deletions"``
76+
- ``"reorderings"``
77+
allow_skip : bool
78+
If False, the attack will continue even if the unperturbed input string already satisfies the goal.
79+
perturbs : int
80+
Maximum number of perturbations allowed per input string.
81+
popsize : int
82+
Population size for differential evolution. Typically 32.
83+
maxiter : int
84+
Maximum number of generations for differential evolution. Typically 10.
85+
**goal_function_kwargs : dict
86+
Additional arguments passed to the goal function.
87+
88+
Returns
89+
-------
90+
textattack.Attack
91+
Configured Attack instance.
7792
"""
7893

7994
if goal_function_type == "targeted_classification":

textattack/goal_functions/custom/logit_sum.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""
2+
3+
Goal Function for Logit sum
4+
-------------------------------------------------------
5+
"""
6+
17
from textattack.goal_functions import GoalFunction
28
from textattack.goal_function_results import LogitSumGoalFunctionResult
39
import torch

textattack/goal_functions/custom/named_entity_recognition.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""
2+
3+
Goal Function for NamedEntityRecognition
4+
-------------------------------------------------------
5+
"""
6+
17
from textattack.goal_functions import GoalFunction
28
from textattack.goal_function_results import NamedEntityRecognitionGoalFunctionResult
39
import numpy as np

textattack/goal_functions/custom/targeted_bonus.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""
2+
3+
Goal Function for Targeted classification with bonus score
4+
------------------------------------------------------------
5+
"""
6+
17
from textattack.goal_functions import GoalFunction
28
from textattack.goal_function_results import TargetedBonusGoalFunctionResult
39
import numpy as np

textattack/goal_functions/custom/targeted_strict.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""
2+
3+
Goal Function for Strict targeted classification
4+
-------------------------------------------------------
5+
"""
6+
17
from textattack.goal_functions import GoalFunction
28
from textattack.goal_function_results import TargetedStrictGoalFunctionResult
39
import numpy as np

0 commit comments

Comments
 (0)