Spaces:

anonymous8
/

Rapid-Textual-Adversarial-Defense

Paused

Rapid-Textual-Adversarial-Defense / textattack /transformations /word_merges /word_merge.py

anonymous8/RPD-Demo

initial commit

4943752 about 3 years ago

3.56 kB

	"""
	Word Merge
	------------------------------------------------
	Word Merge transformations act by taking two adjacent words, and "merges" them into one word by deleting one word and replacing another.
	For example, if we can merge the words "the" and "movie" in the text "I like the movie" and get following text: "I like film".
	When we choose to "merge" word at index ``i``, we merge it with the next word at ``i+1``.
	"""
	from textattack.transformations import Transformation


	class WordMerge(Transformation):
	"""An abstract class for word merges."""

	def __call__(
	self,
	current_text,
	pre_transformation_constraints=[],
	indices_to_modify=None,
	shifted_idxs=True,
	):
	"""Returns a list of all possible transformations for ``current_text``.
	Applies the ``pre_transformation_constraints`` then calls
	``_get_transformations``.

	Args:
	current_text: The ``AttackedText`` to transform.
	pre_transformation_constraints: The ``PreTransformationConstraint`` to apply before
	beginning the transformation.
	indices_to_modify: Which word indices should be modified as dictated by the
	``SearchMethod``.
	shifted_idxs (bool): Whether indices have been shifted from
	their original position in the text.
	"""
	if indices_to_modify is None:
	indices_to_modify = set(range(len(current_text.words) - 1))
	else:
	indices_to_modify = set(indices_to_modify)

	if shifted_idxs:
	indices_to_modify = set(
	current_text.convert_from_original_idxs(indices_to_modify)
	)

	for constraint in pre_transformation_constraints:
	allowed_indices = constraint(current_text, self)
	for i in indices_to_modify:
	if i not in allowed_indices and i + 1 not in allowed_indices:
	indices_to_modify.remove(i)

	transformed_texts = self._get_transformations(current_text, indices_to_modify)
	for text in transformed_texts:
	text.attack_attrs["last_transformation"] = self
	if len(text.attack_attrs["newly_modified_indices"]) == 0:
	print("xcv", text, len(text.attack_attrs["newly_modified_indices"]))
	return transformed_texts

	def _get_new_words(self, current_text, index):
	"""Returns a set of new words we can insert at position `index` of `current_text`
	Args:
	current_text (AttackedText): Current text to modify.
	index (int): Position in which to insert a new word
	Returns:
	list[str]: List of new words to insert.
	"""
	raise NotImplementedError()

	def _get_transformations(self, current_text, indices_to_modify):
	"""
	Return a set of transformed texts obtained by insertion a new word in `indices_to_modify`
	Args:
	current_text (AttackedText): Current text to modify.
	indices_to_modify (list[int]): List of positions in which to insert a new word.

	Returns:
	list[AttackedText]: List of transformed texts
	"""
	transformed_texts = []

	for i in indices_to_modify:
	new_words = self._get_new_words(current_text, i)

	for w in new_words:
	temp_text = current_text.replace_word_at_index(i, w)
	transformed_texts.append(temp_text.delete_word_at_index(i + 1))

	return transformed_texts