Source code for hivetracered.attacks.composed_attack

from typing import Union, List, Dict, Optional, AsyncGenerator
from hivetracered.attacks.base_attack import BaseAttack

[docs] class ComposedAttack(BaseAttack): """ An attack that composes two attacks sequentially, where the output of the inner attack becomes the input to the outer attack, creating a pipeline of transformations. """
[docs] def __init__(self, outer_attack: BaseAttack, inner_attack: BaseAttack, name: Optional[str] = None, description: Optional[str] = None): """ Initialize a composed attack with inner and outer attack components. Args: outer_attack: The attack to apply second in the composition inner_attack: The attack to apply first in the composition name: Optional custom name for the attack (defaults to "Composed(outer ∘ inner)") description: Optional custom description (defaults to composition description) """ self.outer_attack = outer_attack self.inner_attack = inner_attack self._name = name or f"Composed({outer_attack.get_name()}{inner_attack.get_name()})" self._description = description or f"Composes {outer_attack.get_name()} after {inner_attack.get_name()}"
[docs] def apply(self, prompt: Union[str, List[Dict[str, str]]]) -> Union[str, List[Dict[str, str]]]: """ Apply the inner attack followed by the outer attack to the given prompt. Args: prompt: A string or list of messages to apply the attacks to Returns: The transformed prompt with both attacks applied sequentially """ inner_result = self.inner_attack.apply(prompt) return self.outer_attack.apply(inner_result)
[docs] async def stream_abatch(self, prompts: List[Union[str, List[Dict[str, str]]]]) -> AsyncGenerator[List[Union[str, List[Dict[str, str]]]], None]: """ Apply the composition of attacks to a batch of prompts asynchronously. Args: prompts: A list of prompts to apply the attacks to Returns: An async generator yielding transformed prompts """ # First, apply inner attack to all prompts inner_results = [] async for result in self.inner_attack.stream_abatch(prompts): inner_results.append(result) # Then, apply outer attack to all results async for result in self.outer_attack.stream_abatch(inner_results): yield result
[docs] def get_name(self) -> str: """ Get the name of the attack. Returns: The custom name if provided, otherwise a generated name based on component attacks """ return self._name
[docs] def get_description(self) -> str: """ Get the description of the attack. Returns: The custom description if provided, otherwise a generated description """ return self._description
[docs] def get_params(self): """ Get the parameters of the attack. Returns: A dictionary containing both the inner and outer attack parameters """ return { "outer_attack": self.outer_attack.get_params(), "inner_attack": self.inner_attack.get_params(), "name": self.get_name(), "description": self.get_description(), }