Source code for hivetracered.attacks.composed_attack

from collections.abc import AsyncGenerator
from hivetracered.attacks.base_attack import BaseAttack


class ComposedAttack(BaseAttack):
    """
    Chain two attacks so that the inner attack's output feeds the outer attack.

    Applying the composition to a prompt is equivalent to
    ``outer_attack.apply(inner_attack.apply(prompt))``.
    """

    def __init__(
        self,
        outer_attack: BaseAttack,
        inner_attack: BaseAttack,
        name: str | None = None,
        description: str | None = None,
    ):
        """
        Store both component attacks and resolve the display name/description.

        Args:
            outer_attack: The attack applied second (to the inner attack's output).
            inner_attack: The attack applied first (to the original prompt).
            name: Optional custom name. When omitted (or empty), defaults to
                "Composed(<outer name> ∘ <inner name>)".
            description: Optional custom description. When omitted (or empty),
                defaults to "Composes <outer name> after <inner name>".
        """
        self.outer_attack = outer_attack
        self.inner_attack = inner_attack

        outer_name = outer_attack.get_name()
        inner_name = inner_attack.get_name()
        # `or` (rather than an `is None` check) means an empty string also
        # falls back to the generated default.
        self._name = name or f"Composed({outer_name} ∘ {inner_name})"
        self._description = description or f"Composes {outer_name} after {inner_name}"

    def apply(self, prompt: str | list[dict[str, str]]) -> str | list[dict[str, str]]:
        """
        Apply the inner attack, then the outer attack, to a single prompt.

        Args:
            prompt: A string or a list of message dicts to transform.

        Returns:
            The result of the outer attack applied to the inner attack's output.
        """
        return self.outer_attack.apply(self.inner_attack.apply(prompt))

    async def stream_abatch(
        self, prompts: list[str | list[dict[str, str]]]
    ) -> AsyncGenerator[list[str | list[dict[str, str]]], None]:
        """
        Apply the composition to a batch of prompts asynchronously.

        The inner attack's stream is drained completely before the outer
        attack starts, because the outer attack's ``stream_abatch`` is called
        once with the full list of collected inner results.

        Args:
            prompts: The prompts to run through the inner attack.

        Yields:
            Each item produced by the outer attack's ``stream_abatch``.
        """
        # Every item the inner stream yields becomes one entry of the batch
        # handed to the outer attack.
        # NOTE(review): the annotation says each yielded item is a list, while
        # this code treats each one as a single prompt for the outer attack --
        # confirm against BaseAttack.stream_abatch's contract.
        inner_results = [
            item async for item in self.inner_attack.stream_abatch(prompts)
        ]
        async for transformed in self.outer_attack.stream_abatch(inner_results):
            yield transformed

    def get_name(self) -> str:
        """
        Return the attack's name.

        Returns:
            The custom name given at construction, or the generated
            "Composed(outer ∘ inner)" name.
        """
        return self._name

    def get_description(self) -> str:
        """
        Return the attack's description.

        Returns:
            The custom description given at construction, or the generated
            "Composes outer after inner" description.
        """
        return self._description

    def get_params(self) -> dict[str, object]:
        """
        Return the parameters describing this composed attack.

        Returns:
            A dictionary with the outer and inner attacks' own ``get_params()``
            results under "outer_attack" and "inner_attack", plus this attack's
            "name" and "description".
        """
        return {
            "outer_attack": self.outer_attack.get_params(),
            "inner_attack": self.inner_attack.get_params(),
            "name": self.get_name(),
            "description": self.get_description(),
        }