# Source code for hivetracered.attacks.algo_attack
from typing import Union, List, Dict, Optional, AsyncGenerator
from abc import ABC, abstractmethod
from hivetracered.attacks.template_attack import TemplateAttack
# [docs]
class AlgoAttack(TemplateAttack, ABC):
    """
    Abstract base class for algorithmic attacks that apply transformations to text.

    The transformed text is delivered either raw or wrapped in an instruction
    template, depending on the ``raw`` flag given at construction time.

    ``apply`` relies on ``self.transform(text)`` and ``self.template``; neither
    is defined in this class, so they must be supplied by a subclass or by a
    base class.
    """

    def __init__(
        self,
        raw: bool = False,
        template: Optional[str] = None,
        name: Optional[str] = None,
        description: Optional[str] = None,
    ):
        """
        Initialize the algorithmic attack.

        Args:
            raw: If True, applies the transformation without instructions;
                if False, wraps the transformed text with the template.
            template: Custom instruction template with a '{prompt}' placeholder;
                forwarded unchanged to the parent initializer.
            name: Optional name for the attack; forwarded to the parent initializer.
            description: Optional description for the attack; forwarded to the
                parent initializer.
        """
        super().__init__(template=template, name=name, description=description)
        self.raw = raw

    def _transform_and_wrap(self, text: str) -> str:
        """Transform ``text`` and, unless in raw mode, embed it in the template."""
        transformed_text = self.transform(text)
        if self.raw:
            return transformed_text
        return self.template.format(prompt=transformed_text)

    def apply(self, prompt: Union[str, List[Dict[str, str]]]) -> Union[str, List[Dict[str, str]]]:
        """
        Apply the attack to the given prompt, with or without instructions based on the raw flag.

        For a string prompt the transformed (and optionally templated) string is
        returned. For a list of messages, every message whose ``"role"`` is
        ``"user"`` gets its ``"content"`` replaced; other messages are passed
        through untouched. The input list and its message dicts are never
        modified: a new list is returned and each changed message is a copy.

        Args:
            prompt: A string or list of messages to apply the attack to.

        Returns:
            The transformed prompt with the attack applied.
        """
        if isinstance(prompt, str):
            return self._transform_and_wrap(prompt)

        # New outer list so the caller's list object is left alone.
        result = list(prompt)
        # Walk from the last message to the first so transform() is invoked in
        # the same order as before (matters if a transform is stateful/random).
        for index in reversed(range(len(result))):
            message = result[index]
            if message.get("role") != "user":
                continue
            # Copy before writing: a shallow list copy still shares the message
            # dicts with the caller, so assigning into them would leak changes.
            updated = message.copy()
            updated["content"] = self._transform_and_wrap(message.get("content", ""))
            result[index] = updated
        return result

    async def stream_abatch(self, prompts: List[Union[str, List[Dict[str, str]]]]) -> AsyncGenerator[List[Union[str, List[Dict[str, str]]]], None]:
        """
        Apply the attack to a batch of prompts asynchronously.

        Prompts are processed one at a time, in input order, by calling
        ``apply`` on each.

        Args:
            prompts: A list of prompts to apply the attack to.

        Yields:
            One transformed prompt per input prompt (the result of ``apply``).
            NOTE(review): the return annotation describes a list per item, but a
            single transformed prompt is yielded; confirm against the base class.
        """
        for prompt in prompts:
            yield self.apply(prompt)

    def _get_mode(self, raw: bool = False) -> str:
        """
        Get the mode of the attack.

        The result depends only on the ``raw`` argument, not on ``self.raw``.

        Args:
            raw: Whether the mode is raw or templated.

        Returns:
            " Raw" (with a leading space) when ``raw`` is true, otherwise "".
        """
        return " Raw" if raw else ""