"""Model-based evaluator: uses a language model to judge prompt/response pairs (hivetracered.evaluators.model_evaluator)."""

from typing import Dict, Any, Union, List, Optional, AsyncGenerator
from hivetracered.evaluators.base_evaluator import BaseEvaluator
from hivetracered.models.base_model import Model
from abc import abstractmethod
class ModelEvaluator(BaseEvaluator):
    """
    Evaluator that uses a language model to evaluate prompts and responses.

    This class can be extended to implement specific evaluation methodologies:
    subclasses must implement ``_parse_evaluation_response`` to turn the judge
    model's raw output into a structured result dictionary, and may override
    ``_get_extra_format_kwargs`` to add placeholders to the prompt template.
    """

    def __init__(self, model: Model, evaluation_prompt_template: str, name: Optional[str] = None, description: Optional[str] = None):
        """
        Initialize the model-based evaluator.

        Args:
            model: The model to use for evaluation
            evaluation_prompt_template: Template string for formatting the evaluation
                prompt; must contain ``{prompt}`` and ``{response}`` placeholders,
                plus any extras supplied by ``_get_extra_format_kwargs``
            name: Optional name for the evaluator
            description: Optional description for the evaluator
        """
        self.model = model
        self.evaluation_prompt_template = evaluation_prompt_template
        self._name = name
        self._description = description

    def _get_extra_format_kwargs(self) -> Dict[str, Any]:
        """
        Return extra keyword arguments for the evaluation prompt template.

        Override in subclasses to inject additional placeholders (e.g. {goal}).
        """
        return {}

    def create_evaluation_prompt(self, prompt: Union[str, List[Dict[str, str]]], response: Any) -> str:
        """
        Create the evaluation prompt to send to the model.

        Args:
            prompt: The original prompt as a string or list of messages
            response: The model's response to evaluate; may be a plain string,
                an object exposing a ``content`` attribute, or a dict with a
                ``"content"`` key — anything else is stringified via ``str()``

        Returns:
            The formatted evaluation prompt
        """
        # Normalize the prompt to plain text.
        if isinstance(prompt, str):
            prompt_text = prompt
        elif isinstance(prompt, list) and all(isinstance(item, dict) for item in prompt):
            # For chat-style prompts, concatenate all roles and content.
            # Messages without a "content" key are skipped entirely.
            prompt_text = "\n".join([f"{msg.get('role', 'unknown')}: {msg.get('content', '')}" for msg in prompt if "content" in msg])
        else:
            prompt_text = str(prompt)

        # Normalize the response to plain text. The `.content` check comes
        # first so message-like objects are unwrapped before falling back
        # to str/dict handling.
        if hasattr(response, 'content'):
            response_text = response.content
        elif isinstance(response, str):
            response_text = response
        elif isinstance(response, dict) and "content" in response:
            response_text = response["content"]
        else:
            response_text = str(response)

        # Format the evaluation prompt, letting subclasses inject extra
        # placeholders via _get_extra_format_kwargs().
        return self.evaluation_prompt_template.format(
            prompt=prompt_text,
            response=response_text,
            **self._get_extra_format_kwargs(),
        )

    def evaluate(self, prompt: Union[str, List[Dict[str, str]]], response: Any) -> Dict[str, Any]:
        """
        Evaluate a model response by using another model as evaluator.

        Args:
            prompt: The original prompt as a string or list of messages
            response: The model's response to evaluate

        Returns:
            A dictionary containing evaluation results, as produced by
            ``_parse_evaluation_response``
        """
        evaluation_prompt = self.create_evaluation_prompt(prompt, response)
        evaluation_response = self.model.invoke(evaluation_prompt)
        return self._parse_evaluation_response(evaluation_response)

    @abstractmethod
    def _parse_evaluation_response(self, evaluation_response: Dict[str, Any]) -> Dict[str, Any]:
        """
        Parse the evaluation response from the model.

        Must be implemented by subclasses to convert the judge model's raw
        output into a structured result dictionary.
        """
        pass

    async def stream_abatch(self, prompts: List[Union[str, List[Dict[str, str]]]], responses: List[Any]) -> AsyncGenerator[Dict[str, Any], None]:
        """
        Stream a batch of evaluations.

        Builds one evaluation prompt per (prompt, response) pair — extra
        items in the longer list are dropped by ``zip`` — then yields each
        parsed result as the underlying model streams its outputs.
        """
        evaluation_prompts = [self.create_evaluation_prompt(prompt, response) for prompt, response in zip(prompts, responses)]
        async for evaluation_response in self.model.stream_abatch(evaluation_prompts):
            yield self._parse_evaluation_response(evaluation_response)

    def get_name(self) -> str:
        """
        Get the name of the evaluator.

        Returns:
            The name of the evaluator; falls back to a generic label when no
            (truthy) name was supplied at construction time
        """
        if self._name:
            return self._name
        return "Model-based Evaluator"

    def get_description(self) -> str:
        """
        Get the description of the evaluator.

        Returns:
            A description of what the evaluator does; falls back to a generic
            description when none was supplied at construction time
        """
        if self._description:
            return self._description
        return "Evaluates responses using a language model to analyze prompt-response pairs"

    def get_params(self) -> Dict[str, Any]:
        """
        Get the parameters of the evaluator.

        Returns:
            The underlying model's parameters merged with this evaluator's
            template, name, and description (evaluator keys win on conflict)
        """
        return {
            **self.model.get_params(),
            "evaluation_prompt_template": self.evaluation_prompt_template,
            "name": self.get_name(),
            "description": self.get_description(),
        }