EvaluatorCallbackHandler#

class langchain_core.tracers.evaluation.EvaluatorCallbackHandler(
evaluators: Sequence[langsmith.RunEvaluator],
client: langsmith.Client | None = None,
example_id: UUID | str | None = None,
skip_unfinished: bool = True,
project_name: str | None = 'evaluators',
max_concurrency: int | None = None,
**kwargs: Any,
)[source]#

Tracer that runs a run evaluator whenever a run is persisted.

example_id#

Union[UUID, None] The example ID associated with the runs.

client#

Client The LangSmith client instance used for evaluating the runs.

evaluators#

Sequence[RunEvaluator] The sequence of run evaluators to be executed.

executor#

ThreadPoolExecutor The thread pool executor used for running the evaluators.

futures#

set[Future] The set of futures representing the running evaluators.

skip_unfinished#

bool Whether to skip runs that are unfinished or that raised an error.

project_name#

Optional[str] The LangSmith project name to organize eval chain runs under.

Create an EvaluatorCallbackHandler.

Parameters:
  • evaluators (Sequence[langsmith.RunEvaluator]) – The run evaluators to apply to all top-level runs.

  • client (Optional[langsmith.Client]) – The LangSmith client instance to use for evaluating the runs. If not specified, a new instance will be created.

  • example_id (Optional[Union[UUID, str]]) – The example ID to be associated with the runs.

  • skip_unfinished (bool) – Whether to skip unfinished runs.

  • project_name (Optional[str]) – The LangSmith project name to organize eval chain runs under.

  • max_concurrency (Optional[int]) – The maximum number of concurrent evaluators to run.

  • kwargs (Any)
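
Example

A minimal usage sketch, not canonical usage: it assumes a hypothetical ExactMatchEvaluator built on langsmith.evaluation.RunEvaluator and a simple RunnableLambda chain, and it assumes LangSmith credentials (e.g. LANGSMITH_API_KEY) are configured so feedback can be logged.

    from langchain_core.runnables import RunnableLambda
    from langchain_core.tracers.evaluation import EvaluatorCallbackHandler
    from langsmith.evaluation import EvaluationResult, RunEvaluator


    class ExactMatchEvaluator(RunEvaluator):
        """Hypothetical evaluator: scores 1 when the run output matches the example output."""

        def evaluate_run(self, run, example=None) -> EvaluationResult:
            predicted = (run.outputs or {}).get("output")
            expected = (example.outputs or {}).get("output") if example else None
            return EvaluationResult(key="exact_match", score=int(predicted == expected))


    handler = EvaluatorCallbackHandler(
        evaluators=[ExactMatchEvaluator()],
        project_name="evaluators",  # project under which evaluator runs are organized
        max_concurrency=2,          # cap the thread pool used to run evaluators
    )

    # Any Runnable works; the handler evaluates each top-level run once it is persisted.
    chain = RunnableLambda(lambda x: {"output": x["question"].upper()})
    chain.invoke({"question": "hello"}, config={"callbacks": [handler]})

    # Block until all queued evaluator futures have completed.
    handler.wait_for_futures()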

Attributes

ignore_agent

Whether to ignore agent callbacks.

ignore_chain

Whether to ignore chain callbacks.

ignore_chat_model

Whether to ignore chat model callbacks.

ignore_custom_event

Whether to ignore custom events.

ignore_llm

Whether to ignore LLM callbacks.

ignore_retriever

Whether to ignore retriever callbacks.

ignore_retry

Whether to ignore retry callbacks.

log_missing_parent

name

raise_error

Whether to raise an error if an exception occurs.

run_inline

Whether to run the callback inline.

Methods

__init__(evaluators[, client, example_id, ...])

Create an EvaluatorCallbackHandler.

on_agent_action(action, *, run_id[, ...])

Run on agent action.

on_agent_finish(finish, *, run_id[, ...])

Run on the agent end.

on_chain_end(outputs, *, run_id[, inputs])

End a trace for a chain run.

on_chain_error(error, *[, inputs])

Handle an error for a chain run.

on_chain_start(serialized, inputs, *, run_id)

Start a trace for a chain run.

on_chat_model_start(serialized, messages, *, ...)

Start a trace for an LLM run.

on_custom_event(name, data, *, run_id[, ...])

Override to define a handler for a custom event.

on_llm_end(response, *, run_id, **kwargs)

End a trace for an LLM run.

on_llm_error(error, *, run_id, **kwargs)

Handle an error for an LLM run.

on_llm_new_token(token, *[, chunk, ...])

Run on new LLM token.

on_llm_start(serialized, prompts, *, run_id)

Start a trace for an LLM run.

on_retriever_end(documents, *, run_id, **kwargs)

Run when the Retriever ends running.

on_retriever_error(error, *, run_id, **kwargs)

Run when Retriever errors.

on_retriever_start(serialized, query, *, run_id)

Run when the Retriever starts running.

on_retry(retry_state, *, run_id, **kwargs)

Run on retry.

on_text(text, *, run_id[, parent_run_id])

Run on an arbitrary text.

on_tool_end(output, *, run_id, **kwargs)

End a trace for a tool run.

on_tool_error(error, *, run_id, **kwargs)

Handle an error for a tool run.

on_tool_start(serialized, input_str, *, run_id)

Start a trace for a tool run.

wait_for_futures()

Wait for all futures to complete.

__init__(
evaluators: Sequence[langsmith.RunEvaluator],
client: langsmith.Client | None = None,
example_id: UUID | str | None = None,
skip_unfinished: bool = True,
project_name: str | None = 'evaluators',
max_concurrency: int | None = None,
**kwargs: Any,
) → None[source]#

Create an EvaluatorCallbackHandler.

Parameters:
  • evaluators (Sequence[langsmith.RunEvaluator]) – The run evaluators to apply to all top-level runs.

  • client (Optional[langsmith.Client]) – The LangSmith client instance to use for evaluating the runs. If not specified, a new instance will be created.

  • example_id (Optional[Union[UUID, str]]) – The example ID to be associated with the runs.

  • skip_unfinished (bool) – Whether to skip unfinished runs.

  • project_name (Optional[str]) – The LangSmith project name to organize eval chain runs under.

  • max_concurrency (Optional[int]) – The maximum number of concurrent evaluators to run.

  • kwargs (Any)

Return type:

None
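
A construction-only sketch for evaluating runs against a known dataset example: my_evaluator is a hypothetical RunEvaluator instance and the example ID is a placeholder.

    from langsmith import Client

    handler = EvaluatorCallbackHandler(
        evaluators=[my_evaluator],
        client=Client(),          # reuse an existing LangSmith client
        example_id="00000000-0000-0000-0000-000000000000",  # placeholder example ID
        skip_unfinished=True,     # ignore runs that are unfinished or errored
    )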

on_agent_action(
action: AgentAction,
*,
run_id: UUID,
parent_run_id: UUID | None = None,
**kwargs: Any,
) → Any#

Run on agent action.

Parameters:
  • action (AgentAction) – The agent action.

  • run_id (UUID) – The run ID. This is the ID of the current run.

  • parent_run_id (UUID) – The parent run ID. This is the ID of the parent run.

  • kwargs (Any) – Additional keyword arguments.

Return type:

Any

on_agent_finish(
finish: AgentFinish,
*,
run_id: UUID,
parent_run_id: UUID | None = None,
**kwargs: Any,
) → Any#

Run on the agent end.

Parameters:
  • finish (AgentFinish) – The agent finish.

  • run_id (UUID) – The run ID. This is the ID of the current run.

  • parent_run_id (UUID) – The parent run ID. This is the ID of the parent run.

  • kwargs (Any) – Additional keyword arguments.

Return type:

Any

on_chain_end(
outputs: dict[str, Any],
*,
run_id: UUID,
inputs: dict[str, Any] | None = None,
**kwargs: Any,
) → Run#

End a trace for a chain run.

Parameters:
  • outputs (dict[str, Any]) – The outputs for the chain.

  • run_id (UUID) – The run ID.

  • inputs (Optional[dict[str, Any]]) – The inputs for the chain. Defaults to None.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_chain_error(
error: BaseException,
*,
inputs: dict[str, Any] | None = None,
run_id: UUID,
**kwargs: Any,
) → Run#

Handle an error for a chain run.

Parameters:
  • error (BaseException) – The error.

  • inputs (Optional[dict[str, Any]]) – The inputs for the chain. Defaults to None.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_chain_start(
serialized: dict[str, Any],
inputs: dict[str, Any],
*,
run_id: UUID,
tags: list[str] | None = None,
parent_run_id: UUID | None = None,
metadata: dict[str, Any] | None = None,
run_type: str | None = None,
name: str | None = None,
**kwargs: Any,
) → Run#

Start a trace for a chain run.

Parameters:
  • serialized (dict[str, Any]) – The serialized chain.

  • inputs (dict[str, Any]) – The inputs for the chain.

  • run_id (UUID) – The run ID.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • run_type (Optional[str]) – The type of the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_chat_model_start(
serialized: dict[str, Any],
messages: list[list[BaseMessage]],
*,
run_id: UUID,
tags: list[str] | None = None,
parent_run_id: UUID | None = None,
metadata: dict[str, Any] | None = None,
name: str | None = None,
**kwargs: Any,
) → Run#

Start a trace for an LLM run.

Parameters:
  • serialized (dict[str, Any]) – The serialized model.

  • messages (list[list[BaseMessage]]) – The messages to start the chat with.

  • run_id (UUID) – The run ID.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_custom_event(
name: str,
data: Any,
*,
run_id: UUID,
tags: list[str] | None = None,
metadata: dict[str, Any] | None = None,
**kwargs: Any,
) → Any#

Override to define a handler for a custom event.

Parameters:
  • name (str) – The name of the custom event.

  • data (Any) – The data for the custom event. Format will match the format specified by the user.

  • run_id (UUID) – The ID of the run.

  • tags (Optional[list[str]]) – The tags associated with the custom event (includes inherited tags).

  • metadata (Optional[dict[str, Any]]) – The metadata associated with the custom event (includes inherited metadata).

  • kwargs (Any)

Return type:

Any

Added in version 0.2.15.
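
A hedged sketch of overriding this hook: the subclass name and print-based handling are illustrative only; the events themselves would be emitted elsewhere with langchain_core.callbacks.dispatch_custom_event (or its async counterpart).

    from __future__ import annotations

    from typing import Any
    from uuid import UUID

    from langchain_core.tracers.evaluation import EvaluatorCallbackHandler


    class LoggingEvaluatorHandler(EvaluatorCallbackHandler):
        def on_custom_event(
            self,
            name: str,
            data: Any,
            *,
            run_id: UUID,
            tags: list[str] | None = None,
            metadata: dict[str, Any] | None = None,
            **kwargs: Any,
        ) -> Any:
            # Observe custom events dispatched from inside a traced run.
            print(f"custom event {name!r} on run {run_id}: {data}")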

on_llm_end(
response: LLMResult,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

End a trace for an LLM run.

Parameters:
  • response (LLMResult) – The response.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_llm_error(
error: BaseException,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Handle an error for an LLM run.

Parameters:
  • error (BaseException) – The error.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_llm_new_token(
token: str,
*,
chunk: GenerationChunk | ChatGenerationChunk | None = None,
run_id: UUID,
parent_run_id: UUID | None = None,
**kwargs: Any,
) → Run#

Run on new LLM token. Only available when streaming is enabled.

Parameters:
  • token (str) – The token.

  • chunk (Optional[Union[GenerationChunk, ChatGenerationChunk]]) – The chunk. Defaults to None.

  • run_id (UUID) – The run ID.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_llm_start(
serialized: dict[str, Any],
prompts: list[str],
*,
run_id: UUID,
tags: list[str] | None = None,
parent_run_id: UUID | None = None,
metadata: dict[str, Any] | None = None,
name: str | None = None,
**kwargs: Any,
) → Run#

Start a trace for an LLM run.

Parameters:
  • serialized (dict[str, Any]) – The serialized model.

  • prompts (list[str]) – The prompts to start the LLM with.

  • run_id (UUID) – The run ID.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_retriever_end(
documents: Sequence[Document],
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Run when the Retriever ends running.

Parameters:
  • documents (Sequence[Document]) – The documents.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_retriever_error(
error: BaseException,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Run when Retriever errors.

Parameters:
  • error (BaseException) – The error.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_retriever_start(
serialized: dict[str, Any],
query: str,
*,
run_id: UUID,
parent_run_id: UUID | None = None,
tags: list[str] | None = None,
metadata: dict[str, Any] | None = None,
name: str | None = None,
**kwargs: Any,
) → Run#

Run when the Retriever starts running.

Parameters:
  • serialized (dict[str, Any]) – The serialized retriever.

  • query (str) – The query.

  • run_id (UUID) – The run ID.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_retry(
retry_state: RetryCallState,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Run on retry.

Parameters:
  • retry_state (RetryCallState) – The retry state.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_text(
text: str,
*,
run_id: UUID,
parent_run_id: UUID | None = None,
**kwargs: Any,
) → Any#

Run on an arbitrary text.

Parameters:
  • text (str) – The text.

  • run_id (UUID) – The run ID. This is the ID of the current run.

  • parent_run_id (UUID) – The parent run ID. This is the ID of the parent run.

  • kwargs (Any) – Additional keyword arguments.

Return type:

Any

on_tool_end(
output: Any,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

End a trace for a tool run.

Parameters:
  • output (Any) – The output for the tool.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_tool_error(
error: BaseException,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Handle an error for a tool run.

Parameters:
  • error (BaseException) – The error.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_tool_start(
serialized: dict[str, Any],
input_str: str,
*,
run_id: UUID,
tags: list[str] | None = None,
parent_run_id: UUID | None = None,
metadata: dict[str, Any] | None = None,
name: str | None = None,
inputs: dict[str, Any] | None = None,
**kwargs: Any,
) → Run#

Start a trace for a tool run.

Parameters:
  • serialized (dict[str, Any]) – The serialized tool.

  • input_str (str) – The input string.

  • run_id (UUID) – The run ID.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • inputs (Optional[dict[str, Any]]) – The inputs for the tool.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

wait_for_futures() → None[source]#

Wait for all futures to complete.

Return type:

None
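
A minimal sketch of when to call this: after running your chains with the handler attached, block before process exit so the thread-pooled evaluators can finish. Here, handler and chain are assumed to be defined as in the constructor example above.

    for question in ("q1", "q2", "q3"):
        chain.invoke({"question": question}, config={"callbacks": [handler]})

    # Blocks until every pending evaluator future has completed.
    handler.wait_for_futures()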