EvaluatorCallbackHandler#

class langchain_core.tracers.evaluation.EvaluatorCallbackHandler(
evaluators: Sequence[langsmith.RunEvaluator],
client: langsmith.Client | None = None,
example_id: UUID | str | None = None,
skip_unfinished: bool = True,
project_name: str | None = 'evaluators',
max_concurrency: int | None = None,
**kwargs: Any,
)[source]#

Tracer that runs a run evaluator whenever a run is persisted.

example_id#

Union[UUID, None] The example ID associated with the runs.

client#

Client The LangSmith client instance used for evaluating the runs.

evaluators#

Sequence[RunEvaluator] The sequence of run evaluators to be executed.

executor#

ThreadPoolExecutor The thread pool executor used for running the evaluators.

futures#

set[Future] The set of futures representing the running evaluators.

skip_unfinished#

bool Whether to skip runs that are unfinished or that raised an error.

project_name#

Optional[str] The LangSmith project name to organize eval chain runs under.

Create an EvaluatorCallbackHandler.

Parameters:
  • evaluators (Sequence[langsmith.RunEvaluator]) – The run evaluators to apply to all top-level runs.

  • client (Optional[langsmith.Client]) – The LangSmith client instance to use for evaluating the runs. If not specified, a new instance will be created.

  • example_id (Optional[Union[UUID, str]]) – The example ID to be associated with the runs.

  • skip_unfinished (bool) – Whether to skip unfinished runs.

  • project_name (Optional[str]) – The LangSmith project name to organize eval chain runs under.

  • max_concurrency (Optional[int]) – The maximum number of concurrent evaluators to run.

  • kwargs (Any)
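
Example

A minimal usage sketch, not canonical usage: it assumes a hypothetical ExactMatchEvaluator built on langsmith.evaluation.RunEvaluator and a simple RunnableLambda chain, and it assumes LangSmith credentials (e.g. LANGSMITH_API_KEY) are configured so feedback can be logged.

    from langchain_core.runnables import RunnableLambda
    from langchain_core.tracers.evaluation import EvaluatorCallbackHandler
    from langsmith.evaluation import EvaluationResult, RunEvaluator


    class ExactMatchEvaluator(RunEvaluator):
        """Hypothetical evaluator: scores 1 when the run output matches the example output."""

        def evaluate_run(self, run, example=None) -> EvaluationResult:
            predicted = (run.outputs or {}).get("output")
            expected = (example.outputs or {}).get("output") if example else None
            return EvaluationResult(key="exact_match", score=int(predicted == expected))


    handler = EvaluatorCallbackHandler(
        evaluators=[ExactMatchEvaluator()],
        project_name="evaluators",  # project under which evaluator runs are organized
        max_concurrency=2,          # cap the thread pool used to run evaluators
    )

    # Any Runnable works; the handler evaluates each top-level run once it is persisted.
    chain = RunnableLambda(lambda x: {"output": x["question"].upper()})
    chain.invoke({"question": "hello"}, config={"callbacks": [handler]})

    # Block until all queued evaluator futures have completed.
    handler.wait_for_futures()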

Attributes

ignore_agent

Whether to ignore agent callbacks.

ignore_chain

Whether to ignore chain callbacks.

ignore_chat_model

Whether to ignore chat model callbacks.

ignore_custom_event

Whether to ignore custom events.

ignore_llm

Whether to ignore LLM callbacks.

ignore_retriever

Whether to ignore retriever callbacks.

ignore_retry

Whether to ignore retry callbacks.

log_missing_parent

name

raise_error

Whether to raise an error if an exception occurs.

run_inline

Whether to run the callback inline.

Methods

__init__(evaluators[, client, example_id, ...])

Create an EvaluatorCallbackHandler.

on_agent_action(action, *, run_id[, ...])

Run on agent action.

on_agent_finish(finish, *, run_id[, ...])

Run on the agent end.

on_chain_end(outputs, *, run_id[, inputs])

End a trace for a chain run.

on_chain_error(error, *[, inputs])

Handle an error for a chain run.

on_chain_start(serialized, inputs, *, run_id)

Start a trace for a chain run.

on_chat_model_start(serialized, messages, *, ...)

Start a trace for an LLM run.

on_custom_event(name, data, *, run_id[, ...])

Override to define a handler for a custom event.

on_llm_end(response, *, run_id, **kwargs)

End a trace for an LLM run.

on_llm_error(error, *, run_id, **kwargs)

Handle an error for an LLM run.

on_llm_new_token(token, *[, chunk, ...])

Run on new LLM token.

on_llm_start(serialized, prompts, *, run_id)

Start a trace for an LLM run.

on_retriever_end(documents, *, run_id, **kwargs)

Run when the Retriever ends running.

on_retriever_error(error, *, run_id, **kwargs)

Run when Retriever errors.

on_retriever_start(serialized, query, *, run_id)

Run when the Retriever starts running.

on_retry(retry_state, *, run_id, **kwargs)

Run on retry.

on_text(text, *, run_id[, parent_run_id])

Run on an arbitrary text.

on_tool_end(output, *, run_id, **kwargs)

End a trace for a tool run.

on_tool_error(error, *, run_id, **kwargs)

Handle an error for a tool run.

on_tool_start(serialized, input_str, *, run_id)

Start a trace for a tool run.

wait_for_futures()

Wait for all futures to complete.

__init__(
evaluators: Sequence[langsmith.RunEvaluator],
client: langsmith.Client | None = None,
example_id: UUID | str | None = None,
skip_unfinished: bool = True,
project_name: str | None = 'evaluators',
max_concurrency: int | None = None,
**kwargs: Any,
) → None[source]#

Create an EvaluatorCallbackHandler.

Parameters:
  • evaluators (Sequence[langsmith.RunEvaluator]) – The run evaluators to apply to all top-level runs.

  • client (Optional[langsmith.Client]) – The LangSmith client instance to use for evaluating the runs. If not specified, a new instance will be created.

  • example_id (Optional[Union[UUID, str]]) – The example ID to be associated with the runs.

  • skip_unfinished (bool) – Whether to skip unfinished runs.

  • project_name (Optional[str]) – The LangSmith project name to organize eval chain runs under.

  • max_concurrency (Optional[int]) – The maximum number of concurrent evaluators to run.

  • kwargs (Any)

Return type:

None
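
A construction-only sketch for evaluating runs against a known dataset example: my_evaluator is a hypothetical RunEvaluator instance and the example ID is a placeholder.

    from langsmith import Client

    handler = EvaluatorCallbackHandler(
        evaluators=[my_evaluator],
        client=Client(),          # reuse an existing LangSmith client
        example_id="00000000-0000-0000-0000-000000000000",  # placeholder example ID
        skip_unfinished=True,     # ignore runs that are unfinished or errored
    )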

on_agent_action(
action: AgentAction,
*,
run_id: UUID,
parent_run_id: UUID | None = None,
**kwargs: Any,
) → Any#

Run on agent action.

Parameters:
  • action (AgentAction) – The agent action.

  • run_id (UUID) – The run ID. This is the ID of the current run.

  • parent_run_id (UUID) – The parent run ID. This is the ID of the parent run.

  • kwargs (Any) – Additional keyword arguments.

Return type:

Any

on_agent_finish(
finish: AgentFinish,
*,
run_id: UUID,
parent_run_id: UUID | None = None,
**kwargs: Any,
) → Any#

Run on the agent end.

Parameters:
  • finish (AgentFinish) – The agent finish.

  • run_id (UUID) – The run ID. This is the ID of the current run.

  • parent_run_id (UUID) – The parent run ID. This is the ID of the parent run.

  • kwargs (Any) – Additional keyword arguments.

Return type:

Any

on_chain_end(
outputs: dict[str, Any],
*,
run_id: UUID,
inputs: dict[str, Any] | None = None,
**kwargs: Any,
) → Run#

End a trace for a chain run.

Parameters:
  • outputs (dict[str, Any]) – The outputs for the chain.

  • run_id (UUID) – The run ID.

  • inputs (Optional[dict[str, Any]]) – The inputs for the chain. Defaults to None.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_chain_error(
error: BaseException,
*,
inputs: dict[str, Any] | None = None,
run_id: UUID,
**kwargs: Any,
) → Run#

Handle an error for a chain run.

Parameters:
  • error (BaseException) – The error.

  • inputs (Optional[dict[str, Any]]) – The inputs for the chain. Defaults to None.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_chain_start(
serialized: dict[str, Any],
inputs: dict[str, Any],
*,
run_id: UUID,
tags: list[str] | None = None,
parent_run_id: UUID | None = None,
metadata: dict[str, Any] | None = None,
run_type: str | None = None,
name: str | None = None,
**kwargs: Any,
) → Run#

Start a trace for a chain run.

Parameters:
  • serialized (dict[str, Any]) – The serialized chain.

  • inputs (dict[str, Any]) – The inputs for the chain.

  • run_id (UUID) – The run ID.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • run_type (Optional[str]) – The type of the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_chat_model_start(
serialized: dict[str, Any],
messages: list[list[BaseMessage]],
*,
run_id: UUID,
tags: list[str] | None = None,
parent_run_id: UUID | None = None,
metadata: dict[str, Any] | None = None,
name: str | None = None,
**kwargs: Any,
) → Run#

Start a trace for an LLM run.

Parameters:
  • serialized (dict[str, Any]) – The serialized model.

  • messages (list[list[BaseMessage]]) – The messages to start the chat with.

  • run_id (UUID) – The run ID.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_custom_event(
name: str,
data: Any,
*,
run_id: UUID,
tags: list[str] | None = None,
metadata: dict[str, Any] | None = None,
**kwargs: Any,
) → Any#

Override to define a handler for a custom event.

Parameters:
  • name (str) – The name of the custom event.

  • data (Any) – The data for the custom event. Format will match the format specified by the user.

  • run_id (UUID) – The ID of the run.

  • tags (Optional[list[str]]) – The tags associated with the custom event (includes inherited tags).

  • metadata (Optional[dict[str, Any]]) – The metadata associated with the custom event (includes inherited metadata).

  • kwargs (Any)

Return type:

Any

Added in version 0.2.15.
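
A hedged sketch of overriding this hook: the subclass name and print-based handling are illustrative only; the events themselves would be emitted elsewhere with langchain_core.callbacks.dispatch_custom_event (or its async counterpart).

    from __future__ import annotations

    from typing import Any
    from uuid import UUID

    from langchain_core.tracers.evaluation import EvaluatorCallbackHandler


    class LoggingEvaluatorHandler(EvaluatorCallbackHandler):
        def on_custom_event(
            self,
            name: str,
            data: Any,
            *,
            run_id: UUID,
            tags: list[str] | None = None,
            metadata: dict[str, Any] | None = None,
            **kwargs: Any,
        ) -> Any:
            # Observe custom events dispatched from inside a traced run.
            print(f"custom event {name!r} on run {run_id}: {data}")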

on_llm_end(
response: LLMResult,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

End a trace for an LLM run.

Parameters:
  • response (LLMResult) – The response.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_llm_error(
error: BaseException,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Handle an error for an LLM run.

Parameters:
  • error (BaseException) – The error.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_llm_new_token(
token: str,
*,
chunk: GenerationChunk | ChatGenerationChunk | None = None,
run_id: UUID,
parent_run_id: UUID | None = None,
**kwargs: Any,
) → Run#

Run on new LLM token. Only available when streaming is enabled.

Parameters:
  • token (str) – The token.

  • chunk (Optional[Union[GenerationChunk, ChatGenerationChunk]]) – The chunk. Defaults to None.

  • run_id (UUID) – The run ID.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_llm_start(
serialized: dict[str, Any],
prompts: list[str],
*,
run_id: UUID,
tags: list[str] | None = None,
parent_run_id: UUID | None = None,
metadata: dict[str, Any] | None = None,
name: str | None = None,
**kwargs: Any,
) → Run#

Start a trace for an LLM run.

Parameters:
  • serialized (dict[str, Any]) – The serialized model.

  • prompts (list[str]) – The prompts to start the LLM with.

  • run_id (UUID) – The run ID.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_retriever_end(
documents: Sequence[Document],
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Run when the Retriever ends running.

Parameters:
  • documents (Sequence[Document]) – The documents.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_retriever_error(
error: BaseException,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Run when Retriever errors.

Parameters:
  • error (BaseException) – The error.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_retriever_start(
serialized: dict[str, Any],
query: str,
*,
run_id: UUID,
parent_run_id: UUID | None = None,
tags: list[str] | None = None,
metadata: dict[str, Any] | None = None,
name: str | None = None,
**kwargs: Any,
) → Run#

Run when the Retriever starts running.

Parameters:
  • serialized (dict[str, Any]) – The serialized retriever.

  • query (str) – The query.

  • run_id (UUID) – The run ID.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_retry(
retry_state: RetryCallState,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Run on retry.

Parameters:
  • retry_state (RetryCallState) – The retry state.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_text(
text: str,
*,
run_id: UUID,
parent_run_id: UUID | None = None,
**kwargs: Any,
) → Any#

Run on an arbitrary text.

Parameters:
  • text (str) – The text.

  • run_id (UUID) – The run ID. This is the ID of the current run.

  • parent_run_id (UUID) – The parent run ID. This is the ID of the parent run.

  • kwargs (Any) – Additional keyword arguments.

Return type:

Any

on_tool_end(
output: Any,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

End a trace for a tool run.

Parameters:
  • output (Any) – The output for the tool.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_tool_error(
error: BaseException,
*,
run_id: UUID,
**kwargs: Any,
) → Run#

Handle an error for a tool run.

Parameters:
  • error (BaseException) – The error.

  • run_id (UUID) – The run ID.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

on_tool_start(
serialized: dict[str, Any],
input_str: str,
*,
run_id: UUID,
tags: list[str] | None = None,
parent_run_id: UUID | None = None,
metadata: dict[str, Any] | None = None,
name: str | None = None,
inputs: dict[str, Any] | None = None,
**kwargs: Any,
) → Run#

Start a trace for a tool run.

Parameters:
  • serialized (dict[str, Any]) – The serialized tool.

  • input_str (str) – The input string.

  • run_id (UUID) – The run ID.

  • tags (Optional[list[str]]) – The tags for the run. Defaults to None.

  • parent_run_id (Optional[UUID]) – The parent run ID. Defaults to None.

  • metadata (Optional[dict[str, Any]]) – The metadata for the run. Defaults to None.

  • name (Optional[str]) – The name of the run.

  • inputs (Optional[dict[str, Any]]) – The inputs for the tool.

  • kwargs (Any) – Additional arguments.

Returns:

The run.

Return type:

Run

wait_for_futures() → None[source]#

Wait for all futures to complete.

Return type:

None
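
A minimal sketch of when to call this: after running your chains with the handler attached, block before process exit so the thread-pooled evaluators can finish. Here, handler and chain are assumed to be defined as in the constructor example above.

    for question in ("q1", "q2", "q3"):
        chain.invoke({"question": question}, config={"callbacks": [handler]})

    # Blocks until every pending evaluator future has completed.
    handler.wait_for_futures()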