Class Evals (1.152.0)

Evals(api_client_: google.genai._api_client.BaseApiClient)

API documentation for the Evals class.

Methods

batch_evaluate

batch_evaluate(
    *,
    dataset: typing.Union[
        vertexai._genai.types.common.EvaluationDataset,
        vertexai._genai.types.common.EvaluationDatasetDict,
    ],
    metrics: list[
        typing.Union[
            vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
        ]
    ],
    dest: str,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.EvaluateDatasetConfig,
            vertexai._genai.types.common.EvaluateDatasetConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluateDatasetOperation

Evaluates a dataset against a set of given metrics, returning a long-running operation.
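
A minimal sketch, assuming a client created via vertexai.Client and that the types module is importable as shown (both per recent SDK versions; the project, bucket, model ID, and prebuilt metric member are illustrative assumptions):

import vertexai
from vertexai import types  # adjust this import if your SDK version exposes types elsewhere

client = vertexai.Client(project="my-project", location="us-central1")  # hypothetical project

# Produce an EvaluationDataset via inference (see run_inference below),
# then launch a batch evaluation that writes its results to Cloud Storage.
eval_dataset = client.evals.run_inference(
    model="gemini-2.0-flash",            # assumed model ID
    src="gs://my-bucket/prompts.jsonl",  # hypothetical source file
)
operation = client.evals.batch_evaluate(
    dataset=eval_dataset,
    metrics=[types.PrebuiltMetric.TEXT_QUALITY],  # assumed prebuilt metric member
    dest="gs://my-bucket/eval-output",            # hypothetical destination
)

The returned EvaluateDatasetOperation can then be polled to track the batch job.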

create_evaluation_item

create_evaluation_item(
    *,
    evaluation_item_type: vertexai._genai.types.common.EvaluationItemType,
    gcs_uri: str,
    display_name: typing.Optional[str] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.CreateEvaluationItemConfig,
            vertexai._genai.types.common.CreateEvaluationItemConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationItem

Creates an EvaluationItem.

create_evaluation_metric

create_evaluation_metric(
    *,
    display_name: typing.Optional[str] = None,
    description: typing.Optional[str] = None,
    metric: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
        ]
    ] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.CreateEvaluationMetricConfig,
            vertexai._genai.types.common.CreateEvaluationMetricConfigDict,
        ]
    ] = None
) -> str

Creates an EvaluationMetric.
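
A hedged sketch of the metric resource lifecycle, pairing this method with get_evaluation_metric and delete_evaluation_metric (documented below); the display name is illustrative, and the returned string is taken to be the metric's resource name per the -> str return type:

import vertexai

client = vertexai.Client(project="my-project", location="us-central1")  # hypothetical project

# Register a metric resource; the returned string is its resource name.
metric_name = client.evals.create_evaluation_metric(
    display_name="my-text-quality",           # illustrative display name
    description="Custom text quality metric",
)

# Fetch it back, and delete it when no longer needed.
fetched = client.evals.get_evaluation_metric(metric_resource_name=metric_name)
client.evals.delete_evaluation_metric(metric_resource_name=metric_name)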

create_evaluation_run

create_evaluation_run(
    *,
    dataset: typing.Union[
        vertexai._genai.types.common.EvaluationRunDataSource,
        vertexai._genai.types.common.EvaluationDataset,
    ],
    dest: str,
    metrics: list[
        typing.Union[
            vertexai._genai.types.common.EvaluationRunMetric,
            vertexai._genai.types.common.EvaluationRunMetricDict,
        ]
    ],
    name: typing.Optional[str] = None,
    display_name: typing.Optional[str] = None,
    agent_info: typing.Optional[
        typing.Union[
            vertexai._genai.types.evals.AgentInfo,
            vertexai._genai.types.evals.AgentInfoDict,
        ]
    ] = None,
    agent: typing.Optional[str] = None,
    user_simulator_config: typing.Optional[
        typing.Union[
            vertexai._genai.types.evals.UserSimulatorConfig,
            vertexai._genai.types.evals.UserSimulatorConfigDict,
        ]
    ] = None,
    inference_configs: typing.Optional[
        dict[
            str,
            typing.Union[
                vertexai._genai.types.common.EvaluationRunInferenceConfig,
                vertexai._genai.types.common.EvaluationRunInferenceConfigDict,
            ],
        ]
    ] = None,
    labels: typing.Optional[dict[str, str]] = None,
    loss_analysis_metrics: typing.Optional[
        list[
            typing.Union[
                str,
                vertexai._genai.types.common.Metric,
                vertexai._genai.types.common.MetricDict,
            ]
        ]
    ] = None,
    loss_analysis_configs: typing.Optional[
        list[
            typing.Union[
                vertexai._genai.types.common.LossAnalysisConfig,
                vertexai._genai.types.common.LossAnalysisConfigDict,
            ]
        ]
    ] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.CreateEvaluationRunConfig,
            vertexai._genai.types.common.CreateEvaluationRunConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationRun

Creates an EvaluationRun.

create_evaluation_set

create_evaluation_set(
    *,
    evaluation_items: list[str],
    display_name: typing.Optional[str] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.CreateEvaluationSetConfig,
            vertexai._genai.types.common.CreateEvaluationSetConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationSet

Creates an EvaluationSet.
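
A brief sketch, assuming a client as in the earlier examples; the item resource names are hypothetical placeholders for existing EvaluationItem resources:

import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

# Group existing EvaluationItems into a named set by resource name.
evaluation_set = client.evals.create_evaluation_set(
    evaluation_items=[
        "projects/my-project/locations/us-central1/evaluationItems/123",  # hypothetical
        "projects/my-project/locations/us-central1/evaluationItems/456",  # hypothetical
    ],
    display_name="regression-set-v1",  # illustrative display name
)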

delete_evaluation_metric

delete_evaluation_metric(
    *,
    metric_resource_name: str,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.DeleteEvaluationMetricConfig,
            vertexai._genai.types.common.DeleteEvaluationMetricConfigDict,
        ]
    ] = None
) -> None

Deletes an EvaluationMetric.

evaluate

evaluate(
    *,
    dataset: typing.Union[
        pandas.core.frame.DataFrame,
        vertexai._genai.types.common.EvaluationDataset,
        vertexai._genai.types.common.EvaluationDatasetDict,
        list[
            typing.Union[
                vertexai._genai.types.common.EvaluationDataset,
                vertexai._genai.types.common.EvaluationDatasetDict,
            ]
        ],
    ],
    metrics: typing.Optional[
        list[
            typing.Union[
                vertexai._genai.types.common.Metric,
                vertexai._genai.types.common.MetricDict,
            ]
        ]
    ] = None,
    location: typing.Optional[str] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.EvaluateMethodConfig,
            vertexai._genai.types.common.EvaluateMethodConfigDict,
        ]
    ] = None,
    **kwargs: typing.Any
) -> vertexai._genai.types.common.EvaluationResult

Evaluates candidate responses in the provided dataset(s) using the specified metrics.
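
A minimal sketch using a pandas DataFrame; the column names follow the schema the SDK is assumed to expect by default, and the prebuilt metric member is an illustrative assumption:

import pandas as pd
import vertexai
from vertexai import types  # adjust if your SDK version exposes types elsewhere

client = vertexai.Client(project="my-project", location="us-central1")

# Column names assumed: "prompt" for the input, "response" for the candidate output.
df = pd.DataFrame(
    {
        "prompt": ["What is the capital of France?"],
        "response": ["Paris is the capital of France."],
    }
)
eval_result = client.evals.evaluate(
    dataset=df,
    metrics=[types.PrebuiltMetric.TEXT_QUALITY],  # assumed prebuilt metric member
)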

evaluate_instances

evaluate_instances(
    *, metric_config: vertexai._genai.types.common._EvaluateInstancesRequestParameters
) -> vertexai._genai.types.common.EvaluateInstancesResponse

Evaluates instances of model output against the given metric configuration.

generate_conversation_scenarios

generate_conversation_scenarios(
    *,
    agent_info: typing.Union[
        vertexai._genai.types.evals.AgentInfo, vertexai._genai.types.evals.AgentInfoDict
    ],
    config: typing.Union[
        vertexai._genai.types.evals.UserScenarioGenerationConfig,
        vertexai._genai.types.evals.UserScenarioGenerationConfigDict,
    ],
    allow_cross_region_model: typing.Optional[bool] = None
) -> vertexai._genai.types.common.EvaluationDataset

Generates an evaluation dataset of user scenarios, which can be used to drive conversations between a simulated user and the agent under test.

generate_loss_clusters

generate_loss_clusters(
    *,
    eval_result: vertexai._genai.types.common.EvaluationResult,
    metric: typing.Optional[
        typing.Union[
            str,
            vertexai._genai.types.common.Metric,
            vertexai._genai.types.common.MetricDict,
        ]
    ] = None,
    candidate: typing.Optional[str] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.LossAnalysisConfig,
            vertexai._genai.types.common.LossAnalysisConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.GenerateLossClustersResponse

Generates loss clusters from evaluation results.

Analyzes "Pass/Fail" signals from rubric-based autoraters and groups them into semantic "Loss Patterns" (e.g., "Hallucination of Action").

This method starts the GenerateLossClusters long-running operation and polls it until completion, returning the results directly.

If metric or candidate are not provided, they will be auto-inferred from eval_result when unambiguous (i.e., when the eval result contains exactly one metric or one candidate). For multi-metric or multi-candidate evaluations, provide them explicitly.

Available candidate names can be found in eval_result.metadata.candidate_names.

Note: This API is only available in the global region.
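
A hedged sketch, reusing an EvaluationResult from a prior evaluate() call; per the note above, the client targets the global region, and the metric and candidate names are illustrative:

import vertexai

# Per the note above, the client must target the global region.
client = vertexai.Client(project="my-project", location="global")

# eval_result: an EvaluationResult returned by client.evals.evaluate(...).
# With a single metric and candidate, both are auto-inferred.
clusters = client.evals.generate_loss_clusters(eval_result=eval_result)

# For multi-metric or multi-candidate results, pass them explicitly.
clusters = client.evals.generate_loss_clusters(
    eval_result=eval_result,
    metric="text_quality",         # hypothetical metric name
    candidate="gemini-2.0-flash",  # see eval_result.metadata.candidate_names
)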

generate_rubrics

generate_rubrics(
    *,
    src: typing.Union[
        str, pandas.core.frame.DataFrame, vertexai._genai.types.common.EvaluationDataset
    ],
    rubric_group_name: str,
    prompt_template: typing.Optional[str] = None,
    generator_model_config: typing.Optional[google.genai.types.AutoraterConfigOrDict] = None,
    rubric_content_type: typing.Optional[types.RubricContentType] = None,
    rubric_type_ontology: typing.Optional[list[str]] = None,
    predefined_spec_name: typing.Optional[
        typing.Union[str, types.PrebuiltMetric]
    ] = None,
    metric_spec_parameters: typing.Optional[dict[str, typing.Any]] = None,
    metric: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
        ]
    ] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.RubricGenerationConfig,
            vertexai._genai.types.common.RubricGenerationConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationDataset

Generates rubrics for each prompt in the source and adds them as a new column structured as a dictionary.

You can generate rubrics in one of three ways:

  1. Provide a metric to use a pre-registered metric resource.
  2. Provide a predefined_spec_name to use a Vertex AI backend recipe.
  3. Provide a prompt_template, along with optional configuration parameters (generator_model_config, rubric_content_type, rubric_type_ontology), for custom rubric generation.

These modes are mutually exclusive; if more than one is supplied, metric takes precedence over predefined_spec_name, which in turn takes precedence over prompt_template. A usage sketch follows below.
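
A sketch of the custom prompt_template mode (mode 3 above); the template text, placeholder syntax, and group name are illustrative assumptions:

import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

# df: a pandas DataFrame with a "prompt" column (see the evaluate example above).
dataset_with_rubrics = client.evals.generate_rubrics(
    src=df,
    rubric_group_name="text_quality_rubrics",  # illustrative group name
    prompt_template=(
        "Write a set of pass/fail rubrics for judging a response to the "
        "following prompt: {prompt}"  # placeholder syntax is an assumption
    ),
)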

get_evaluation_item

get_evaluation_item(
    *,
    name: str,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.GetEvaluationItemConfig,
            vertexai._genai.types.common.GetEvaluationItemConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationItem

Retrieves an EvaluationItem by its resource name.

get_evaluation_metric

get_evaluation_metric(
    *,
    metric_resource_name: str,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.GetEvaluationMetricConfig,
            vertexai._genai.types.common.GetEvaluationMetricConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationMetric

Retrieves an EvaluationMetric by its resource name.

get_evaluation_run

get_evaluation_run(
    *,
    name: str,
    include_evaluation_items: bool = False,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.GetEvaluationRunConfig,
            vertexai._genai.types.common.GetEvaluationRunConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationRun

Retrieves an EvaluationRun by its resource name.

Exceptions

ValueError: If the name is empty or invalid.
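
A one-call sketch; the resource name is a hypothetical placeholder, and the same name-based pattern applies to get_evaluation_item, get_evaluation_metric, and get_evaluation_set:

import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

run = client.evals.get_evaluation_run(
    name="projects/my-project/locations/us-central1/evaluationRuns/123",  # hypothetical
    include_evaluation_items=True,  # also fetch the run's evaluation items
)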

get_evaluation_set

get_evaluation_set(
    *,
    name: str,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.GetEvaluationSetConfig,
            vertexai._genai.types.common.GetEvaluationSetConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationSet

Retrieves an EvaluationSet by its resource name.

list_evaluation_metrics

list_evaluation_metrics(
    *,
    filter: typing.Optional[str] = None,
    order_by: typing.Optional[str] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.ListEvaluationMetricsConfig,
            vertexai._genai.types.common.ListEvaluationMetricsConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.ListEvaluationMetricsResponse

Lists EvaluationMetrics.
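
A short sketch; the filter and order_by strings assume the standard AIP-160 list-filter conventions used across Vertex AI list APIs:

import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

response = client.evals.list_evaluation_metrics(
    filter='display_name="my-text-quality"',  # assumed filter syntax
    order_by="create_time desc",              # assumed ordering field
)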

run_inference

run_inference(
    *,
    src: typing.Union[
        str, pandas.core.frame.DataFrame, vertexai._genai.types.common.EvaluationDataset
    ],
    model: typing.Optional[
        typing.Union[str, typing.Callable[[typing.Any], typing.Any]]
    ] = None,
    agent: typing.Optional[
        typing.Union[str, vertexai._genai.types.common.AgentEngine]
    ] = None,
    location: typing.Optional[str] = None,
    config: typing.Optional[
        typing.Union[
            vertexai._genai.types.common.EvalRunInferenceConfig,
            vertexai._genai.types.common.EvalRunInferenceConfigDict,
        ]
    ] = None
) -> vertexai._genai.types.common.EvaluationDataset

Runs inference on a dataset for evaluation.
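
A minimal sketch; the model ID is an assumption, and the resulting EvaluationDataset can be passed on to evaluate or batch_evaluate above:

import pandas as pd
import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

# Generate a response for each prompt; results are returned as an
# EvaluationDataset that can feed evaluate() or batch_evaluate().
prompts = pd.DataFrame({"prompt": ["Summarize the plot of Hamlet."]})
eval_dataset = client.evals.run_inference(
    model="gemini-2.0-flash",  # assumed model ID
    src=prompts,
)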