Evals(api_client_: google.genai._api_client.BaseApiClient)

API documentation for the Evals class.
Methods
batch_evaluate
batch_evaluate(
*,
dataset: typing.Union[
vertexai._genai.types.common.EvaluationDataset,
vertexai._genai.types.common.EvaluationDatasetDict,
],
metrics: list[
typing.Union[
vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
]
],
dest: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.EvaluateDatasetConfig,
vertexai._genai.types.common.EvaluateDatasetConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluateDatasetOperation

Evaluates a dataset based on a set of given metrics.
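For example, a minimal sketch of kicking off a batch evaluation. It assumes a client built with vertexai.Client and the types module re-exported as vertexai.types; the project, bucket paths, and metric name are illustrative placeholders:

```python
import vertexai
from vertexai import types  # assumed re-export of vertexai._genai.types

client = vertexai.Client(project="my-project", location="us-central1")

# Dataset stored in GCS; dest is the GCS prefix where results are written.
operation = client.evals.batch_evaluate(
    dataset=types.EvaluationDataset(
        gcs_source=types.GcsSource(uris=["gs://my-bucket/eval_data.jsonl"])
    ),
    metrics=[types.Metric(name="fluency")],  # hypothetical metric name
    dest="gs://my-bucket/eval_results",
)
print(operation.name)  # long-running operation; poll until it completes
```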
create_evaluation_item
create_evaluation_item(
*,
evaluation_item_type: vertexai._genai.types.common.EvaluationItemType,
gcs_uri: str,
display_name: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.CreateEvaluationItemConfig,
vertexai._genai.types.common.CreateEvaluationItemConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationItem

Creates an EvaluationItem.
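A sketch of creating an item, under the same assumed client setup; the enum member and GCS path are placeholders, so check types.EvaluationItemType for the values your SDK version actually defines:

```python
import vertexai
from vertexai import types  # assumed re-export of vertexai._genai.types

client = vertexai.Client(project="my-project", location="us-central1")

item = client.evals.create_evaluation_item(
    # "REQUEST" is a hypothetical member; inspect types.EvaluationItemType
    # for the real values in your SDK version.
    evaluation_item_type=types.EvaluationItemType.REQUEST,
    gcs_uri="gs://my-bucket/items/item_001.json",
    display_name="my-eval-item",
)
print(item.name)  # fully qualified EvaluationItem resource name
```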
create_evaluation_metric
create_evaluation_metric(
*,
display_name: typing.Optional[str] = None,
description: typing.Optional[str] = None,
metric: typing.Optional[
typing.Union[
vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
]
] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.CreateEvaluationMetricConfig,
vertexai._genai.types.common.CreateEvaluationMetricConfigDict,
]
] = None
) -> str

Creates an EvaluationMetric.
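Note that this method returns the new resource name as a string rather than an EvaluationMetric object. A hedged sketch, with a hypothetical metric definition:

```python
import vertexai
from vertexai import types  # assumed re-export of vertexai._genai.types

client = vertexai.Client(project="my-project", location="us-central1")

metric_resource_name = client.evals.create_evaluation_metric(
    display_name="my-fluency-metric",
    description="Scores how fluent the candidate response is.",
    metric=types.Metric(name="fluency"),  # hypothetical metric definition
)
print(metric_resource_name)  # resource name of the new EvaluationMetric
```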
create_evaluation_run
create_evaluation_run(
*,
dataset: typing.Union[
vertexai._genai.types.common.EvaluationRunDataSource,
vertexai._genai.types.common.EvaluationDataset,
],
dest: str,
metrics: list[
typing.Union[
vertexai._genai.types.common.EvaluationRunMetric,
vertexai._genai.types.common.EvaluationRunMetricDict,
]
],
name: typing.Optional[str] = None,
display_name: typing.Optional[str] = None,
agent_info: typing.Optional[
typing.Union[
vertexai._genai.types.evals.AgentInfo,
vertexai._genai.types.evals.AgentInfoDict,
]
] = None,
agent: typing.Optional[str] = None,
user_simulator_config: typing.Optional[
typing.Union[
vertexai._genai.types.evals.UserSimulatorConfig,
vertexai._genai.types.evals.UserSimulatorConfigDict,
]
] = None,
inference_configs: typing.Optional[
dict[
str,
typing.Union[
vertexai._genai.types.common.EvaluationRunInferenceConfig,
vertexai._genai.types.common.EvaluationRunInferenceConfigDict,
],
]
] = None,
labels: typing.Optional[dict[str, str]] = None,
loss_analysis_metrics: typing.Optional[
list[
typing.Union[
str,
vertexai._genai.types.common.Metric,
vertexai._genai.types.common.MetricDict,
]
]
] = None,
loss_analysis_configs: typing.Optional[
list[
typing.Union[
vertexai._genai.types.common.LossAnalysisConfig,
vertexai._genai.types.common.LossAnalysisConfigDict,
]
]
] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.CreateEvaluationRunConfig,
vertexai._genai.types.common.CreateEvaluationRunConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationRun

Creates an EvaluationRun.
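A minimal sketch of creating a run from a GCS dataset. The dict form of EvaluationRunMetric shown here is an assumption; consult the EvaluationRunMetric type for its exact schema, and treat the paths as placeholders:

```python
import vertexai
from vertexai import types  # assumed re-export of vertexai._genai.types

client = vertexai.Client(project="my-project", location="us-central1")

run = client.evals.create_evaluation_run(
    dataset=types.EvaluationDataset(
        gcs_source=types.GcsSource(uris=["gs://my-bucket/eval_data.jsonl"])
    ),
    dest="gs://my-bucket/run_results",
    # The "metric" key below is an assumption; EvaluationRunMetric also
    # accepts a typed object. Check the type for the real field names.
    metrics=[{"metric": "fluency"}],
    display_name="my-eval-run",
)
print(run.name)
```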
create_evaluation_set
create_evaluation_set(
*,
evaluation_items: list[str],
display_name: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.CreateEvaluationSetConfig,
vertexai._genai.types.common.CreateEvaluationSetConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationSet

Creates an EvaluationSet.
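The evaluation_items argument takes resource names of previously created EvaluationItems. A sketch, with placeholder resource names whose exact format is an assumption:

```python
import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

eval_set = client.evals.create_evaluation_set(
    evaluation_items=[
        # Placeholders; use the names returned by create_evaluation_item.
        "projects/my-project/locations/us-central1/evaluationItems/111",
        "projects/my-project/locations/us-central1/evaluationItems/222",
    ],
    display_name="my-eval-set",
)
print(eval_set.name)
```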
delete_evaluation_metric
delete_evaluation_metric(
*,
metric_resource_name: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.DeleteEvaluationMetricConfig,
vertexai._genai.types.common.DeleteEvaluationMetricConfigDict,
]
] = None
) -> None

Deletes an EvaluationMetric.
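A sketch of deleting a metric by resource name; the name below is a placeholder, so use the value returned by create_evaluation_metric or list_evaluation_metrics:

```python
import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

# Returns None on success; raises on failure.
client.evals.delete_evaluation_metric(
    metric_resource_name=(
        "projects/my-project/locations/us-central1/evaluationMetrics/123"
    ),
)
```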
evaluate
evaluate(
*,
dataset: typing.Union[
pandas.core.frame.DataFrame,
vertexai._genai.types.common.EvaluationDataset,
vertexai._genai.types.common.EvaluationDatasetDict,
list[
typing.Union[
vertexai._genai.types.common.EvaluationDataset,
vertexai._genai.types.common.EvaluationDatasetDict,
]
],
],
metrics: typing.Optional[
list[
typing.Union[
vertexai._genai.types.common.Metric,
vertexai._genai.types.common.MetricDict,
]
]
] = None,
location: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.EvaluateMethodConfig,
vertexai._genai.types.common.EvaluateMethodConfigDict,
]
] = None,
**kwargs: typing.Any
) -> vertexai._genai.types.common.EvaluationResult

Evaluates candidate responses in the provided dataset(s) using the specified metrics.
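Since the dataset union accepts a pandas DataFrame directly, a small in-memory evaluation might look like the sketch below; the column names and metric are assumptions for illustration:

```python
import pandas as pd
import vertexai
from vertexai import types  # assumed re-export of vertexai._genai.types

client = vertexai.Client(project="my-project", location="us-central1")

# Prompts paired with pre-computed candidate responses.
df = pd.DataFrame(
    {
        "prompt": ["Why is the sky blue?"],
        "response": ["Because of Rayleigh scattering of sunlight."],
    }
)

result = client.evals.evaluate(
    dataset=df,
    metrics=[types.Metric(name="fluency")],  # hypothetical metric name
)
print(result)  # EvaluationResult with per-instance and summary scores
```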
evaluate_instances
evaluate_instances(
*, metric_config: vertexai._genai.types.common._EvaluateInstancesRequestParameters
) -> vertexai._genai.types.common.EvaluateInstancesResponse

Evaluates an instance of a model.
generate_conversation_scenarios
generate_conversation_scenarios(
*,
agent_info: typing.Union[
vertexai._genai.types.evals.AgentInfo, vertexai._genai.types.evals.AgentInfoDict
],
config: typing.Union[
vertexai._genai.types.evals.UserScenarioGenerationConfig,
vertexai._genai.types.evals.UserScenarioGenerationConfigDict,
],
allow_cross_region_model: typing.Optional[bool] = None
) -> vertexai._genai.types.common.EvaluationDataset

Generates an evaluation dataset of user scenarios, which can be used to drive conversations between a simulated user and the agent under test.
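A hedged sketch of scenario generation. The field names on AgentInfo and UserScenarioGenerationConfig below are assumptions for illustration; consult those types for the exact schema:

```python
import vertexai
from vertexai import types  # assumed re-export of vertexai._genai.types

client = vertexai.Client(project="my-project", location="us-central1")

scenarios = client.evals.generate_conversation_scenarios(
    agent_info=types.AgentInfo(  # field names are assumptions
        name="travel-assistant",
        description="Helps users plan and book trips.",
    ),
    config=types.UserScenarioGenerationConfig(  # field name is an assumption
        num_scenarios=5,
    ),
)
# Returns an EvaluationDataset of user scenarios for simulated conversations.
```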
generate_loss_clusters
generate_loss_clusters(
*,
eval_result: vertexai._genai.types.common.EvaluationResult,
metric: typing.Optional[
typing.Union[
str,
vertexai._genai.types.common.Metric,
vertexai._genai.types.common.MetricDict,
]
] = None,
candidate: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.LossAnalysisConfig,
vertexai._genai.types.common.LossAnalysisConfigDict,
]
] = None
) -> vertexai._genai.types.common.GenerateLossClustersResponse

Generates loss clusters from evaluation results.
Analyzes "Pass/Fail" signals from rubric-based autoraters and groups them into semantic "Loss Patterns" (e.g., "Hallucination of Action").
This method calls the GenerateLossClusters LRO and polls until completion, returning the results directly.
If metric or candidate are not provided, they will be
auto-inferred from eval_result when unambiguous (i.e., when the
eval result contains exactly one metric or one candidate). For
multi-metric or multi-candidate evaluations, provide them explicitly.
Available candidate names can be found in
eval_result.metadata.candidate_names.
Note: This API is only available in the global region.
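Putting those constraints together, a sketch of the single-metric, single-candidate case, where both arguments can be auto-inferred; the client setup, dataset, and metric name are placeholders:

```python
import pandas as pd
import vertexai
from vertexai import types  # assumed re-export of vertexai._genai.types

# Per the note above, this API is only available in the global region.
client = vertexai.Client(project="my-project", location="global")

eval_result = client.evals.evaluate(
    dataset=pd.DataFrame(
        {"prompt": ["Summarize the article."], "response": ["A short summary."]}
    ),
    metrics=[types.Metric(name="instruction_following")],  # hypothetical
)

# One metric, one candidate: both are auto-inferred from eval_result.
clusters = client.evals.generate_loss_clusters(eval_result=eval_result)
```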
generate_rubrics
generate_rubrics(
*,
src: typing.Union[
str, pd.DataFrame, vertexai._genai.types.common.EvaluationDataset
],
rubric_group_name: str,
prompt_template: typing.Optional[str] = None,
generator_model_config: typing.Optional[genai_types.AutoraterConfigOrDict] = None,
rubric_content_type: typing.Optional[types.RubricContentType] = None,
rubric_type_ontology: typing.Optional[list[str]] = None,
predefined_spec_name: typing.Optional[
typing.Union[str, types.PrebuiltMetric]
] = None,
metric_spec_parameters: typing.Optional[dict[str, typing.Any]] = None,
metric: typing.Optional[
typing.Union[
vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
]
] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.RubricGenerationConfig,
vertexai._genai.types.common.RubricGenerationConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationDataset

Generates rubrics for each prompt in the source and adds them as a new column structured as a dictionary.
You can generate rubrics by providing one of the following:
- A metric, to use a pre-registered metric resource.
- A predefined_spec_name, to use a Vertex AI backend recipe.
- A prompt_template, along with other configuration parameters (generator_model_config, rubric_content_type, rubric_type_ontology), for custom rubric generation.

These modes are intended to be mutually exclusive; if more than one is provided, metric takes precedence over predefined_spec_name, and predefined_spec_name takes precedence over prompt_template.
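For instance, a sketch of the predefined-recipe mode; "general_quality" is a hypothetical recipe name, so pass a string or types.PrebuiltMetric value supported by your SDK version:

```python
import pandas as pd
import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

prompts_df = pd.DataFrame({"prompt": ["Write a haiku about autumn."]})

# Adds a rubric column (keyed by rubric_group_name) to the dataset.
dataset_with_rubrics = client.evals.generate_rubrics(
    src=prompts_df,
    rubric_group_name="general_quality_rubrics",
    predefined_spec_name="general_quality",  # hypothetical recipe name
)
```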
get_evaluation_item
get_evaluation_item(
*,
name: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.GetEvaluationItemConfig,
vertexai._genai.types.common.GetEvaluationItemConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationItem

Retrieves an EvaluationItem by its resource name.
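A one-call sketch; the resource name format is a placeholder, so use the value returned when the item was created:

```python
import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

item = client.evals.get_evaluation_item(
    name="projects/my-project/locations/us-central1/evaluationItems/111",
)
print(item.display_name)
```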
get_evaluation_metric
get_evaluation_metric(
*,
metric_resource_name: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.GetEvaluationMetricConfig,
vertexai._genai.types.common.GetEvaluationMetricConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationMetric

Retrieves an EvaluationMetric by its resource name.
get_evaluation_run
get_evaluation_run(
*,
name: str,
include_evaluation_items: bool = False,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.GetEvaluationRunConfig,
vertexai._genai.types.common.GetEvaluationRunConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationRun

Retrieves an EvaluationRun by its resource name.
Exceptions

| Type | Description |
|---|---|
| ValueError | If the name is empty or invalid. |
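A sketch showing the include_evaluation_items flag; the resource name is a placeholder:

```python
import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

run = client.evals.get_evaluation_run(
    name="projects/my-project/locations/us-central1/evaluationRuns/456",
    include_evaluation_items=True,  # also fetch the run's evaluation items
)
print(run.name)
```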
get_evaluation_set
get_evaluation_set(
*,
name: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.GetEvaluationSetConfig,
vertexai._genai.types.common.GetEvaluationSetConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationSet

Retrieves an EvaluationSet by its resource name.
list_evaluation_metrics
list_evaluation_metrics(
*,
filter: typing.Optional[str] = None,
order_by: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.ListEvaluationMetricsConfig,
vertexai._genai.types.common.ListEvaluationMetricsConfigDict,
]
] = None
) -> vertexai._genai.types.common.ListEvaluationMetricsResponse

Lists EvaluationMetrics.
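A sketch of a filtered, ordered listing; the specific filter fields supported are an assumption here, following the AIP-160-style list-filter conventions common to Vertex AI list methods:

```python
import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

response = client.evals.list_evaluation_metrics(
    filter='display_name="my-fluency-metric"',  # assumed filterable field
    order_by="create_time desc",                # assumed orderable field
)
```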
run_inference
run_inference(
*,
src: typing.Union[
str, pandas.core.frame.DataFrame, vertexai._genai.types.common.EvaluationDataset
],
model: typing.Optional[
typing.Union[str, typing.Callable[[typing.Any], typing.Any]]
] = None,
agent: typing.Optional[
typing.Union[str, vertexai._genai.types.common.AgentEngine]
] = None,
location: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.EvalRunInferenceConfig,
vertexai._genai.types.common.EvalRunInferenceConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationDataset

Runs inference on a dataset for evaluation.
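A sketch of generating responses from a DataFrame of prompts before evaluating them; the client setup and model ID are placeholders:

```python
import pandas as pd
import vertexai

client = vertexai.Client(project="my-project", location="us-central1")

prompts_df = pd.DataFrame({"prompt": ["Why is the sky blue?"]})

# Calls the model on each prompt and returns an EvaluationDataset with
# the generated responses, ready to pass to evaluate().
eval_dataset = client.evals.run_inference(
    model="gemini-2.0-flash",  # placeholder model ID
    src=prompts_df,
)
```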