Evals(api_client_: google.genai._api_client.BaseApiClient)

API documentation for the Evals class.
Methods
batch_evaluate
batch_evaluate(
*,
dataset: typing.Union[
vertexai._genai.types.common.EvaluationDataset,
vertexai._genai.types.common.EvaluationDatasetDict,
],
metrics: list[
typing.Union[
vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
]
],
dest: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.EvaluateDatasetConfig,
vertexai._genai.types.common.EvaluateDatasetConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluateDatasetOperation

Evaluates a dataset based on a set of given metrics.
create_evaluation_item
create_evaluation_item(
*,
evaluation_item_type: vertexai._genai.types.common.EvaluationItemType,
gcs_uri: str,
display_name: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.CreateEvaluationItemConfig,
vertexai._genai.types.common.CreateEvaluationItemConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationItem

Creates an EvaluationItem.
create_evaluation_metric
create_evaluation_metric(
*,
display_name: typing.Optional[str] = None,
description: typing.Optional[str] = None,
metric: typing.Optional[
typing.Union[
vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
]
] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.CreateEvaluationMetricConfig,
vertexai._genai.types.common.CreateEvaluationMetricConfigDict,
]
] = None
) -> str

Creates an EvaluationMetric.
create_evaluation_run
create_evaluation_run(
*,
dataset: typing.Union[
vertexai._genai.types.common.EvaluationRunDataSource,
vertexai._genai.types.common.EvaluationDataset,
],
dest: str,
metrics: list[
typing.Union[
vertexai._genai.types.common.EvaluationRunMetric,
vertexai._genai.types.common.EvaluationRunMetricDict,
]
],
name: typing.Optional[str] = None,
display_name: typing.Optional[str] = None,
agent_info: typing.Optional[
typing.Union[
vertexai._genai.types.evals.AgentInfo,
vertexai._genai.types.evals.AgentInfoDict,
]
] = None,
agent: typing.Optional[str] = None,
user_simulator_config: typing.Optional[
typing.Union[
vertexai._genai.types.evals.UserSimulatorConfig,
vertexai._genai.types.evals.UserSimulatorConfigDict,
]
] = None,
inference_configs: typing.Optional[
dict[
str,
typing.Union[
vertexai._genai.types.common.EvaluationRunInferenceConfig,
vertexai._genai.types.common.EvaluationRunInferenceConfigDict,
],
]
] = None,
labels: typing.Optional[dict[str, str]] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.CreateEvaluationRunConfig,
vertexai._genai.types.common.CreateEvaluationRunConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationRun

Creates an EvaluationRun.
create_evaluation_set
create_evaluation_set(
*,
evaluation_items: list[str],
display_name: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.CreateEvaluationSetConfig,
vertexai._genai.types.common.CreateEvaluationSetConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationSet

Creates an EvaluationSet.
evaluate
evaluate(
*,
dataset: typing.Union[
pandas.core.frame.DataFrame,
vertexai._genai.types.common.EvaluationDataset,
vertexai._genai.types.common.EvaluationDatasetDict,
list[
typing.Union[
vertexai._genai.types.common.EvaluationDataset,
vertexai._genai.types.common.EvaluationDatasetDict,
]
],
],
metrics: typing.Optional[
list[
typing.Union[
vertexai._genai.types.common.Metric,
vertexai._genai.types.common.MetricDict,
]
]
] = None,
location: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.EvaluateMethodConfig,
vertexai._genai.types.common.EvaluateMethodConfigDict,
]
] = None,
**kwargs: typing.Any
) -> vertexai._genai.types.common.EvaluationResult

Evaluates candidate responses in the provided dataset(s) using the specified metrics.
evaluate_instances
evaluate_instances(
*, metric_config: vertexai._genai.types.common._EvaluateInstancesRequestParameters
) -> vertexai._genai.types.common.EvaluateInstancesResponse

Evaluates an instance of a model.
generate_conversation_scenarios
generate_conversation_scenarios(
*,
agent_info: typing.Union[
vertexai._genai.types.evals.AgentInfo, vertexai._genai.types.evals.AgentInfoDict
],
config: typing.Union[
vertexai._genai.types.evals.UserScenarioGenerationConfig,
vertexai._genai.types.evals.UserScenarioGenerationConfigDict,
]
) -> vertexai._genai.types.common.EvaluationDataset

Generates an evaluation dataset with user scenarios, which helps to generate conversations between a simulated user and the agent under test.
generate_rubrics
generate_rubrics(
*,
src: typing.Union[
str, pd.DataFrame, vertexai._genai.types.common.EvaluationDataset
],
rubric_group_name: str,
prompt_template: typing.Optional[str] = None,
generator_model_config: typing.Optional[genai_types.AutoraterConfigOrDict] = None,
rubric_content_type: typing.Optional[types.RubricContentType] = None,
rubric_type_ontology: typing.Optional[list[str]] = None,
predefined_spec_name: typing.Optional[
typing.Union[str, types.PrebuiltMetric]
] = None,
metric_spec_parameters: typing.Optional[dict[str, typing.Any]] = None,
metric: typing.Optional[
typing.Union[
vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict
]
] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.RubricGenerationConfig,
vertexai._genai.types.common.RubricGenerationConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationDataset

Generates rubrics for each prompt in the source and adds them as a new column structured as a dictionary.
You can generate rubrics by providing one of the following:

- A `metric`, to use a pre-registered metric resource.
- A `predefined_spec_name`, to use a Vertex AI backend recipe.
- A `prompt_template`, along with other configuration parameters (`generator_model_config`, `rubric_content_type`, `rubric_type_ontology`), for custom rubric generation.

These options are mutually exclusive, with `metric` taking precedence over `predefined_spec_name`, and `predefined_spec_name` taking precedence over `prompt_template`.
get_evaluation_item
get_evaluation_item(
*,
name: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.GetEvaluationItemConfig,
vertexai._genai.types.common.GetEvaluationItemConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationItem

Retrieves an EvaluationItem from the resource name.
get_evaluation_metric
get_evaluation_metric(
*,
metric_resource_name: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.GetEvaluationMetricConfig,
vertexai._genai.types.common.GetEvaluationMetricConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationMetric

Retrieves an EvaluationMetric from the resource name.
get_evaluation_run
get_evaluation_run(
*,
name: str,
include_evaluation_items: bool = False,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.GetEvaluationRunConfig,
vertexai._genai.types.common.GetEvaluationRunConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationRun

Retrieves an EvaluationRun from the resource name.
**Exceptions**

| Type | Description |
|---|---|
| `ValueError` | If the name is empty or invalid. |
get_evaluation_set
get_evaluation_set(
*,
name: str,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.GetEvaluationSetConfig,
vertexai._genai.types.common.GetEvaluationSetConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationSet

Retrieves an EvaluationSet from the resource name.
list_evaluation_metrics
list_evaluation_metrics(
*,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.ListEvaluationMetricsConfig,
vertexai._genai.types.common.ListEvaluationMetricsConfigDict,
]
] = None
) -> vertexai._genai.types.common.ListEvaluationMetricsResponse

Lists EvaluationMetrics.
run_inference
run_inference(
*,
src: typing.Union[
str, pandas.core.frame.DataFrame, vertexai._genai.types.common.EvaluationDataset
],
model: typing.Optional[
typing.Union[str, typing.Callable[[typing.Any], typing.Any]]
] = None,
agent: typing.Optional[
typing.Union[str, vertexai._genai.types.common.AgentEngine]
] = None,
location: typing.Optional[str] = None,
config: typing.Optional[
typing.Union[
vertexai._genai.types.common.EvalRunInferenceConfig,
vertexai._genai.types.common.EvalRunInferenceConfigDict,
]
] = None
) -> vertexai._genai.types.common.EvaluationDataset

Runs inference on a dataset for evaluation.