GKE Recommender API v1 - Package cloud.google.com/go/gkerecommender/apiv1/gkerecommenderpb (v0.1.0)

Constants

GkeInferenceQuickstart_FetchModels_FullMethodName, GkeInferenceQuickstart_FetchModelServers_FullMethodName, GkeInferenceQuickstart_FetchModelServerVersions_FullMethodName, GkeInferenceQuickstart_FetchProfiles_FullMethodName, GkeInferenceQuickstart_GenerateOptimizedManifest_FullMethodName, GkeInferenceQuickstart_FetchBenchmarkingData_FullMethodName

const (
	GkeInferenceQuickstart_FetchModels_FullMethodName               = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModels"
	GkeInferenceQuickstart_FetchModelServers_FullMethodName         = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModelServers"
	GkeInferenceQuickstart_FetchModelServerVersions_FullMethodName  = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModelServerVersions"
	GkeInferenceQuickstart_FetchProfiles_FullMethodName             = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchProfiles"
	GkeInferenceQuickstart_GenerateOptimizedManifest_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/GenerateOptimizedManifest"
	GkeInferenceQuickstart_FetchBenchmarkingData_FullMethodName     = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchBenchmarkingData"
)

Variables

File_google_cloud_gkerecommender_v1_gkerecommender_proto

var File_google_cloud_gkerecommender_v1_gkerecommender_proto protoreflect.FileDescriptor

GkeInferenceQuickstart_ServiceDesc

var GkeInferenceQuickstart_ServiceDesc = grpc.ServiceDesc{
	ServiceName: "google.cloud.gkerecommender.v1.GkeInferenceQuickstart",
	HandlerType: (*GkeInferenceQuickstartServer)(nil),
	Methods: []grpc.MethodDesc{
		{
			MethodName: "FetchModels",
			Handler:    _GkeInferenceQuickstart_FetchModels_Handler,
		},
		{
			MethodName: "FetchModelServers",
			Handler:    _GkeInferenceQuickstart_FetchModelServers_Handler,
		},
		{
			MethodName: "FetchModelServerVersions",
			Handler:    _GkeInferenceQuickstart_FetchModelServerVersions_Handler,
		},
		{
			MethodName: "FetchProfiles",
			Handler:    _GkeInferenceQuickstart_FetchProfiles_Handler,
		},
		{
			MethodName: "GenerateOptimizedManifest",
			Handler:    _GkeInferenceQuickstart_GenerateOptimizedManifest_Handler,
		},
		{
			MethodName: "FetchBenchmarkingData",
			Handler:    _GkeInferenceQuickstart_FetchBenchmarkingData_Handler,
		},
	},
	Streams:  []grpc.StreamDesc{},
	Metadata: "google/cloud/gkerecommender/v1/gkerecommender.proto",
}

GkeInferenceQuickstart_ServiceDesc is the grpc.ServiceDesc for the GkeInferenceQuickstart service. It's only intended for direct use with grpc.RegisterService, and not to be introspected or modified (even as a copy).

Functions

func RegisterGkeInferenceQuickstartServer

func RegisterGkeInferenceQuickstartServer(s grpc.ServiceRegistrar, srv GkeInferenceQuickstartServer)

Amount

type Amount struct {

	// Output only. The whole units of the amount.
	// For example if `currencyCode` is `"USD"`, then 1 unit is one US dollar.
	Units int64 `protobuf:"varint,1,opt,name=units,proto3" json:"units,omitempty"`
	// Output only. Number of nano (10^-9) units of the amount.
	// The value must be between -999,999,999 and +999,999,999 inclusive.
	// If `units` is positive, `nanos` must be positive or zero.
	// If `units` is zero, `nanos` can be positive, zero, or negative.
	// If `units` is negative, `nanos` must be negative or zero.
	// For example $-1.75 is represented as `units`=-1 and `nanos`=-750,000,000.
	Nanos int32 `protobuf:"varint,2,opt,name=nanos,proto3" json:"nanos,omitempty"`
	// contains filtered or unexported fields
}

Represents an amount of money in a specific currency.

func (*Amount) Descriptor

func (*Amount) Descriptor() ([]byte, []int)

Deprecated: Use Amount.ProtoReflect.Descriptor instead.

func (*Amount) GetNanos

func (x *Amount) GetNanos() int32

func (*Amount) GetUnits

func (x *Amount) GetUnits() int64

func (*Amount) ProtoMessage

func (*Amount) ProtoMessage()

func (*Amount) ProtoReflect

func (x *Amount) ProtoReflect() protoreflect.Message

func (*Amount) Reset

func (x *Amount) Reset()

func (*Amount) String

func (x *Amount) String() string

Cost

type Cost struct {

	// Optional. The cost per million output tokens, calculated as:
	// $/output token = GPU $/s / (1/output-to-input-cost-ratio * input tokens/s +
	// output tokens/s)
	CostPerMillionOutputTokens *Amount `protobuf:"bytes,1,opt,name=cost_per_million_output_tokens,json=costPerMillionOutputTokens,proto3" json:"cost_per_million_output_tokens,omitempty"`
	// Optional. The cost per million input tokens. $/input token = ($/output
	// token) / output-to-input-cost-ratio.
	CostPerMillionInputTokens *Amount `protobuf:"bytes,2,opt,name=cost_per_million_input_tokens,json=costPerMillionInputTokens,proto3" json:"cost_per_million_input_tokens,omitempty"`
	// Optional. The pricing model used to calculate the cost. Can be one of:
	// `3-years-cud`, `1-year-cud`, `on-demand`, `spot`. If not provided, `spot`
	// will be used.
	PricingModel string `protobuf:"bytes,3,opt,name=pricing_model,json=pricingModel,proto3" json:"pricing_model,omitempty"`
	// Optional. The output-to-input cost ratio. This determines how the total GPU
	// cost is split between input and output tokens. If not provided, `4.0` is
	// used, assuming a 4:1 output:input cost ratio.
	OutputInputCostRatio *float32 `protobuf:"fixed32,4,opt,name=output_input_cost_ratio,json=outputInputCostRatio,proto3,oneof" json:"output_input_cost_ratio,omitempty"`
	// contains filtered or unexported fields
}

Cost for running a model deployment on a given instance type. Currently, only USD currency code is supported.

func (*Cost) Descriptor

func (*Cost) Descriptor() ([]byte, []int)

Deprecated: Use Cost.ProtoReflect.Descriptor instead.

func (*Cost) GetCostPerMillionInputTokens

func (x *Cost) GetCostPerMillionInputTokens() *Amount

func (*Cost) GetCostPerMillionOutputTokens

func (x *Cost) GetCostPerMillionOutputTokens() *Amount

func (*Cost) GetOutputInputCostRatio

func (x *Cost) GetOutputInputCostRatio() float32

func (*Cost) GetPricingModel

func (x *Cost) GetPricingModel() string

func (*Cost) ProtoMessage

func (*Cost) ProtoMessage()

func (*Cost) ProtoReflect

func (x *Cost) ProtoReflect() protoreflect.Message

func (*Cost) Reset

func (x *Cost) Reset()

func (*Cost) String

func (x *Cost) String() string

FetchBenchmarkingDataRequest

type FetchBenchmarkingDataRequest struct {

	// Required. The model server configuration to get benchmarking data for. Use
	// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
	// to find valid configurations.
	ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
	// Optional. The instance type to filter benchmarking data. Instance types are
	// in the format `a2-highgpu-1g`. If not provided, all instance types for the
	// given profile's `model_server_info` will be returned. Use
	// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
	// to find available instance types.
	InstanceType string `protobuf:"bytes,3,opt,name=instance_type,json=instanceType,proto3" json:"instance_type,omitempty"`
	// Optional. The pricing model to use for the benchmarking data. Defaults to
	// `spot`.
	PricingModel string `protobuf:"bytes,4,opt,name=pricing_model,json=pricingModel,proto3" json:"pricing_model,omitempty"`
	// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchBenchmarkingData][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchBenchmarkingData].

func (*FetchBenchmarkingDataRequest) Descriptor

func (*FetchBenchmarkingDataRequest) Descriptor() ([]byte, []int)

Deprecated: Use FetchBenchmarkingDataRequest.ProtoReflect.Descriptor instead.

func (*FetchBenchmarkingDataRequest) GetInstanceType

func (x *FetchBenchmarkingDataRequest) GetInstanceType() string

func (*FetchBenchmarkingDataRequest) GetModelServerInfo

func (x *FetchBenchmarkingDataRequest) GetModelServerInfo() *ModelServerInfo

func (*FetchBenchmarkingDataRequest) GetPricingModel

func (x *FetchBenchmarkingDataRequest) GetPricingModel() string

func (*FetchBenchmarkingDataRequest) ProtoMessage

func (*FetchBenchmarkingDataRequest) ProtoMessage()

func (*FetchBenchmarkingDataRequest) ProtoReflect

func (*FetchBenchmarkingDataRequest) Reset

func (x *FetchBenchmarkingDataRequest) Reset()

func (*FetchBenchmarkingDataRequest) String

FetchBenchmarkingDataResponse

type FetchBenchmarkingDataResponse struct {

	// Output only. List of profiles containing their respective benchmarking
	// data.
	Profile []*Profile `protobuf:"bytes,1,rep,name=profile,proto3" json:"profile,omitempty"`
	// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchBenchmarkingData][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchBenchmarkingData].

func (*FetchBenchmarkingDataResponse) Descriptor

func (*FetchBenchmarkingDataResponse) Descriptor() ([]byte, []int)

Deprecated: Use FetchBenchmarkingDataResponse.ProtoReflect.Descriptor instead.

func (*FetchBenchmarkingDataResponse) GetProfile

func (x *FetchBenchmarkingDataResponse) GetProfile() []*Profile

func (*FetchBenchmarkingDataResponse) ProtoMessage

func (*FetchBenchmarkingDataResponse) ProtoMessage()

func (*FetchBenchmarkingDataResponse) ProtoReflect

func (*FetchBenchmarkingDataResponse) Reset

func (x *FetchBenchmarkingDataResponse) Reset()

func (*FetchBenchmarkingDataResponse) String

FetchModelServerVersionsRequest

type FetchModelServerVersionsRequest struct {

	// Required. The model for which to list model server versions. Open-source
	// models follow the Huggingface Hub `owner/model_name` format. Use
	// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
	// to find available models.
	Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
	// Required. The model server for which to list versions. Open-source model
	// servers use simplified, lowercase names (e.g., `vllm`). Use
	// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
	// to find available model servers.
	ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
	// Optional. The target number of results to return in a single response.
	// If not specified, a default value will be chosen by the service.
	// Note that the response may include a partial list and a caller should
	// only rely on the response's
	// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsResponse.next_page_token]
	// to determine if there are more instances left to be queried.
	PageSize *int32 `protobuf:"varint,3,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
	// Optional. The value of
	// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsResponse.next_page_token]
	// received from a previous `FetchModelServerVersions` call.
	// Provide this to retrieve the subsequent page in a multi-page list of
	// results. When paginating, all other parameters provided to
	// `FetchModelServerVersionsRequest` must match the call that provided the
	// page token.
	PageToken *string `protobuf:"bytes,4,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
	// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions].

func (*FetchModelServerVersionsRequest) Descriptor

func (*FetchModelServerVersionsRequest) Descriptor() ([]byte, []int)

Deprecated: Use FetchModelServerVersionsRequest.ProtoReflect.Descriptor instead.

func (*FetchModelServerVersionsRequest) GetModel

func (*FetchModelServerVersionsRequest) GetModelServer

func (x *FetchModelServerVersionsRequest) GetModelServer() string

func (*FetchModelServerVersionsRequest) GetPageSize

func (x *FetchModelServerVersionsRequest) GetPageSize() int32

func (*FetchModelServerVersionsRequest) GetPageToken

func (x *FetchModelServerVersionsRequest) GetPageToken() string

func (*FetchModelServerVersionsRequest) ProtoMessage

func (*FetchModelServerVersionsRequest) ProtoMessage()

func (*FetchModelServerVersionsRequest) ProtoReflect

func (*FetchModelServerVersionsRequest) Reset

func (*FetchModelServerVersionsRequest) String

FetchModelServerVersionsResponse

type FetchModelServerVersionsResponse struct {

	// Output only. A list of available model server versions.
	ModelServerVersions []string `protobuf:"bytes,1,rep,name=model_server_versions,json=modelServerVersions,proto3" json:"model_server_versions,omitempty"`
	// Output only. A token which may be sent as
	// [page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsRequest.page_token]
	// in a subsequent `FetchModelServerVersions` call to retrieve the next page
	// of results. If this field is omitted or empty, then there are no more
	// results to return.
	NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
	// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions].

func (*FetchModelServerVersionsResponse) Descriptor

func (*FetchModelServerVersionsResponse) Descriptor() ([]byte, []int)

Deprecated: Use FetchModelServerVersionsResponse.ProtoReflect.Descriptor instead.

func (*FetchModelServerVersionsResponse) GetModelServerVersions

func (x *FetchModelServerVersionsResponse) GetModelServerVersions() []string

func (*FetchModelServerVersionsResponse) GetNextPageToken

func (x *FetchModelServerVersionsResponse) GetNextPageToken() string

func (*FetchModelServerVersionsResponse) ProtoMessage

func (*FetchModelServerVersionsResponse) ProtoMessage()

func (*FetchModelServerVersionsResponse) ProtoReflect

func (*FetchModelServerVersionsResponse) Reset

func (*FetchModelServerVersionsResponse) String

FetchModelServersRequest

type FetchModelServersRequest struct {

	// Required. The model for which to list model servers. Open-source models
	// follow the Huggingface Hub `owner/model_name` format. Use
	// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
	// to find available models.
	Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
	// Optional. The target number of results to return in a single response.
	// If not specified, a default value will be chosen by the service.
	// Note that the response may include a partial list and a caller should
	// only rely on the response's
	// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServersResponse.next_page_token]
	// to determine if there are more instances left to be queried.
	PageSize *int32 `protobuf:"varint,2,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
	// Optional. The value of
	// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServersResponse.next_page_token]
	// received from a previous `FetchModelServers` call.
	// Provide this to retrieve the subsequent page in a multi-page list of
	// results. When paginating, all other parameters provided to
	// `FetchModelServersRequest` must match the call that provided the page
	// token.
	PageToken *string `protobuf:"bytes,3,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
	// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers].

func (*FetchModelServersRequest) Descriptor

func (*FetchModelServersRequest) Descriptor() ([]byte, []int)

Deprecated: Use FetchModelServersRequest.ProtoReflect.Descriptor instead.

func (*FetchModelServersRequest) GetModel

func (x *FetchModelServersRequest) GetModel() string

func (*FetchModelServersRequest) GetPageSize

func (x *FetchModelServersRequest) GetPageSize() int32

func (*FetchModelServersRequest) GetPageToken

func (x *FetchModelServersRequest) GetPageToken() string

func (*FetchModelServersRequest) ProtoMessage

func (*FetchModelServersRequest) ProtoMessage()

func (*FetchModelServersRequest) ProtoReflect

func (x *FetchModelServersRequest) ProtoReflect() protoreflect.Message

func (*FetchModelServersRequest) Reset

func (x *FetchModelServersRequest) Reset()

func (*FetchModelServersRequest) String

func (x *FetchModelServersRequest) String() string

FetchModelServersResponse

type FetchModelServersResponse struct {

	// Output only. List of available model servers. Open-source model servers use
	// simplified, lowercase names (e.g., `vllm`).
	ModelServers []string `protobuf:"bytes,1,rep,name=model_servers,json=modelServers,proto3" json:"model_servers,omitempty"`
	// Output only. A token which may be sent as
	// [page_token][google.cloud.gkerecommender.v1.FetchModelServersRequest.page_token]
	// in a subsequent `FetchModelServers` call to retrieve the next page of
	// results. If this field is omitted or empty, then there are no more
	// results to return.
	NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
	// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers].

func (*FetchModelServersResponse) Descriptor

func (*FetchModelServersResponse) Descriptor() ([]byte, []int)

Deprecated: Use FetchModelServersResponse.ProtoReflect.Descriptor instead.

func (*FetchModelServersResponse) GetModelServers

func (x *FetchModelServersResponse) GetModelServers() []string

func (*FetchModelServersResponse) GetNextPageToken

func (x *FetchModelServersResponse) GetNextPageToken() string

func (*FetchModelServersResponse) ProtoMessage

func (*FetchModelServersResponse) ProtoMessage()

func (*FetchModelServersResponse) ProtoReflect

func (*FetchModelServersResponse) Reset

func (x *FetchModelServersResponse) Reset()

func (*FetchModelServersResponse) String

func (x *FetchModelServersResponse) String() string

FetchModelsRequest

type FetchModelsRequest struct {

	// Optional. The target number of results to return in a single response.
	// If not specified, a default value will be chosen by the service.
	// Note that the response may include a partial list and a caller should
	// only rely on the response's
	// [next_page_token][google.cloud.gkerecommender.v1.FetchModelsResponse.next_page_token]
	// to determine if there are more instances left to be queried.
	PageSize *int32 `protobuf:"varint,1,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
	// Optional. The value of
	// [next_page_token][google.cloud.gkerecommender.v1.FetchModelsResponse.next_page_token]
	// received from a previous `FetchModels` call.
	// Provide this to retrieve the subsequent page in a multi-page list of
	// results. When paginating, all other parameters provided to
	// `FetchModelsRequest` must match the call that provided the page token.
	PageToken *string `protobuf:"bytes,2,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
	// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels].

func (*FetchModelsRequest) Descriptor

func (*FetchModelsRequest) Descriptor() ([]byte, []int)

Deprecated: Use FetchModelsRequest.ProtoReflect.Descriptor instead.

func (*FetchModelsRequest) GetPageSize

func (x *FetchModelsRequest) GetPageSize() int32

func (*FetchModelsRequest) GetPageToken

func (x *FetchModelsRequest) GetPageToken() string

func (*FetchModelsRequest) ProtoMessage

func (*FetchModelsRequest) ProtoMessage()

func (*FetchModelsRequest) ProtoReflect

func (x *FetchModelsRequest) ProtoReflect() protoreflect.Message

func (*FetchModelsRequest) Reset

func (x *FetchModelsRequest) Reset()

func (*FetchModelsRequest) String

func (x *FetchModelsRequest) String() string

FetchModelsResponse

type FetchModelsResponse struct {

	// Output only. List of available models. Open-source models follow the
	// Huggingface Hub `owner/model_name` format.
	Models []string `protobuf:"bytes,1,rep,name=models,proto3" json:"models,omitempty"`
	// Output only. A token which may be sent as
	// [page_token][google.cloud.gkerecommender.v1.FetchModelsRequest.page_token]
	// in a subsequent `FetchModels` call to retrieve the next page of results.
	// If this field is omitted or empty, then there are no more results to
	// return.
	NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
	// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels].

func (*FetchModelsResponse) Descriptor

func (*FetchModelsResponse) Descriptor() ([]byte, []int)

Deprecated: Use FetchModelsResponse.ProtoReflect.Descriptor instead.

func (*FetchModelsResponse) GetModels

func (x *FetchModelsResponse) GetModels() []string

func (*FetchModelsResponse) GetNextPageToken

func (x *FetchModelsResponse) GetNextPageToken() string

func (*FetchModelsResponse) ProtoMessage

func (*FetchModelsResponse) ProtoMessage()

func (*FetchModelsResponse) ProtoReflect

func (x *FetchModelsResponse) ProtoReflect() protoreflect.Message

func (*FetchModelsResponse) Reset

func (x *FetchModelsResponse) Reset()

func (*FetchModelsResponse) String

func (x *FetchModelsResponse) String() string

FetchProfilesRequest

type FetchProfilesRequest struct {

	// Optional. The model to filter profiles by. Open-source models follow the
	// Huggingface Hub `owner/model_name` format. If not provided, all models are
	// returned. Use
	// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
	// to find available models.
	Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
	// Optional. The model server to filter profiles by. If not provided, all
	// model servers are returned. Use
	// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
	// to find available model servers for a given model.
	ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
	// Optional. The model server version to filter profiles by. If not provided,
	// all model server versions are returned. Use
	// [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions]
	// to find available versions for a given model and server.
	ModelServerVersion string `protobuf:"bytes,3,opt,name=model_server_version,json=modelServerVersion,proto3" json:"model_server_version,omitempty"`
	// Optional. The performance requirements to filter profiles. Profiles that do
	// not meet these requirements are filtered out. If not provided, all profiles
	// are returned.
	PerformanceRequirements *PerformanceRequirements `protobuf:"bytes,4,opt,name=performance_requirements,json=performanceRequirements,proto3" json:"performance_requirements,omitempty"`
	// Optional. The target number of results to return in a single response. If
	// not specified, a default value will be chosen by the service. Note that the
	// response may include a partial list and a caller should only rely on the
	// response's
	// [next_page_token][google.cloud.gkerecommender.v1.FetchProfilesResponse.next_page_token]
	// to determine if there are more instances left to be queried.
	PageSize *int32 `protobuf:"varint,5,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
	// Optional. The value of
	// [next_page_token][google.cloud.gkerecommender.v1.FetchProfilesResponse.next_page_token]
	// received from a previous `FetchProfiles` call.
	// Provide this to retrieve the subsequent page in a multi-page list of
	// results. When paginating, all other parameters provided to
	// `FetchProfilesRequest` must match the call that provided the page
	// token.
	PageToken *string `protobuf:"bytes,6,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
	// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].

func (*FetchProfilesRequest) Descriptor

func (*FetchProfilesRequest) Descriptor() ([]byte, []int)

Deprecated: Use FetchProfilesRequest.ProtoReflect.Descriptor instead.

func (*FetchProfilesRequest) GetModel

func (x *FetchProfilesRequest) GetModel() string

func (*FetchProfilesRequest) GetModelServer

func (x *FetchProfilesRequest) GetModelServer() string

func (*FetchProfilesRequest) GetModelServerVersion

func (x *FetchProfilesRequest) GetModelServerVersion() string

func (*FetchProfilesRequest) GetPageSize

func (x *FetchProfilesRequest) GetPageSize() int32

func (*FetchProfilesRequest) GetPageToken

func (x *FetchProfilesRequest) GetPageToken() string

func (*FetchProfilesRequest) GetPerformanceRequirements

func (x *FetchProfilesRequest) GetPerformanceRequirements() *PerformanceRequirements

func (*FetchProfilesRequest) ProtoMessage

func (*FetchProfilesRequest) ProtoMessage()

func (*FetchProfilesRequest) ProtoReflect

func (x *FetchProfilesRequest) ProtoReflect() protoreflect.Message

func (*FetchProfilesRequest) Reset

func (x *FetchProfilesRequest) Reset()

func (*FetchProfilesRequest) String

func (x *FetchProfilesRequest) String() string

FetchProfilesResponse

type FetchProfilesResponse struct {

	// Output only. List of profiles that match the given model server info and
	// performance requirements (if provided).
	Profile []*Profile `protobuf:"bytes,1,rep,name=profile,proto3" json:"profile,omitempty"`
	// Output only. The combined range of performance values observed across all
	// profiles in this response.
	PerformanceRange *PerformanceRange `protobuf:"bytes,2,opt,name=performance_range,json=performanceRange,proto3" json:"performance_range,omitempty"`
	// Output only. Additional comments related to the response.
	Comments string `protobuf:"bytes,3,opt,name=comments,proto3" json:"comments,omitempty"`
	// Output only. A token which may be sent as
	// [page_token][google.cloud.gkerecommender.v1.FetchProfilesRequest.page_token]
	// in a subsequent `FetchProfiles` call to retrieve the next page of
	// results. If this field is omitted or empty, then there are no more
	// results to return.
	NextPageToken string `protobuf:"bytes,4,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
	// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].

func (*FetchProfilesResponse) Descriptor

func (*FetchProfilesResponse) Descriptor() ([]byte, []int)

Deprecated: Use FetchProfilesResponse.ProtoReflect.Descriptor instead.

func (*FetchProfilesResponse) GetComments

func (x *FetchProfilesResponse) GetComments() string

func (*FetchProfilesResponse) GetNextPageToken

func (x *FetchProfilesResponse) GetNextPageToken() string

func (*FetchProfilesResponse) GetPerformanceRange

func (x *FetchProfilesResponse) GetPerformanceRange() *PerformanceRange

func (*FetchProfilesResponse) GetProfile

func (x *FetchProfilesResponse) GetProfile() []*Profile

func (*FetchProfilesResponse) ProtoMessage

func (*FetchProfilesResponse) ProtoMessage()

func (*FetchProfilesResponse) ProtoReflect

func (x *FetchProfilesResponse) ProtoReflect() protoreflect.Message

func (*FetchProfilesResponse) Reset

func (x *FetchProfilesResponse) Reset()

func (*FetchProfilesResponse) String

func (x *FetchProfilesResponse) String() string

GenerateOptimizedManifestRequest

type GenerateOptimizedManifestRequest struct {

	// Required. The model server configuration to generate the manifest for. Use
	// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
	// to find valid configurations.
	ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
	// Required. The accelerator type. Use
	// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
	// to find valid accelerators for a given `model_server_info`.
	AcceleratorType string `protobuf:"bytes,2,opt,name=accelerator_type,json=acceleratorType,proto3" json:"accelerator_type,omitempty"`
	// Optional. The kubernetes namespace to deploy the manifests in.
	KubernetesNamespace string `protobuf:"bytes,3,opt,name=kubernetes_namespace,json=kubernetesNamespace,proto3" json:"kubernetes_namespace,omitempty"`
	// Optional. The performance requirements to use for generating Horizontal Pod
	// Autoscaler (HPA) resources. If provided, the manifest includes HPA
	// resources to adjust the model server replica count to maintain the
	// specified targets (e.g., NTPOT, TTFT) at a P50 latency. Cost targets are
	// not currently supported for HPA generation. If the specified targets are
	// not achievable, the HPA manifest will not be generated.
	PerformanceRequirements *PerformanceRequirements `protobuf:"bytes,4,opt,name=performance_requirements,json=performanceRequirements,proto3" json:"performance_requirements,omitempty"`
	// Optional. The storage configuration for the model. If not provided, the
	// model is loaded from Huggingface.
	StorageConfig *StorageConfig `protobuf:"bytes,5,opt,name=storage_config,json=storageConfig,proto3" json:"storage_config,omitempty"`
	// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.GenerateOptimizedManifest][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.GenerateOptimizedManifest].

func (*GenerateOptimizedManifestRequest) Descriptor

func (*GenerateOptimizedManifestRequest) Descriptor() ([]byte, []int)

Deprecated: Use GenerateOptimizedManifestRequest.ProtoReflect.Descriptor instead.

func (*GenerateOptimizedManifestRequest) GetAcceleratorType

func (x *GenerateOptimizedManifestRequest) GetAcceleratorType() string

func (*GenerateOptimizedManifestRequest) GetKubernetesNamespace

func (x *GenerateOptimizedManifestRequest) GetKubernetesNamespace() string

func (*GenerateOptimizedManifestRequest) GetModelServerInfo

func (x *GenerateOptimizedManifestRequest) GetModelServerInfo() *ModelServerInfo

func (*GenerateOptimizedManifestRequest) GetPerformanceRequirements

func (x *GenerateOptimizedManifestRequest) GetPerformanceRequirements() *PerformanceRequirements

func (*GenerateOptimizedManifestRequest) GetStorageConfig

func (x *GenerateOptimizedManifestRequest) GetStorageConfig() *StorageConfig

func (*GenerateOptimizedManifestRequest) ProtoMessage

func (*GenerateOptimizedManifestRequest) ProtoMessage()

func (*GenerateOptimizedManifestRequest) ProtoReflect

func (*GenerateOptimizedManifestRequest) Reset

func (*GenerateOptimizedManifestRequest) String

GenerateOptimizedManifestResponse

type GenerateOptimizedManifestResponse struct {

	// Output only. A list of generated Kubernetes manifests.
	KubernetesManifests []*KubernetesManifest `protobuf:"bytes,1,rep,name=kubernetes_manifests,json=kubernetesManifests,proto3" json:"kubernetes_manifests,omitempty"`
	// Output only. Comments related to deploying the generated manifests.
	Comments []string `protobuf:"bytes,2,rep,name=comments,proto3" json:"comments,omitempty"`
	// Output only. Additional information about the versioned dependencies used
	// to generate the manifests. See [Run best practice inference with GKE
	// Inference Quickstart
	// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
	// for details.
	ManifestVersion string `protobuf:"bytes,3,opt,name=manifest_version,json=manifestVersion,proto3" json:"manifest_version,omitempty"`
	// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.GenerateOptimizedManifest][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.GenerateOptimizedManifest].

func (*GenerateOptimizedManifestResponse) Descriptor

func (*GenerateOptimizedManifestResponse) Descriptor() ([]byte, []int)

Deprecated: Use GenerateOptimizedManifestResponse.ProtoReflect.Descriptor instead.

func (*GenerateOptimizedManifestResponse) GetComments

func (x *GenerateOptimizedManifestResponse) GetComments() []string

func (*GenerateOptimizedManifestResponse) GetKubernetesManifests

func (x *GenerateOptimizedManifestResponse) GetKubernetesManifests() []*KubernetesManifest

func (*GenerateOptimizedManifestResponse) GetManifestVersion

func (x *GenerateOptimizedManifestResponse) GetManifestVersion() string

func (*GenerateOptimizedManifestResponse) ProtoMessage

func (*GenerateOptimizedManifestResponse) ProtoMessage()

func (*GenerateOptimizedManifestResponse) ProtoReflect

func (*GenerateOptimizedManifestResponse) Reset

func (*GenerateOptimizedManifestResponse) String

GkeInferenceQuickstartClient

type GkeInferenceQuickstartClient interface {
	// Fetches available models. Open-source models follow the Huggingface Hub
	// `owner/model_name` format.
	FetchModels(ctx context.Context, in *FetchModelsRequest, opts ...grpc.CallOption) (*FetchModelsResponse, error)
	// Fetches available model servers. Open-source model servers use simplified,
	// lowercase names (e.g., `vllm`).
	FetchModelServers(ctx context.Context, in *FetchModelServersRequest, opts ...grpc.CallOption) (*FetchModelServersResponse, error)
	// Fetches available model server versions. Open-source servers use their own
	// versioning schemas (e.g., `vllm` uses semver like `v1.0.0`).
	//
	// Some model servers have different versioning schemas depending on the
	// accelerator. For example, `vllm` uses semver on GPUs, but returns nightly
	// build tags on TPUs. All available versions will be returned when different
	// schemas are present.
	FetchModelServerVersions(ctx context.Context, in *FetchModelServerVersionsRequest, opts ...grpc.CallOption) (*FetchModelServerVersionsResponse, error)
	// Fetches available profiles. A profile contains performance metrics and
	// cost information for a specific model server setup. Profiles can be
	// filtered by parameters. If no filters are provided, all profiles are
	// returned.
	//
	// Profiles display a single value per performance metric based on the
	// provided performance requirements. If no requirements are given, the
	// metrics represent the inflection point. See [Run best practice inference
	// with GKE Inference Quickstart
	// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart#how)
	// for details.
	FetchProfiles(ctx context.Context, in *FetchProfilesRequest, opts ...grpc.CallOption) (*FetchProfilesResponse, error)
	// Generates an optimized deployment manifest for a given model and model
	// server, based on the specified accelerator, performance targets, and
	// configurations. See [Run best practice inference with GKE Inference
	// Quickstart
	// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
	// for deployment details.
	GenerateOptimizedManifest(ctx context.Context, in *GenerateOptimizedManifestRequest, opts ...grpc.CallOption) (*GenerateOptimizedManifestResponse, error)
	// Fetches all of the benchmarking data available for a profile. Benchmarking
	// data returns all of the performance metrics available for a given model
	// server setup on a given instance type.
	FetchBenchmarkingData(ctx context.Context, in *FetchBenchmarkingDataRequest, opts ...grpc.CallOption) (*FetchBenchmarkingDataResponse, error)
}

GkeInferenceQuickstartClient is the client API for GkeInferenceQuickstart service.

For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.

func NewGkeInferenceQuickstartClient

func NewGkeInferenceQuickstartClient(cc grpc.ClientConnInterface) GkeInferenceQuickstartClient

GkeInferenceQuickstartServer

type GkeInferenceQuickstartServer interface {
	// Fetches available models. Open-source models follow the Huggingface Hub
	// `owner/model_name` format.
	FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)
	// Fetches available model servers. Open-source model servers use simplified,
	// lowercase names (e.g., `vllm`).
	FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)
	// Fetches available model server versions. Open-source servers use their own
	// versioning schemas (e.g., `vllm` uses semver like `v1.0.0`).
	//
	// Some model servers have different versioning schemas depending on the
	// accelerator. For example, `vllm` uses semver on GPUs, but returns nightly
	// build tags on TPUs. All available versions will be returned when different
	// schemas are present.
	FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)
	// Fetches available profiles. A profile contains performance metrics and
	// cost information for a specific model server setup. Profiles can be
	// filtered by parameters. If no filters are provided, all profiles are
	// returned.
	//
	// Profiles display a single value per performance metric based on the
	// provided performance requirements. If no requirements are given, the
	// metrics represent the inflection point. See [Run best practice inference
	// with GKE Inference Quickstart
	// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart#how)
	// for details.
	FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)
	// Generates an optimized deployment manifest for a given model and model
	// server, based on the specified accelerator, performance targets, and
	// configurations. See [Run best practice inference with GKE Inference
	// Quickstart
	// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
	// for deployment details.
	GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)
	// Fetches all of the benchmarking data available for a profile. Benchmarking
	// data returns all of the performance metrics available for a given model
	// server setup on a given instance type.
	FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)
}

GkeInferenceQuickstartServer is the server API for GkeInferenceQuickstart service. All implementations should embed UnimplementedGkeInferenceQuickstartServer for forward compatibility.

KubernetesManifest

type KubernetesManifest struct {

	// Output only. Kubernetes resource kind.
	Kind string `protobuf:"bytes,1,opt,name=kind,proto3" json:"kind,omitempty"`
	// Output only. Kubernetes API version.
	ApiVersion string `protobuf:"bytes,2,opt,name=api_version,json=apiVersion,proto3" json:"api_version,omitempty"`
	// Output only. YAML content.
	Content string `protobuf:"bytes,3,opt,name=content,proto3" json:"content,omitempty"`
	// contains filtered or unexported fields
}

A Kubernetes manifest.

func (*KubernetesManifest) Descriptor

func (*KubernetesManifest) Descriptor() ([]byte, []int)

Deprecated: Use KubernetesManifest.ProtoReflect.Descriptor instead.

func (*KubernetesManifest) GetApiVersion

func (x *KubernetesManifest) GetApiVersion() string

func (*KubernetesManifest) GetContent

func (x *KubernetesManifest) GetContent() string

func (*KubernetesManifest) GetKind

func (x *KubernetesManifest) GetKind() string

func (*KubernetesManifest) ProtoMessage

func (*KubernetesManifest) ProtoMessage()

func (*KubernetesManifest) ProtoReflect

func (x *KubernetesManifest) ProtoReflect() protoreflect.Message

func (*KubernetesManifest) Reset

func (x *KubernetesManifest) Reset()

func (*KubernetesManifest) String

func (x *KubernetesManifest) String() string

MillisecondRange

type MillisecondRange struct {

	// Output only. The minimum value of the range.
	Min int32 `protobuf:"varint,1,opt,name=min,proto3" json:"min,omitempty"`
	// Output only. The maximum value of the range.
	Max int32 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"`
	// contains filtered or unexported fields
}

Represents a range of latency values in milliseconds.

func (*MillisecondRange) Descriptor

func (*MillisecondRange) Descriptor() ([]byte, []int)

Deprecated: Use MillisecondRange.ProtoReflect.Descriptor instead.

func (*MillisecondRange) GetMax

func (x *MillisecondRange) GetMax() int32

func (*MillisecondRange) GetMin

func (x *MillisecondRange) GetMin() int32

func (*MillisecondRange) ProtoMessage

func (*MillisecondRange) ProtoMessage()

func (*MillisecondRange) ProtoReflect

func (x *MillisecondRange) ProtoReflect() protoreflect.Message

func (*MillisecondRange) Reset

func (x *MillisecondRange) Reset()

func (*MillisecondRange) String

func (x *MillisecondRange) String() string

ModelServerInfo

type ModelServerInfo struct {

	// Required. The model. Open-source models follow the Huggingface Hub
	// `owner/model_name` format. Use
	// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
	// to find available models.
	Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
	// Required. The model server. Open-source model servers use simplified,
	// lowercase names (e.g., `vllm`). Use
	// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
	// to find available servers.
	ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
	// Optional. The model server version. Use
	// [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions]
	// to find available versions. If not provided, the latest available version
	// is used.
	ModelServerVersion string `protobuf:"bytes,3,opt,name=model_server_version,json=modelServerVersion,proto3" json:"model_server_version,omitempty"`
	// contains filtered or unexported fields
}

Model server information. Valid model server info combinations can be found using [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].

func (*ModelServerInfo) Descriptor

func (*ModelServerInfo) Descriptor() ([]byte, []int)

Deprecated: Use ModelServerInfo.ProtoReflect.Descriptor instead.

func (*ModelServerInfo) GetModel

func (x *ModelServerInfo) GetModel() string

func (*ModelServerInfo) GetModelServer

func (x *ModelServerInfo) GetModelServer() string

func (*ModelServerInfo) GetModelServerVersion

func (x *ModelServerInfo) GetModelServerVersion() string

func (*ModelServerInfo) ProtoMessage

func (*ModelServerInfo) ProtoMessage()

func (*ModelServerInfo) ProtoReflect

func (x *ModelServerInfo) ProtoReflect() protoreflect.Message

func (*ModelServerInfo) Reset

func (x *ModelServerInfo) Reset()

func (*ModelServerInfo) String

func (x *ModelServerInfo) String() string

PerformanceRange

type PerformanceRange struct {

	// Output only. The range of throughput in output tokens per second. This is
	// measured as total_output_tokens_generated_by_server /
	// elapsed_time_in_seconds.
	ThroughputOutputRange *TokensPerSecondRange `protobuf:"bytes,1,opt,name=throughput_output_range,json=throughputOutputRange,proto3" json:"throughput_output_range,omitempty"`
	// Output only. The range of TTFT (Time To First Token) in milliseconds. TTFT
	// is the time it takes to generate the first token for a request.
	TtftRange *MillisecondRange `protobuf:"bytes,2,opt,name=ttft_range,json=ttftRange,proto3" json:"ttft_range,omitempty"`
	// Output only. The range of NTPOT (Normalized Time Per Output Token) in
	// milliseconds. NTPOT is the request latency normalized by the number of
	// output tokens, measured as request_latency / total_output_tokens.
	NtpotRange *MillisecondRange `protobuf:"bytes,3,opt,name=ntpot_range,json=ntpotRange,proto3" json:"ntpot_range,omitempty"`
	// contains filtered or unexported fields
}

Performance range for a model deployment.

func (*PerformanceRange) Descriptor

func (*PerformanceRange) Descriptor() ([]byte, []int)

Deprecated: Use PerformanceRange.ProtoReflect.Descriptor instead.

func (*PerformanceRange) GetNtpotRange

func (x *PerformanceRange) GetNtpotRange() *MillisecondRange

func (*PerformanceRange) GetThroughputOutputRange

func (x *PerformanceRange) GetThroughputOutputRange() *TokensPerSecondRange

func (*PerformanceRange) GetTtftRange

func (x *PerformanceRange) GetTtftRange() *MillisecondRange

func (*PerformanceRange) ProtoMessage

func (*PerformanceRange) ProtoMessage()

func (*PerformanceRange) ProtoReflect

func (x *PerformanceRange) ProtoReflect() protoreflect.Message

func (*PerformanceRange) Reset

func (x *PerformanceRange) Reset()

func (*PerformanceRange) String

func (x *PerformanceRange) String() string

PerformanceRequirements

type PerformanceRequirements struct {

	// Optional. The target Normalized Time Per Output Token (NTPOT) in
	// milliseconds. NTPOT is calculated as `request_latency /
	// total_output_tokens`. If not provided, this target will not be enforced.
	TargetNtpotMilliseconds *int32 `protobuf:"varint,1,opt,name=target_ntpot_milliseconds,json=targetNtpotMilliseconds,proto3,oneof" json:"target_ntpot_milliseconds,omitempty"`
	// Optional. The target Time To First Token (TTFT) in milliseconds. TTFT is
	// the time it takes to generate the first token for a request.  If not
	// provided, this target will not be enforced.
	TargetTtftMilliseconds *int32 `protobuf:"varint,2,opt,name=target_ttft_milliseconds,json=targetTtftMilliseconds,proto3,oneof" json:"target_ttft_milliseconds,omitempty"`
	// Optional. The target cost for running a profile's model server. If not
	// provided, this requirement will not be enforced.
	TargetCost *Cost `protobuf:"bytes,3,opt,name=target_cost,json=targetCost,proto3" json:"target_cost,omitempty"`
	// contains filtered or unexported fields
}

Performance requirements for a profile and/or model deployment.

func (*PerformanceRequirements) Descriptor

func (*PerformanceRequirements) Descriptor() ([]byte, []int)

Deprecated: Use PerformanceRequirements.ProtoReflect.Descriptor instead.

func (*PerformanceRequirements) GetTargetCost

func (x *PerformanceRequirements) GetTargetCost() *Cost

func (*PerformanceRequirements) GetTargetNtpotMilliseconds

func (x *PerformanceRequirements) GetTargetNtpotMilliseconds() int32

func (*PerformanceRequirements) GetTargetTtftMilliseconds

func (x *PerformanceRequirements) GetTargetTtftMilliseconds() int32

func (*PerformanceRequirements) ProtoMessage

func (*PerformanceRequirements) ProtoMessage()

func (*PerformanceRequirements) ProtoReflect

func (x *PerformanceRequirements) ProtoReflect() protoreflect.Message

func (*PerformanceRequirements) Reset

func (x *PerformanceRequirements) Reset()

func (*PerformanceRequirements) String

func (x *PerformanceRequirements) String() string

PerformanceStats

type PerformanceStats struct {

	// Output only. The number of queries per second.
	// Note: This metric can vary widely based on context length and may not be a
	// reliable measure of LLM throughput.
	QueriesPerSecond float32 `protobuf:"fixed32,1,opt,name=queries_per_second,json=queriesPerSecond,proto3" json:"queries_per_second,omitempty"`
	// Output only. The number of output tokens per second. This is the throughput
	// measured as total_output_tokens_generated_by_server /
	// elapsed_time_in_seconds.
	OutputTokensPerSecond int32 `protobuf:"varint,2,opt,name=output_tokens_per_second,json=outputTokensPerSecond,proto3" json:"output_tokens_per_second,omitempty"`
	// Output only. The Normalized Time Per Output Token (NTPOT) in milliseconds.
	// This is the request latency normalized by the number of output tokens,
	// measured as request_latency / total_output_tokens.
	NtpotMilliseconds int32 `protobuf:"varint,3,opt,name=ntpot_milliseconds,json=ntpotMilliseconds,proto3" json:"ntpot_milliseconds,omitempty"`
	// Output only. The Time To First Token (TTFT) in milliseconds. This is the
	// time it takes to generate the first token for a request.
	TtftMilliseconds int32 `protobuf:"varint,4,opt,name=ttft_milliseconds,json=ttftMilliseconds,proto3" json:"ttft_milliseconds,omitempty"`
	// Output only. The cost of running the model deployment.
	Cost []*Cost `protobuf:"bytes,5,rep,name=cost,proto3" json:"cost,omitempty"`
	// contains filtered or unexported fields
}

Performance statistics for a model deployment.

func (*PerformanceStats) Descriptor

func (*PerformanceStats) Descriptor() ([]byte, []int)

Deprecated: Use PerformanceStats.ProtoReflect.Descriptor instead.

func (*PerformanceStats) GetCost

func (x *PerformanceStats) GetCost() []*Cost

func (*PerformanceStats) GetNtpotMilliseconds

func (x *PerformanceStats) GetNtpotMilliseconds() int32

func (*PerformanceStats) GetOutputTokensPerSecond

func (x *PerformanceStats) GetOutputTokensPerSecond() int32

func (*PerformanceStats) GetQueriesPerSecond

func (x *PerformanceStats) GetQueriesPerSecond() float32

func (*PerformanceStats) GetTtftMilliseconds

func (x *PerformanceStats) GetTtftMilliseconds() int32

func (*PerformanceStats) ProtoMessage

func (*PerformanceStats) ProtoMessage()

func (*PerformanceStats) ProtoReflect

func (x *PerformanceStats) ProtoReflect() protoreflect.Message

func (*PerformanceStats) Reset

func (x *PerformanceStats) Reset()

func (*PerformanceStats) String

func (x *PerformanceStats) String() string

Profile

type Profile struct {

	// Output only. The model server configuration. Use
	// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
	// to find valid configurations.
	ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
	// Output only. The accelerator type. Expected format: `nvidia-h100-80gb`.
	AcceleratorType string `protobuf:"bytes,2,opt,name=accelerator_type,json=acceleratorType,proto3" json:"accelerator_type,omitempty"`
	// Output only. The TPU topology (if applicable).
	TpuTopology string `protobuf:"bytes,3,opt,name=tpu_topology,json=tpuTopology,proto3" json:"tpu_topology,omitempty"`
	// Output only. The instance type. Expected format: `a2-highgpu-1g`.
	InstanceType string `protobuf:"bytes,4,opt,name=instance_type,json=instanceType,proto3" json:"instance_type,omitempty"`
	// Output only. The resources used by the model deployment.
	ResourcesUsed *ResourcesUsed `protobuf:"bytes,5,opt,name=resources_used,json=resourcesUsed,proto3" json:"resources_used,omitempty"`
	// Output only. The performance statistics for this profile.
	PerformanceStats []*PerformanceStats `protobuf:"bytes,6,rep,name=performance_stats,json=performanceStats,proto3" json:"performance_stats,omitempty"`
	// contains filtered or unexported fields
}

A profile containing information about a model deployment.

func (*Profile) Descriptor

func (*Profile) Descriptor() ([]byte, []int)

Deprecated: Use Profile.ProtoReflect.Descriptor instead.

func (*Profile) GetAcceleratorType

func (x *Profile) GetAcceleratorType() string

func (*Profile) GetInstanceType

func (x *Profile) GetInstanceType() string

func (*Profile) GetModelServerInfo

func (x *Profile) GetModelServerInfo() *ModelServerInfo

func (*Profile) GetPerformanceStats

func (x *Profile) GetPerformanceStats() []*PerformanceStats

func (*Profile) GetResourcesUsed

func (x *Profile) GetResourcesUsed() *ResourcesUsed

func (*Profile) GetTpuTopology

func (x *Profile) GetTpuTopology() string

func (*Profile) ProtoMessage

func (*Profile) ProtoMessage()

func (*Profile) ProtoReflect

func (x *Profile) ProtoReflect() protoreflect.Message

func (*Profile) Reset

func (x *Profile) Reset()

func (*Profile) String

func (x *Profile) String() string

ResourcesUsed

type ResourcesUsed struct {

	// Output only. The number of accelerators (e.g., GPUs or TPUs) used by the
	// model deployment on the Kubernetes node.
	AcceleratorCount int32 `protobuf:"varint,1,opt,name=accelerator_count,json=acceleratorCount,proto3" json:"accelerator_count,omitempty"`
	// contains filtered or unexported fields
}

Resources used by a model deployment.

func (*ResourcesUsed) Descriptor

func (*ResourcesUsed) Descriptor() ([]byte, []int)

Deprecated: Use ResourcesUsed.ProtoReflect.Descriptor instead.

func (*ResourcesUsed) GetAcceleratorCount

func (x *ResourcesUsed) GetAcceleratorCount() int32

func (*ResourcesUsed) ProtoMessage

func (*ResourcesUsed) ProtoMessage()

func (*ResourcesUsed) ProtoReflect

func (x *ResourcesUsed) ProtoReflect() protoreflect.Message

func (*ResourcesUsed) Reset

func (x *ResourcesUsed) Reset()

func (*ResourcesUsed) String

func (x *ResourcesUsed) String() string

StorageConfig

type StorageConfig struct {

	// Optional. The Google Cloud Storage bucket URI to load the model from. This
	// URI must point to the directory containing the model's config file
	// (`config.json`) and model weights. A tuned GCSFuse setup can improve
	// LLM Pod startup time by more than 7x. Expected format:
	// `gs://<bucket>/<model-directory>`.
	ModelBucketUri string `protobuf:"bytes,1,opt,name=model_bucket_uri,json=modelBucketUri,proto3" json:"model_bucket_uri,omitempty"`
	// Optional. The Google Cloud Storage bucket URI to load the XLA compilation
	// cache from (applicable to TPU accelerators).
	XlaCacheBucketUri string `protobuf:"bytes,2,opt,name=xla_cache_bucket_uri,json=xlaCacheBucketUri,proto3" json:"xla_cache_bucket_uri,omitempty"`
	// contains filtered or unexported fields
}

Storage configuration for a model deployment.

func (*StorageConfig) Descriptor

func (*StorageConfig) Descriptor() ([]byte, []int)

Deprecated: Use StorageConfig.ProtoReflect.Descriptor instead.

func (*StorageConfig) GetModelBucketUri

func (x *StorageConfig) GetModelBucketUri() string

func (*StorageConfig) GetXlaCacheBucketUri

func (x *StorageConfig) GetXlaCacheBucketUri() string

func (*StorageConfig) ProtoMessage

func (*StorageConfig) ProtoMessage()

func (*StorageConfig) ProtoReflect

func (x *StorageConfig) ProtoReflect() protoreflect.Message

func (*StorageConfig) Reset

func (x *StorageConfig) Reset()

func (*StorageConfig) String

func (x *StorageConfig) String() string

TokensPerSecondRange

type TokensPerSecondRange struct {

	// Output only. The minimum value of the range.
	Min int32 `protobuf:"varint,1,opt,name=min,proto3" json:"min,omitempty"`
	// Output only. The maximum value of the range.
	Max int32 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"`
	// contains filtered or unexported fields
}

Represents a range of throughput values in tokens per second.

func (*TokensPerSecondRange) Descriptor

func (*TokensPerSecondRange) Descriptor() ([]byte, []int)

Deprecated: Use TokensPerSecondRange.ProtoReflect.Descriptor instead.

func (*TokensPerSecondRange) GetMax

func (x *TokensPerSecondRange) GetMax() int32

func (*TokensPerSecondRange) GetMin

func (x *TokensPerSecondRange) GetMin() int32

func (*TokensPerSecondRange) ProtoMessage

func (*TokensPerSecondRange) ProtoMessage()

func (*TokensPerSecondRange) ProtoReflect

func (x *TokensPerSecondRange) ProtoReflect() protoreflect.Message

func (*TokensPerSecondRange) Reset

func (x *TokensPerSecondRange) Reset()

func (*TokensPerSecondRange) String

func (x *TokensPerSecondRange) String() string

UnimplementedGkeInferenceQuickstartServer

type UnimplementedGkeInferenceQuickstartServer struct {
}

UnimplementedGkeInferenceQuickstartServer should be embedded to have forward compatible implementations.

func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData

func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions

func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers

func (UnimplementedGkeInferenceQuickstartServer) FetchModels

func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles

func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest

UnsafeGkeInferenceQuickstartServer

type UnsafeGkeInferenceQuickstartServer interface {
	// contains filtered or unexported methods
}

UnsafeGkeInferenceQuickstartServer may be embedded to opt out of forward compatibility for this service. Use of this interface is not recommended, as added methods to GkeInferenceQuickstartServer will result in compilation errors.