Constants
GkeInferenceQuickstart_FetchModels_FullMethodName, GkeInferenceQuickstart_FetchModelServers_FullMethodName, GkeInferenceQuickstart_FetchModelServerVersions_FullMethodName, GkeInferenceQuickstart_FetchProfiles_FullMethodName, GkeInferenceQuickstart_GenerateOptimizedManifest_FullMethodName, GkeInferenceQuickstart_FetchBenchmarkingData_FullMethodName
const (
GkeInferenceQuickstart_FetchModels_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModels"
GkeInferenceQuickstart_FetchModelServers_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModelServers"
GkeInferenceQuickstart_FetchModelServerVersions_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModelServerVersions"
GkeInferenceQuickstart_FetchProfiles_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchProfiles"
GkeInferenceQuickstart_GenerateOptimizedManifest_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/GenerateOptimizedManifest"
GkeInferenceQuickstart_FetchBenchmarkingData_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchBenchmarkingData"
)

Variables
File_google_cloud_gkerecommender_v1_gkerecommender_proto

var File_google_cloud_gkerecommender_v1_gkerecommender_proto protoreflect.FileDescriptor

GkeInferenceQuickstart_ServiceDesc
var GkeInferenceQuickstart_ServiceDesc = grpc.ServiceDesc{
ServiceName: "google.cloud.gkerecommender.v1.GkeInferenceQuickstart",
HandlerType: (*GkeInferenceQuickstartServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "FetchModels",
Handler: _GkeInferenceQuickstart_FetchModels_Handler,
},
{
MethodName: "FetchModelServers",
Handler: _GkeInferenceQuickstart_FetchModelServers_Handler,
},
{
MethodName: "FetchModelServerVersions",
Handler: _GkeInferenceQuickstart_FetchModelServerVersions_Handler,
},
{
MethodName: "FetchProfiles",
Handler: _GkeInferenceQuickstart_FetchProfiles_Handler,
},
{
MethodName: "GenerateOptimizedManifest",
Handler: _GkeInferenceQuickstart_GenerateOptimizedManifest_Handler,
},
{
MethodName: "FetchBenchmarkingData",
Handler: _GkeInferenceQuickstart_FetchBenchmarkingData_Handler,
},
},
Streams: []grpc.StreamDesc{},
Metadata: "google/cloud/gkerecommender/v1/gkerecommender.proto",
}

GkeInferenceQuickstart_ServiceDesc is the grpc.ServiceDesc for the GkeInferenceQuickstart service. It is only intended for direct use with grpc.RegisterService, and is not to be introspected or modified (even as a copy).
Functions
func RegisterGkeInferenceQuickstartServer
func RegisterGkeInferenceQuickstartServer(s grpc.ServiceRegistrar, srv GkeInferenceQuickstartServer)

Amount
type Amount struct {
// Output only. The whole units of the amount.
// For example if `currencyCode` is `"USD"`, then 1 unit is one US dollar.
Units int64 `protobuf:"varint,1,opt,name=units,proto3" json:"units,omitempty"`
// Output only. Number of nano (10^-9) units of the amount.
// The value must be between -999,999,999 and +999,999,999 inclusive.
// If `units` is positive, `nanos` must be positive or zero.
// If `units` is zero, `nanos` can be positive, zero, or negative.
// If `units` is negative, `nanos` must be negative or zero.
// For example $-1.75 is represented as `units`=-1 and `nanos`=-750,000,000.
Nanos int32 `protobuf:"varint,2,opt,name=nanos,proto3" json:"nanos,omitempty"`
// contains filtered or unexported fields
}

Represents an amount of money in a specific currency.

func (*Amount) Descriptor
Deprecated: Use Amount.ProtoReflect.Descriptor instead.
func (*Amount) GetNanos
func (*Amount) GetUnits
func (*Amount) ProtoMessage
func (*Amount) ProtoMessage()
func (*Amount) ProtoReflect
func (x *Amount) ProtoReflect() protoreflect.Message
func (*Amount) Reset
func (x *Amount) Reset()
func (*Amount) String
Cost
type Cost struct {
// Optional. The cost per million output tokens, calculated as:
// $/output token = GPU $/s / (1/output-to-input-cost-ratio * input tokens/s +
// output tokens/s)
CostPerMillionOutputTokens *Amount `protobuf:"bytes,1,opt,name=cost_per_million_output_tokens,json=costPerMillionOutputTokens,proto3" json:"cost_per_million_output_tokens,omitempty"`
// Optional. The cost per million input tokens. $/input token = ($/output
// token) / output-to-input-cost-ratio.
CostPerMillionInputTokens *Amount `protobuf:"bytes,2,opt,name=cost_per_million_input_tokens,json=costPerMillionInputTokens,proto3" json:"cost_per_million_input_tokens,omitempty"`
// Optional. The pricing model used to calculate the cost. Can be one of:
// `3-years-cud`, `1-year-cud`, `on-demand`, `spot`. If not provided, `spot`
// will be used.
PricingModel string `protobuf:"bytes,3,opt,name=pricing_model,json=pricingModel,proto3" json:"pricing_model,omitempty"`
// Optional. The output-to-input cost ratio. This determines how the total GPU
// cost is split between input and output tokens. If not provided, `4.0` is
// used, assuming a 4:1 output:input cost ratio.
OutputInputCostRatio *float32 `protobuf:"fixed32,4,opt,name=output_input_cost_ratio,json=outputInputCostRatio,proto3,oneof" json:"output_input_cost_ratio,omitempty"`
// contains filtered or unexported fields
}

Cost for running a model deployment on a given instance type. Currently, only the USD currency code is supported.

func (*Cost) Descriptor
Deprecated: Use Cost.ProtoReflect.Descriptor instead.
func (*Cost) GetCostPerMillionInputTokens
func (*Cost) GetCostPerMillionOutputTokens
func (*Cost) GetOutputInputCostRatio
func (*Cost) GetPricingModel
func (*Cost) ProtoMessage
func (*Cost) ProtoMessage()
func (*Cost) ProtoReflect
func (x *Cost) ProtoReflect() protoreflect.Message
func (*Cost) Reset
func (x *Cost) Reset()
func (*Cost) String
FetchBenchmarkingDataRequest
type FetchBenchmarkingDataRequest struct {
// Required. The model server configuration to get benchmarking data for. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Optional. The instance type to filter benchmarking data. Instance types are
// in the format `a2-highgpu-1g`. If not provided, all instance types for the
// given profile's `model_server_info` will be returned. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find available instance types.
InstanceType string `protobuf:"bytes,3,opt,name=instance_type,json=instanceType,proto3" json:"instance_type,omitempty"`
// Optional. The pricing model to use for the benchmarking data. Defaults to
// `spot`.
PricingModel string `protobuf:"bytes,4,opt,name=pricing_model,json=pricingModel,proto3" json:"pricing_model,omitempty"`
// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchBenchmarkingData][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchBenchmarkingData].

func (*FetchBenchmarkingDataRequest) Descriptor
func (*FetchBenchmarkingDataRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchBenchmarkingDataRequest.ProtoReflect.Descriptor instead.
func (*FetchBenchmarkingDataRequest) GetInstanceType
func (x *FetchBenchmarkingDataRequest) GetInstanceType() string
func (*FetchBenchmarkingDataRequest) GetModelServerInfo
func (x *FetchBenchmarkingDataRequest) GetModelServerInfo() *ModelServerInfo
func (*FetchBenchmarkingDataRequest) GetPricingModel
func (x *FetchBenchmarkingDataRequest) GetPricingModel() string
func (*FetchBenchmarkingDataRequest) ProtoMessage
func (*FetchBenchmarkingDataRequest) ProtoMessage()
func (*FetchBenchmarkingDataRequest) ProtoReflect
func (x *FetchBenchmarkingDataRequest) ProtoReflect() protoreflect.Message
func (*FetchBenchmarkingDataRequest) Reset
func (x *FetchBenchmarkingDataRequest) Reset()
func (*FetchBenchmarkingDataRequest) String
func (x *FetchBenchmarkingDataRequest) String() string

FetchBenchmarkingDataResponse
type FetchBenchmarkingDataResponse struct {
// Output only. List of profiles containing their respective benchmarking
// data.
Profile []*Profile `protobuf:"bytes,1,rep,name=profile,proto3" json:"profile,omitempty"`
// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchBenchmarkingData][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchBenchmarkingData].

func (*FetchBenchmarkingDataResponse) Descriptor
func (*FetchBenchmarkingDataResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchBenchmarkingDataResponse.ProtoReflect.Descriptor instead.
func (*FetchBenchmarkingDataResponse) GetProfile
func (x *FetchBenchmarkingDataResponse) GetProfile() []*Profile
func (*FetchBenchmarkingDataResponse) ProtoMessage
func (*FetchBenchmarkingDataResponse) ProtoMessage()
func (*FetchBenchmarkingDataResponse) ProtoReflect
func (x *FetchBenchmarkingDataResponse) ProtoReflect() protoreflect.Message
func (*FetchBenchmarkingDataResponse) Reset
func (x *FetchBenchmarkingDataResponse) Reset()
func (*FetchBenchmarkingDataResponse) String
func (x *FetchBenchmarkingDataResponse) String() string

FetchModelServerVersionsRequest
type FetchModelServerVersionsRequest struct {
// Required. The model for which to list model server versions. Open-source
// models follow the Huggingface Hub `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Required. The model server for which to list versions. Open-source model
// servers use simplified, lowercase names (e.g., `vllm`). Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available model servers.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,3,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsResponse.next_page_token]
// received from a previous `FetchModelServerVersionsRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelServerVersionsRequest` must match the call that provided the
// page token.
PageToken *string `protobuf:"bytes,4,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions].

func (*FetchModelServerVersionsRequest) Descriptor
func (*FetchModelServerVersionsRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServerVersionsRequest.ProtoReflect.Descriptor instead.
func (*FetchModelServerVersionsRequest) GetModel
func (x *FetchModelServerVersionsRequest) GetModel() string
func (*FetchModelServerVersionsRequest) GetModelServer
func (x *FetchModelServerVersionsRequest) GetModelServer() string
func (*FetchModelServerVersionsRequest) GetPageSize
func (x *FetchModelServerVersionsRequest) GetPageSize() int32
func (*FetchModelServerVersionsRequest) GetPageToken
func (x *FetchModelServerVersionsRequest) GetPageToken() string
func (*FetchModelServerVersionsRequest) ProtoMessage
func (*FetchModelServerVersionsRequest) ProtoMessage()
func (*FetchModelServerVersionsRequest) ProtoReflect
func (x *FetchModelServerVersionsRequest) ProtoReflect() protoreflect.Message
func (*FetchModelServerVersionsRequest) Reset
func (x *FetchModelServerVersionsRequest) Reset()
func (*FetchModelServerVersionsRequest) String
func (x *FetchModelServerVersionsRequest) String() string

FetchModelServerVersionsResponse
type FetchModelServerVersionsResponse struct {
// Output only. A list of available model server versions.
ModelServerVersions []string `protobuf:"bytes,1,rep,name=model_server_versions,json=modelServerVersions,proto3" json:"model_server_versions,omitempty"`
// Output only. A token which may be sent as
// [page_token][FetchModelServerVersionsResponse.page_token] in a subsequent
// `FetchModelServerVersionsResponse` call to retrieve the next page of
// results. If this field is omitted or empty, then there are no more results
// to return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions].

func (*FetchModelServerVersionsResponse) Descriptor
func (*FetchModelServerVersionsResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServerVersionsResponse.ProtoReflect.Descriptor instead.
func (*FetchModelServerVersionsResponse) GetModelServerVersions
func (x *FetchModelServerVersionsResponse) GetModelServerVersions() []string
func (*FetchModelServerVersionsResponse) GetNextPageToken
func (x *FetchModelServerVersionsResponse) GetNextPageToken() string
func (*FetchModelServerVersionsResponse) ProtoMessage
func (*FetchModelServerVersionsResponse) ProtoMessage()
func (*FetchModelServerVersionsResponse) ProtoReflect
func (x *FetchModelServerVersionsResponse) ProtoReflect() protoreflect.Message
func (*FetchModelServerVersionsResponse) Reset
func (x *FetchModelServerVersionsResponse) Reset()
func (*FetchModelServerVersionsResponse) String
func (x *FetchModelServerVersionsResponse) String() string

FetchModelServersRequest
type FetchModelServersRequest struct {
// Required. The model for which to list model servers. Open-source models
// follow the Huggingface Hub `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServersResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,2,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServersResponse.next_page_token]
// received from a previous `FetchModelServersRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelServersRequest` must match the call that provided the page
// token.
PageToken *string `protobuf:"bytes,3,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers].

func (*FetchModelServersRequest) Descriptor
func (*FetchModelServersRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServersRequest.ProtoReflect.Descriptor instead.
func (*FetchModelServersRequest) GetModel
func (x *FetchModelServersRequest) GetModel() string
func (*FetchModelServersRequest) GetPageSize
func (x *FetchModelServersRequest) GetPageSize() int32
func (*FetchModelServersRequest) GetPageToken
func (x *FetchModelServersRequest) GetPageToken() string
func (*FetchModelServersRequest) ProtoMessage
func (*FetchModelServersRequest) ProtoMessage()
func (*FetchModelServersRequest) ProtoReflect
func (x *FetchModelServersRequest) ProtoReflect() protoreflect.Message
func (*FetchModelServersRequest) Reset
func (x *FetchModelServersRequest) Reset()
func (*FetchModelServersRequest) String
func (x *FetchModelServersRequest) String() string

FetchModelServersResponse
type FetchModelServersResponse struct {
// Output only. List of available model servers. Open-source model servers use
// simplified, lowercase names (e.g., `vllm`).
ModelServers []string `protobuf:"bytes,1,rep,name=model_servers,json=modelServers,proto3" json:"model_servers,omitempty"`
// Output only. A token which may be sent as
// [page_token][FetchModelServersResponse.page_token] in a subsequent
// `FetchModelServersResponse` call to retrieve the next page of results.
// If this field is omitted or empty, then there are no more results to
// return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers].

func (*FetchModelServersResponse) Descriptor
func (*FetchModelServersResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServersResponse.ProtoReflect.Descriptor instead.
func (*FetchModelServersResponse) GetModelServers
func (x *FetchModelServersResponse) GetModelServers() []string
func (*FetchModelServersResponse) GetNextPageToken
func (x *FetchModelServersResponse) GetNextPageToken() string
func (*FetchModelServersResponse) ProtoMessage
func (*FetchModelServersResponse) ProtoMessage()
func (*FetchModelServersResponse) ProtoReflect
func (x *FetchModelServersResponse) ProtoReflect() protoreflect.Message
func (*FetchModelServersResponse) Reset
func (x *FetchModelServersResponse) Reset()
func (*FetchModelServersResponse) String
func (x *FetchModelServersResponse) String() string

FetchModelsRequest
type FetchModelsRequest struct {
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelsResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,1,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelsResponse.next_page_token]
// received from a previous `FetchModelsRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelsRequest` must match the call that provided the page token.
PageToken *string `protobuf:"bytes,2,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels].

func (*FetchModelsRequest) Descriptor
func (*FetchModelsRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelsRequest.ProtoReflect.Descriptor instead.
func (*FetchModelsRequest) GetPageSize
func (x *FetchModelsRequest) GetPageSize() int32
func (*FetchModelsRequest) GetPageToken
func (x *FetchModelsRequest) GetPageToken() string
func (*FetchModelsRequest) ProtoMessage
func (*FetchModelsRequest) ProtoMessage()
func (*FetchModelsRequest) ProtoReflect
func (x *FetchModelsRequest) ProtoReflect() protoreflect.Message
func (*FetchModelsRequest) Reset
func (x *FetchModelsRequest) Reset()
func (*FetchModelsRequest) String
func (x *FetchModelsRequest) String() string

FetchModelsResponse
type FetchModelsResponse struct {
// Output only. List of available models. Open-source models follow the
// Huggingface Hub `owner/model_name` format.
Models []string `protobuf:"bytes,1,rep,name=models,proto3" json:"models,omitempty"`
// Output only. A token which may be sent as
// [page_token][FetchModelsResponse.page_token] in a subsequent
// `FetchModelsResponse` call to retrieve the next page of results.
// If this field is omitted or empty, then there are no more results to
// return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels].

func (*FetchModelsResponse) Descriptor
func (*FetchModelsResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelsResponse.ProtoReflect.Descriptor instead.
func (*FetchModelsResponse) GetModels
func (x *FetchModelsResponse) GetModels() []string
func (*FetchModelsResponse) GetNextPageToken
func (x *FetchModelsResponse) GetNextPageToken() string
func (*FetchModelsResponse) ProtoMessage
func (*FetchModelsResponse) ProtoMessage()
func (*FetchModelsResponse) ProtoReflect
func (x *FetchModelsResponse) ProtoReflect() protoreflect.Message
func (*FetchModelsResponse) Reset
func (x *FetchModelsResponse) Reset()
func (*FetchModelsResponse) String
func (x *FetchModelsResponse) String() string

FetchProfilesRequest
type FetchProfilesRequest struct {
// Optional. The model to filter profiles by. Open-source models follow the
// Huggingface Hub `owner/model_name` format. If not provided, all models are
// returned. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Optional. The model server to filter profiles by. If not provided, all
// model servers are returned. Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available model servers for a given model.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The model server version to filter profiles by. If not provided,
// all model server versions are returned. Use
// [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions]
// to find available versions for a given model and server.
ModelServerVersion string `protobuf:"bytes,3,opt,name=model_server_version,json=modelServerVersion,proto3" json:"model_server_version,omitempty"`
// Optional. The performance requirements to filter profiles. Profiles that do
// not meet these requirements are filtered out. If not provided, all profiles
// are returned.
PerformanceRequirements *PerformanceRequirements `protobuf:"bytes,4,opt,name=performance_requirements,json=performanceRequirements,proto3" json:"performance_requirements,omitempty"`
// Optional. The target number of results to return in a single response. If
// not specified, a default value will be chosen by the service. Note that the
// response may include a partial list and a caller should only rely on the
// response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchProfilesResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,5,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchProfilesResponse.next_page_token]
// received from a previous `FetchProfilesRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchProfilesRequest` must match the call that provided the page
// token.
PageToken *string `protobuf:"bytes,6,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].

func (*FetchProfilesRequest) Descriptor
func (*FetchProfilesRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchProfilesRequest.ProtoReflect.Descriptor instead.
func (*FetchProfilesRequest) GetModel
func (x *FetchProfilesRequest) GetModel() string
func (*FetchProfilesRequest) GetModelServer
func (x *FetchProfilesRequest) GetModelServer() string
func (*FetchProfilesRequest) GetModelServerVersion
func (x *FetchProfilesRequest) GetModelServerVersion() string
func (*FetchProfilesRequest) GetPageSize
func (x *FetchProfilesRequest) GetPageSize() int32
func (*FetchProfilesRequest) GetPageToken
func (x *FetchProfilesRequest) GetPageToken() string
func (*FetchProfilesRequest) GetPerformanceRequirements
func (x *FetchProfilesRequest) GetPerformanceRequirements() *PerformanceRequirements
func (*FetchProfilesRequest) ProtoMessage
func (*FetchProfilesRequest) ProtoMessage()
func (*FetchProfilesRequest) ProtoReflect
func (x *FetchProfilesRequest) ProtoReflect() protoreflect.Message
func (*FetchProfilesRequest) Reset
func (x *FetchProfilesRequest) Reset()
func (*FetchProfilesRequest) String
func (x *FetchProfilesRequest) String() string

FetchProfilesResponse
type FetchProfilesResponse struct {
// Output only. List of profiles that match the given model server info and
// performance requirements (if provided).
Profile []*Profile `protobuf:"bytes,1,rep,name=profile,proto3" json:"profile,omitempty"`
// Output only. The combined range of performance values observed across all
// profiles in this response.
PerformanceRange *PerformanceRange `protobuf:"bytes,2,opt,name=performance_range,json=performanceRange,proto3" json:"performance_range,omitempty"`
// Output only. Additional comments related to the response.
Comments string `protobuf:"bytes,3,opt,name=comments,proto3" json:"comments,omitempty"`
// Output only. A token which may be sent as
// [page_token][FetchProfilesResponse.page_token] in a subsequent
// `FetchProfilesResponse` call to retrieve the next page of results. If this
// field is omitted or empty, then there are no more results to return.
NextPageToken string `protobuf:"bytes,4,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}

Response message for [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].

func (*FetchProfilesResponse) Descriptor
func (*FetchProfilesResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchProfilesResponse.ProtoReflect.Descriptor instead.
func (*FetchProfilesResponse) GetComments
func (x *FetchProfilesResponse) GetComments() string
func (*FetchProfilesResponse) GetNextPageToken
func (x *FetchProfilesResponse) GetNextPageToken() string
func (*FetchProfilesResponse) GetPerformanceRange
func (x *FetchProfilesResponse) GetPerformanceRange() *PerformanceRange
func (*FetchProfilesResponse) GetProfile
func (x *FetchProfilesResponse) GetProfile() []*Profile
func (*FetchProfilesResponse) ProtoMessage
func (*FetchProfilesResponse) ProtoMessage()
func (*FetchProfilesResponse) ProtoReflect
func (x *FetchProfilesResponse) ProtoReflect() protoreflect.Message
func (*FetchProfilesResponse) Reset
func (x *FetchProfilesResponse) Reset()
func (*FetchProfilesResponse) String
func (x *FetchProfilesResponse) String() string

GenerateOptimizedManifestRequest
type GenerateOptimizedManifestRequest struct {
// Required. The model server configuration to generate the manifest for. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Required. The accelerator type. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid accelerators for a given `model_server_info`.
AcceleratorType string `protobuf:"bytes,2,opt,name=accelerator_type,json=acceleratorType,proto3" json:"accelerator_type,omitempty"`
// Optional. The kubernetes namespace to deploy the manifests in.
KubernetesNamespace string `protobuf:"bytes,3,opt,name=kubernetes_namespace,json=kubernetesNamespace,proto3" json:"kubernetes_namespace,omitempty"`
// Optional. The performance requirements to use for generating Horizontal Pod
// Autoscaler (HPA) resources. If provided, the manifest includes HPA
// resources to adjust the model server replica count to maintain the
// specified targets (e.g., NTPOT, TTFT) at a P50 latency. Cost targets are
// not currently supported for HPA generation. If the specified targets are
// not achievable, the HPA manifest will not be generated.
PerformanceRequirements *PerformanceRequirements `protobuf:"bytes,4,opt,name=performance_requirements,json=performanceRequirements,proto3" json:"performance_requirements,omitempty"`
// Optional. The storage configuration for the model. If not provided, the
// model is loaded from Huggingface.
StorageConfig *StorageConfig `protobuf:"bytes,5,opt,name=storage_config,json=storageConfig,proto3" json:"storage_config,omitempty"`
// contains filtered or unexported fields
}

Request message for [GkeInferenceQuickstart.GenerateOptimizedManifest][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.GenerateOptimizedManifest].

func (*GenerateOptimizedManifestRequest) Descriptor
func (*GenerateOptimizedManifestRequest) Descriptor() ([]byte, []int)
Deprecated: Use GenerateOptimizedManifestRequest.ProtoReflect.Descriptor instead.
func (*GenerateOptimizedManifestRequest) GetAcceleratorType
func (x *GenerateOptimizedManifestRequest) GetAcceleratorType() string
func (*GenerateOptimizedManifestRequest) GetKubernetesNamespace
func (x *GenerateOptimizedManifestRequest) GetKubernetesNamespace() string
func (*GenerateOptimizedManifestRequest) GetModelServerInfo
func (x *GenerateOptimizedManifestRequest) GetModelServerInfo() *ModelServerInfo
func (*GenerateOptimizedManifestRequest) GetPerformanceRequirements
func (x *GenerateOptimizedManifestRequest) GetPerformanceRequirements() *PerformanceRequirements
func (*GenerateOptimizedManifestRequest) GetStorageConfig
func (x *GenerateOptimizedManifestRequest) GetStorageConfig() *StorageConfig
func (*GenerateOptimizedManifestRequest) ProtoMessage
func (*GenerateOptimizedManifestRequest) ProtoMessage()
func (*GenerateOptimizedManifestRequest) ProtoReflect
func (x *GenerateOptimizedManifestRequest) ProtoReflect() protoreflect.Message
func (*GenerateOptimizedManifestRequest) Reset
func (x *GenerateOptimizedManifestRequest) Reset()
func (*GenerateOptimizedManifestRequest) String
func (x *GenerateOptimizedManifestRequest) String() string

GenerateOptimizedManifestResponse
type GenerateOptimizedManifestResponse struct {
// Output only. A list of generated Kubernetes manifests.
KubernetesManifests []*KubernetesManifest `protobuf:"bytes,1,rep,name=kubernetes_manifests,json=kubernetesManifests,proto3" json:"kubernetes_manifests,omitempty"`
// Output only. Comments related to deploying the generated manifests.
Comments []string `protobuf:"bytes,2,rep,name=comments,proto3" json:"comments,omitempty"`
// Output only. Additional information about the versioned dependencies used
// to generate the manifests. See [Run best practice inference with GKE
// Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for details.
ManifestVersion string `protobuf:"bytes,3,opt,name=manifest_version,json=manifestVersion,proto3" json:"manifest_version,omitempty"`
// contains filtered or unexported fields
}Response message for [GkeInferenceQuickstart.GenerateOptimizedManifest][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.GenerateOptimizedManifest].
func (*GenerateOptimizedManifestResponse) Descriptor
func (*GenerateOptimizedManifestResponse) Descriptor() ([]byte, []int)Deprecated: Use GenerateOptimizedManifestResponse.ProtoReflect.Descriptor instead.
func (*GenerateOptimizedManifestResponse) GetComments
func (x *GenerateOptimizedManifestResponse) GetComments() []stringfunc (*GenerateOptimizedManifestResponse) GetKubernetesManifests
func (x *GenerateOptimizedManifestResponse) GetKubernetesManifests() []*KubernetesManifestfunc (*GenerateOptimizedManifestResponse) GetManifestVersion
func (x *GenerateOptimizedManifestResponse) GetManifestVersion() stringfunc (*GenerateOptimizedManifestResponse) ProtoMessage
func (*GenerateOptimizedManifestResponse) ProtoMessage()func (*GenerateOptimizedManifestResponse) ProtoReflect
func (x *GenerateOptimizedManifestResponse) ProtoReflect() protoreflect.Messagefunc (*GenerateOptimizedManifestResponse) Reset
func (x *GenerateOptimizedManifestResponse) Reset()func (*GenerateOptimizedManifestResponse) String
func (x *GenerateOptimizedManifestResponse) String() stringGkeInferenceQuickstartClient
type GkeInferenceQuickstartClient interface {
// Fetches available models. Open-source models follow the Huggingface Hub
// `owner/model_name` format.
FetchModels(ctx context.Context, in *FetchModelsRequest, opts ...grpc.CallOption) (*FetchModelsResponse, error)
// Fetches available model servers. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`).
FetchModelServers(ctx context.Context, in *FetchModelServersRequest, opts ...grpc.CallOption) (*FetchModelServersResponse, error)
// Fetches available model server versions. Open-source servers use their own
// versioning schemas (e.g., `vllm` uses semver like `v1.0.0`).
//
// Some model servers have different versioning schemas depending on the
// accelerator. For example, `vllm` uses semver on GPUs, but returns nightly
// build tags on TPUs. All available versions will be returned when different
// schemas are present.
FetchModelServerVersions(ctx context.Context, in *FetchModelServerVersionsRequest, opts ...grpc.CallOption) (*FetchModelServerVersionsResponse, error)
// Fetches available profiles. A profile contains performance metrics and
// cost information for a specific model server setup. Profiles can be
// filtered by parameters. If no filters are provided, all profiles are
// returned.
//
// Profiles display a single value per performance metric based on the
// provided performance requirements. If no requirements are given, the
// metrics represent the inflection point. See [Run best practice inference
// with GKE Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart#how)
// for details.
FetchProfiles(ctx context.Context, in *FetchProfilesRequest, opts ...grpc.CallOption) (*FetchProfilesResponse, error)
// Generates an optimized deployment manifest for a given model and model
// server, based on the specified accelerator, performance targets, and
// configurations. See [Run best practice inference with GKE Inference
// Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for deployment details.
GenerateOptimizedManifest(ctx context.Context, in *GenerateOptimizedManifestRequest, opts ...grpc.CallOption) (*GenerateOptimizedManifestResponse, error)
// Fetches all of the benchmarking data available for a profile. Benchmarking
// data returns all of the performance metrics available for a given model
// server setup on a given instance type.
FetchBenchmarkingData(ctx context.Context, in *FetchBenchmarkingDataRequest, opts ...grpc.CallOption) (*FetchBenchmarkingDataResponse, error)
}GkeInferenceQuickstartClient is the client API for GkeInferenceQuickstart service.
For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
func NewGkeInferenceQuickstartClient
func NewGkeInferenceQuickstartClient(cc grpc.ClientConnInterface) GkeInferenceQuickstartClientGkeInferenceQuickstartServer
type GkeInferenceQuickstartServer interface {
// Fetches available models. Open-source models follow the Huggingface Hub
// `owner/model_name` format.
FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)
// Fetches available model servers. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`).
FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)
// Fetches available model server versions. Open-source servers use their own
// versioning schemas (e.g., `vllm` uses semver like `v1.0.0`).
//
// Some model servers have different versioning schemas depending on the
// accelerator. For example, `vllm` uses semver on GPUs, but returns nightly
// build tags on TPUs. All available versions will be returned when different
// schemas are present.
FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)
// Fetches available profiles. A profile contains performance metrics and
// cost information for a specific model server setup. Profiles can be
// filtered by parameters. If no filters are provided, all profiles are
// returned.
//
// Profiles display a single value per performance metric based on the
// provided performance requirements. If no requirements are given, the
// metrics represent the inflection point. See [Run best practice inference
// with GKE Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart#how)
// for details.
FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)
// Generates an optimized deployment manifest for a given model and model
// server, based on the specified accelerator, performance targets, and
// configurations. See [Run best practice inference with GKE Inference
// Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for deployment details.
GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)
// Fetches all of the benchmarking data available for a profile. Benchmarking
// data returns all of the performance metrics available for a given model
// server setup on a given instance type.
FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)
}GkeInferenceQuickstartServer is the server API for GkeInferenceQuickstart service. All implementations should embed UnimplementedGkeInferenceQuickstartServer for forward compatibility.
KubernetesManifest
type KubernetesManifest struct {
// Output only. Kubernetes resource kind.
Kind string `protobuf:"bytes,1,opt,name=kind,proto3" json:"kind,omitempty"`
// Output only. Kubernetes API version.
ApiVersion string `protobuf:"bytes,2,opt,name=api_version,json=apiVersion,proto3" json:"api_version,omitempty"`
// Output only. YAML content.
Content string `protobuf:"bytes,3,opt,name=content,proto3" json:"content,omitempty"`
// contains filtered or unexported fields
}A Kubernetes manifest.
func (*KubernetesManifest) Descriptor
func (*KubernetesManifest) Descriptor() ([]byte, []int)Deprecated: Use KubernetesManifest.ProtoReflect.Descriptor instead.
func (*KubernetesManifest) GetApiVersion
func (x *KubernetesManifest) GetApiVersion() stringfunc (*KubernetesManifest) GetContent
func (x *KubernetesManifest) GetContent() stringfunc (*KubernetesManifest) GetKind
func (x *KubernetesManifest) GetKind() stringfunc (*KubernetesManifest) ProtoMessage
func (*KubernetesManifest) ProtoMessage()func (*KubernetesManifest) ProtoReflect
func (x *KubernetesManifest) ProtoReflect() protoreflect.Messagefunc (*KubernetesManifest) Reset
func (x *KubernetesManifest) Reset()func (*KubernetesManifest) String
func (x *KubernetesManifest) String() stringMillisecondRange
type MillisecondRange struct {
// Output only. The minimum value of the range.
Min int32 `protobuf:"varint,1,opt,name=min,proto3" json:"min,omitempty"`
// Output only. The maximum value of the range.
Max int32 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"`
// contains filtered or unexported fields
}Represents a range of latency values in milliseconds.
func (*MillisecondRange) Descriptor
func (*MillisecondRange) Descriptor() ([]byte, []int)Deprecated: Use MillisecondRange.ProtoReflect.Descriptor instead.
func (*MillisecondRange) GetMax
func (x *MillisecondRange) GetMax() int32func (*MillisecondRange) GetMin
func (x *MillisecondRange) GetMin() int32func (*MillisecondRange) ProtoMessage
func (*MillisecondRange) ProtoMessage()func (*MillisecondRange) ProtoReflect
func (x *MillisecondRange) ProtoReflect() protoreflect.Messagefunc (*MillisecondRange) Reset
func (x *MillisecondRange) Reset()func (*MillisecondRange) String
func (x *MillisecondRange) String() stringModelServerInfo
type ModelServerInfo struct {
// Required. The model. Open-source models follow the Huggingface Hub
// `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Required. The model server. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`). Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available servers.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The model server version. Use
// [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions]
// to find available versions. If not provided, the latest available version
// is used.
ModelServerVersion string `protobuf:"bytes,3,opt,name=model_server_version,json=modelServerVersion,proto3" json:"model_server_version,omitempty"`
// contains filtered or unexported fields
}Model server information. Valid model server info combinations can be found using [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].
func (*ModelServerInfo) Descriptor
func (*ModelServerInfo) Descriptor() ([]byte, []int)Deprecated: Use ModelServerInfo.ProtoReflect.Descriptor instead.
func (*ModelServerInfo) GetModel
func (x *ModelServerInfo) GetModel() stringfunc (*ModelServerInfo) GetModelServer
func (x *ModelServerInfo) GetModelServer() stringfunc (*ModelServerInfo) GetModelServerVersion
func (x *ModelServerInfo) GetModelServerVersion() stringfunc (*ModelServerInfo) ProtoMessage
func (*ModelServerInfo) ProtoMessage()func (*ModelServerInfo) ProtoReflect
func (x *ModelServerInfo) ProtoReflect() protoreflect.Messagefunc (*ModelServerInfo) Reset
func (x *ModelServerInfo) Reset()func (*ModelServerInfo) String
func (x *ModelServerInfo) String() stringPerformanceRange
type PerformanceRange struct {
// Output only. The range of throughput in output tokens per second. This is
// measured as total_output_tokens_generated_by_server /
// elapsed_time_in_seconds.
ThroughputOutputRange *TokensPerSecondRange `protobuf:"bytes,1,opt,name=throughput_output_range,json=throughputOutputRange,proto3" json:"throughput_output_range,omitempty"`
// Output only. The range of TTFT (Time To First Token) in milliseconds. TTFT
// is the time it takes to generate the first token for a request.
TtftRange *MillisecondRange `protobuf:"bytes,2,opt,name=ttft_range,json=ttftRange,proto3" json:"ttft_range,omitempty"`
// Output only. The range of NTPOT (Normalized Time Per Output Token) in
// milliseconds. NTPOT is the request latency normalized by the number of
// output tokens, measured as request_latency / total_output_tokens.
NtpotRange *MillisecondRange `protobuf:"bytes,3,opt,name=ntpot_range,json=ntpotRange,proto3" json:"ntpot_range,omitempty"`
// contains filtered or unexported fields
}Performance range for a model deployment.
func (*PerformanceRange) Descriptor
func (*PerformanceRange) Descriptor() ([]byte, []int)Deprecated: Use PerformanceRange.ProtoReflect.Descriptor instead.
func (*PerformanceRange) GetNtpotRange
func (x *PerformanceRange) GetNtpotRange() *MillisecondRangefunc (*PerformanceRange) GetThroughputOutputRange
func (x *PerformanceRange) GetThroughputOutputRange() *TokensPerSecondRangefunc (*PerformanceRange) GetTtftRange
func (x *PerformanceRange) GetTtftRange() *MillisecondRangefunc (*PerformanceRange) ProtoMessage
func (*PerformanceRange) ProtoMessage()func (*PerformanceRange) ProtoReflect
func (x *PerformanceRange) ProtoReflect() protoreflect.Messagefunc (*PerformanceRange) Reset
func (x *PerformanceRange) Reset()func (*PerformanceRange) String
func (x *PerformanceRange) String() stringPerformanceRequirements
type PerformanceRequirements struct {
// Optional. The target Normalized Time Per Output Token (NTPOT) in
// milliseconds. NTPOT is calculated as `request_latency /
// total_output_tokens`. If not provided, this target will not be enforced.
TargetNtpotMilliseconds *int32 `protobuf:"varint,1,opt,name=target_ntpot_milliseconds,json=targetNtpotMilliseconds,proto3,oneof" json:"target_ntpot_milliseconds,omitempty"`
// Optional. The target Time To First Token (TTFT) in milliseconds. TTFT is
// the time it takes to generate the first token for a request. If not
// provided, this target will not be enforced.
TargetTtftMilliseconds *int32 `protobuf:"varint,2,opt,name=target_ttft_milliseconds,json=targetTtftMilliseconds,proto3,oneof" json:"target_ttft_milliseconds,omitempty"`
// Optional. The target cost for running a profile's model server. If not
// provided, this requirement will not be enforced.
TargetCost *Cost `protobuf:"bytes,3,opt,name=target_cost,json=targetCost,proto3" json:"target_cost,omitempty"`
// contains filtered or unexported fields
}Performance requirements for a profile and/or model deployment.
func (*PerformanceRequirements) Descriptor
func (*PerformanceRequirements) Descriptor() ([]byte, []int)Deprecated: Use PerformanceRequirements.ProtoReflect.Descriptor instead.
func (*PerformanceRequirements) GetTargetCost
func (x *PerformanceRequirements) GetTargetCost() *Costfunc (*PerformanceRequirements) GetTargetNtpotMilliseconds
func (x *PerformanceRequirements) GetTargetNtpotMilliseconds() int32func (*PerformanceRequirements) GetTargetTtftMilliseconds
func (x *PerformanceRequirements) GetTargetTtftMilliseconds() int32func (*PerformanceRequirements) ProtoMessage
func (*PerformanceRequirements) ProtoMessage()func (*PerformanceRequirements) ProtoReflect
func (x *PerformanceRequirements) ProtoReflect() protoreflect.Messagefunc (*PerformanceRequirements) Reset
func (x *PerformanceRequirements) Reset()func (*PerformanceRequirements) String
func (x *PerformanceRequirements) String() stringPerformanceStats
type PerformanceStats struct {
// Output only. The number of queries per second.
// Note: This metric can vary widely based on context length and may not be a
// reliable measure of LLM throughput.
QueriesPerSecond float32 `protobuf:"fixed32,1,opt,name=queries_per_second,json=queriesPerSecond,proto3" json:"queries_per_second,omitempty"`
// Output only. The number of output tokens per second. This is the throughput
// measured as total_output_tokens_generated_by_server /
// elapsed_time_in_seconds.
OutputTokensPerSecond int32 `protobuf:"varint,2,opt,name=output_tokens_per_second,json=outputTokensPerSecond,proto3" json:"output_tokens_per_second,omitempty"`
// Output only. The Normalized Time Per Output Token (NTPOT) in milliseconds.
// This is the request latency normalized by the number of output tokens,
// measured as request_latency / total_output_tokens.
NtpotMilliseconds int32 `protobuf:"varint,3,opt,name=ntpot_milliseconds,json=ntpotMilliseconds,proto3" json:"ntpot_milliseconds,omitempty"`
// Output only. The Time To First Token (TTFT) in milliseconds. This is the
// time it takes to generate the first token for a request.
TtftMilliseconds int32 `protobuf:"varint,4,opt,name=ttft_milliseconds,json=ttftMilliseconds,proto3" json:"ttft_milliseconds,omitempty"`
// Output only. The cost of running the model deployment.
Cost []*Cost `protobuf:"bytes,5,rep,name=cost,proto3" json:"cost,omitempty"`
// contains filtered or unexported fields
}Performance statistics for a model deployment.
func (*PerformanceStats) Descriptor
func (*PerformanceStats) Descriptor() ([]byte, []int)Deprecated: Use PerformanceStats.ProtoReflect.Descriptor instead.
func (*PerformanceStats) GetCost
func (x *PerformanceStats) GetCost() []*Costfunc (*PerformanceStats) GetNtpotMilliseconds
func (x *PerformanceStats) GetNtpotMilliseconds() int32func (*PerformanceStats) GetOutputTokensPerSecond
func (x *PerformanceStats) GetOutputTokensPerSecond() int32func (*PerformanceStats) GetQueriesPerSecond
func (x *PerformanceStats) GetQueriesPerSecond() float32func (*PerformanceStats) GetTtftMilliseconds
func (x *PerformanceStats) GetTtftMilliseconds() int32func (*PerformanceStats) ProtoMessage
func (*PerformanceStats) ProtoMessage()func (*PerformanceStats) ProtoReflect
func (x *PerformanceStats) ProtoReflect() protoreflect.Messagefunc (*PerformanceStats) Reset
func (x *PerformanceStats) Reset()func (*PerformanceStats) String
func (x *PerformanceStats) String() stringProfile
type Profile struct {
// Output only. The model server configuration. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Output only. The accelerator type. Expected format: `nvidia-h100-80gb`.
AcceleratorType string `protobuf:"bytes,2,opt,name=accelerator_type,json=acceleratorType,proto3" json:"accelerator_type,omitempty"`
// Output only. The TPU topology (if applicable).
TpuTopology string `protobuf:"bytes,3,opt,name=tpu_topology,json=tpuTopology,proto3" json:"tpu_topology,omitempty"`
// Output only. The instance type. Expected format: `a2-highgpu-1g`.
InstanceType string `protobuf:"bytes,4,opt,name=instance_type,json=instanceType,proto3" json:"instance_type,omitempty"`
// Output only. The resources used by the model deployment.
ResourcesUsed *ResourcesUsed `protobuf:"bytes,5,opt,name=resources_used,json=resourcesUsed,proto3" json:"resources_used,omitempty"`
// Output only. The performance statistics for this profile.
PerformanceStats []*PerformanceStats `protobuf:"bytes,6,rep,name=performance_stats,json=performanceStats,proto3" json:"performance_stats,omitempty"`
// contains filtered or unexported fields
}A profile containing information about a model deployment.
func (*Profile) Descriptor
Deprecated: Use Profile.ProtoReflect.Descriptor instead.
func (*Profile) GetAcceleratorType
func (*Profile) GetInstanceType
func (*Profile) GetModelServerInfo
func (x *Profile) GetModelServerInfo() *ModelServerInfofunc (*Profile) GetPerformanceStats
func (x *Profile) GetPerformanceStats() []*PerformanceStatsfunc (*Profile) GetResourcesUsed
func (x *Profile) GetResourcesUsed() *ResourcesUsedfunc (*Profile) GetTpuTopology
func (*Profile) ProtoMessage
func (*Profile) ProtoMessage()func (*Profile) ProtoReflect
func (x *Profile) ProtoReflect() protoreflect.Messagefunc (*Profile) Reset
func (x *Profile) Reset()func (*Profile) String
ResourcesUsed
type ResourcesUsed struct {
// Output only. The number of accelerators (e.g., GPUs or TPUs) used by the
// model deployment on the Kubernetes node.
AcceleratorCount int32 `protobuf:"varint,1,opt,name=accelerator_count,json=acceleratorCount,proto3" json:"accelerator_count,omitempty"`
// contains filtered or unexported fields
}Resources used by a model deployment.
func (*ResourcesUsed) Descriptor
func (*ResourcesUsed) Descriptor() ([]byte, []int)Deprecated: Use ResourcesUsed.ProtoReflect.Descriptor instead.
func (*ResourcesUsed) GetAcceleratorCount
func (x *ResourcesUsed) GetAcceleratorCount() int32func (*ResourcesUsed) ProtoMessage
func (*ResourcesUsed) ProtoMessage()func (*ResourcesUsed) ProtoReflect
func (x *ResourcesUsed) ProtoReflect() protoreflect.Messagefunc (*ResourcesUsed) Reset
func (x *ResourcesUsed) Reset()func (*ResourcesUsed) String
func (x *ResourcesUsed) String() stringStorageConfig
type StorageConfig struct {
// Optional. The Google Cloud Storage bucket URI to load the model from. This
// URI must point to the directory containing the model's config file
// (`config.json`) and model weights. A tuned GCSFuse setup can improve
// LLM Pod startup time by more than 7x. Expected format:
// `gs://bucket-name/path-to-model`.
ModelBucketUri string `protobuf:"bytes,1,opt,name=model_bucket_uri,json=modelBucketUri,proto3" json:"model_bucket_uri,omitempty"`
// Optional. The Google Cloud Storage bucket URI for the XLA compilation
// cache (if applicable).
XlaCacheBucketUri string `protobuf:"bytes,2,opt,name=xla_cache_bucket_uri,json=xlaCacheBucketUri,proto3" json:"xla_cache_bucket_uri,omitempty"`
// contains filtered or unexported fields
}Storage configuration for a model deployment.
func (*StorageConfig) Descriptor
func (*StorageConfig) Descriptor() ([]byte, []int)Deprecated: Use StorageConfig.ProtoReflect.Descriptor instead.
func (*StorageConfig) GetModelBucketUri
func (x *StorageConfig) GetModelBucketUri() stringfunc (*StorageConfig) GetXlaCacheBucketUri
func (x *StorageConfig) GetXlaCacheBucketUri() stringfunc (*StorageConfig) ProtoMessage
func (*StorageConfig) ProtoMessage()func (*StorageConfig) ProtoReflect
func (x *StorageConfig) ProtoReflect() protoreflect.Messagefunc (*StorageConfig) Reset
func (x *StorageConfig) Reset()func (*StorageConfig) String
func (x *StorageConfig) String() stringTokensPerSecondRange
type TokensPerSecondRange struct {
// Output only. The minimum value of the range.
Min int32 `protobuf:"varint,1,opt,name=min,proto3" json:"min,omitempty"`
// Output only. The maximum value of the range.
Max int32 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"`
// contains filtered or unexported fields
}Represents a range of throughput values in tokens per second.
func (*TokensPerSecondRange) Descriptor
func (*TokensPerSecondRange) Descriptor() ([]byte, []int)Deprecated: Use TokensPerSecondRange.ProtoReflect.Descriptor instead.
func (*TokensPerSecondRange) GetMax
func (x *TokensPerSecondRange) GetMax() int32func (*TokensPerSecondRange) GetMin
func (x *TokensPerSecondRange) GetMin() int32func (*TokensPerSecondRange) ProtoMessage
func (*TokensPerSecondRange) ProtoMessage()func (*TokensPerSecondRange) ProtoReflect
func (x *TokensPerSecondRange) ProtoReflect() protoreflect.Messagefunc (*TokensPerSecondRange) Reset
func (x *TokensPerSecondRange) Reset()func (*TokensPerSecondRange) String
func (x *TokensPerSecondRange) String() stringUnimplementedGkeInferenceQuickstartServer
type UnimplementedGkeInferenceQuickstartServer struct {
}UnimplementedGkeInferenceQuickstartServer should be embedded to have forward compatible implementations.
func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData
func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)func (UnimplementedGkeInferenceQuickstartServer) FetchModels
func (UnimplementedGkeInferenceQuickstartServer) FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles
func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest
func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)UnsafeGkeInferenceQuickstartServer
type UnsafeGkeInferenceQuickstartServer interface {
// contains filtered or unexported methods
}UnsafeGkeInferenceQuickstartServer may be embedded to opt out of forward compatibility for this service. Use of this interface is not recommended, as added methods to GkeInferenceQuickstartServer will result in compilation errors.