public interface PerformanceStatsOrBuilder extends MessageOrBuilder
Implements
MessageOrBuilder
Methods
getCost(int index)
public abstract Cost getCost(int index)
Output only. The cost of running the model deployment.
repeated .google.cloud.gkerecommender.v1.Cost cost = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Parameter | |
|---|---|
| Name | Description |
| index | int |

| Returns | |
|---|---|
| Type | Description |
| Cost | |
getCostCount()
public abstract int getCostCount()
Output only. The cost of running the model deployment.
repeated .google.cloud.gkerecommender.v1.Cost cost = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Returns | |
|---|---|
| Type | Description |
| int | |
getCostList()
public abstract List<Cost> getCostList()
Output only. The cost of running the model deployment.
repeated .google.cloud.gkerecommender.v1.Cost cost = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Returns | |
|---|---|
| Type | Description |
| List<Cost> | |
getCostOrBuilder(int index)
public abstract CostOrBuilder getCostOrBuilder(int index)
Output only. The cost of running the model deployment.
repeated .google.cloud.gkerecommender.v1.Cost cost = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Parameter | |
|---|---|
| Name | Description |
| index | int |

| Returns | |
|---|---|
| Type | Description |
| CostOrBuilder | |
getCostOrBuilderList()
public abstract List<? extends CostOrBuilder> getCostOrBuilderList()
Output only. The cost of running the model deployment.
repeated .google.cloud.gkerecommender.v1.Cost cost = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Returns | |
|---|---|
| Type | Description |
| List<? extends com.google.cloud.gkerecommender.v1.CostOrBuilder> | |
getNtpotMilliseconds()
public abstract int getNtpotMilliseconds()
Output only. The Normalized Time Per Output Token (NTPOT) in milliseconds. This is the request latency normalized by the number of output tokens, measured as request_latency / total_output_tokens.
int32 ntpot_milliseconds = 3 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Returns | |
|---|---|
| Type | Description |
| int | The ntpotMilliseconds. |
getOutputTokensPerSecond()
public abstract int getOutputTokensPerSecond()
Output only. The number of output tokens per second. This is the throughput measured as total_output_tokens_generated_by_server / elapsed_time_in_seconds.
int32 output_tokens_per_second = 2 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Returns | |
|---|---|
| Type | Description |
| int | The outputTokensPerSecond. |
getQueriesPerSecond()
public abstract float getQueriesPerSecond()
Output only. The number of queries per second. Note: This metric can vary widely based on context length and may not be a reliable measure of LLM throughput.
float queries_per_second = 1 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Returns | |
|---|---|
| Type | Description |
| float | The queriesPerSecond. |
getTtftMilliseconds()
public abstract int getTtftMilliseconds()
Output only. The Time To First Token (TTFT) in milliseconds. This is the time it takes to generate the first token for a request.
int32 ttft_milliseconds = 4 [(.google.api.field_behavior) = OUTPUT_ONLY];
| Returns | |
|---|---|
| Type | Description |
| int | The ttftMilliseconds. |