package inference

Mouse Melon logo
Get desktop application:
View/edit binary Protocol Buffers messages

service GRPCInferenceService

grpc_service.proto:41

@@ @@.. cpp:var:: service GRPCInferenceService @@ @@ Inference Server GRPC endpoints. @@

message BatchInput

model_config.proto:490

@@ .. cpp:var:: message BatchInput @@ @@ A batch input is an additional input that must be added by @@ the backend based on all the requests in a batch. @@

Used in: ModelConfig

enum BatchInput.Kind

model_config.proto:497

@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the batch input. @@

Used in: BatchInput

message BatchOutput

model_config.proto:592

@@.. cpp:var:: message BatchOutput @@ @@ A batch output is an output produced by the model that must be handled @@ differently by the backend based on all the requests in a batch. @@

Used in: ModelConfig

enum BatchOutput.Kind

model_config.proto:599

@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the batch output. @@

Used in: BatchOutput

message CudaSharedMemoryStatusResponse.RegionStatus

grpc_service.proto:1575

@@ @@ .. cpp:var:: message RegionStatus @@ @@ Status for a shared memory region. @@

Used in: CudaSharedMemoryStatusResponse

enum DataType

model_config.proto:41

@@ @@.. cpp:enum:: DataType @@ @@ Data types supported for input and output tensors. @@

Used in: BatchInput, ModelInput, ModelOutput, ModelSequenceBatching.Control, ModelSequenceBatching.InitialState, ModelSequenceBatching.State, ModelWarmup.Input

message InferBatchStatistics

grpc_service.proto:1068

@@ @@.. cpp:var:: message InferBatchStatistics @@ @@ Inference batch statistics. @@

Used in: ModelStatistics

message InferParameter

grpc_service.proto:440

@@ @@.. cpp:var:: message InferParameter @@ @@ An inference parameter value. @@

Used in: ModelInferRequest, ModelInferRequest.InferInputTensor, ModelInferRequest.InferRequestedOutputTensor, ModelInferResponse, ModelInferResponse.InferOutputTensor

message InferResponseStatistics

grpc_service.proto:1022

@@ @@.. cpp:var:: message InferResponseStatistics @@ @@ Statistics per response. @@

Used in: ModelStatistics

message InferStatistics

grpc_service.proto:925

@@ @@.. cpp:var:: message InferStatistics @@ @@ Inference statistics. @@

Used in: ModelStatistics

message InferTensorContents

grpc_service.proto:494

@@ @@.. cpp:var:: message InferTensorContents @@ @@ The data contained in a tensor represented by the repeated type @@ that matches the tensor's data type. Protobuf oneof is not used @@ because oneofs cannot contain repeated fields. @@

Used in: ModelInferRequest.InferInputTensor, ModelInferResponse.InferOutputTensor

message LogSettingsRequest.SettingValue

grpc_service.proto:1746

Used in: LogSettingsRequest

message LogSettingsResponse.SettingValue

grpc_service.proto:1783

Used in: LogSettingsResponse

message MemoryUsage

grpc_service.proto:1107

@@ @@.. cpp:var:: message MemoryUsage @@ @@ Memory usage. @@

Used in: ModelStatistics

message ModelConfig

model_config.proto:1974

@@ @@.. cpp:var:: message ModelConfig @@ @@ A model configuration. @@

Used in: ModelConfigResponse

message ModelDynamicBatching

model_config.proto:1123

@@ @@.. cpp:var:: message ModelDynamicBatching @@ @@ Dynamic batching configuration. These settings control how dynamic @@ batching operates for the model. @@

Used in: ModelConfig

message ModelEnsembling

model_config.proto:1607

@@ @@.. cpp:var:: message ModelEnsembling @@ @@ Model ensembling configuration. These settings specify the models that @@ compose the ensemble and how data flows between the models. @@

Used in: ModelConfig

message ModelEnsembling.Step

model_config.proto:1615

@@ .. cpp:var:: message Step @@ @@ Each step specifies a model included in the ensemble, @@ maps ensemble tensor names to the model input tensors, @@ and maps model output tensors to ensemble tensor names. @@

Used in: ModelEnsembling

message ModelInferRequest

grpc_service.proto:576

@@ @@.. cpp:var:: message ModelInferRequest @@ @@ Request message for ModelInfer. @@

Used as request type in: GRPCInferenceService.ModelInfer, GRPCInferenceService.ModelStreamInfer

message ModelInferRequest.InferInputTensor

grpc_service.proto:583

@@ @@ .. cpp:var:: message InferInputTensor @@ @@ An input tensor for an inference request. @@

Used in: ModelInferRequest

message ModelInferRequest.InferRequestedOutputTensor

grpc_service.proto:626

@@ @@ .. cpp:var:: message InferRequestedOutputTensor @@ @@ An output tensor requested for an inference request. @@

Used in: ModelInferRequest

message ModelInferResponse

grpc_service.proto:715

@@ @@.. cpp:var:: message ModelInferResponse @@ @@ Response message for ModelInfer. @@

Used as response type in: GRPCInferenceService.ModelInfer

Used as field type in: ModelStreamInferResponse

message ModelInferResponse.InferOutputTensor

grpc_service.proto:722

@@ @@ .. cpp:var:: message InferOutputTensor @@ @@ An output tensor returned for an inference request. @@

Used in: ModelInferResponse

message ModelInput

model_config.proto:318

@@ @@.. cpp:var:: message ModelInput @@ @@ An input required by the model. @@

Used in: ModelConfig

enum ModelInput.Format

model_config.proto:325

@@ @@ .. cpp:enum:: Format @@ @@ The format for the input. @@

Used in: ModelInput

message ModelInstanceGroup

model_config.proto:144

@@ @@.. cpp:var:: message ModelInstanceGroup @@ @@ A group of one or more instances of a model and resources made @@ available for those instances. @@

Used in: ModelConfig

enum ModelInstanceGroup.Kind

model_config.proto:151

@@ @@ .. cpp:enum:: Kind @@ @@ Kind of this instance group. @@

Used in: ModelInstanceGroup

message ModelInstanceGroup.SecondaryDevice

model_config.proto:190

@@ @@ .. cpp:var:: message SecondaryDevice @@ @@ A secondary device required for a model instance. @@

Used in: ModelInstanceGroup

enum ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind

model_config.proto:197

@@ @@ .. cpp:enum:: SecondaryDeviceKind @@ @@ The kind of the secondary device. @@

Used in: SecondaryDevice

message ModelMetadataResponse.TensorMetadata

grpc_service.proto:374

@@ @@ .. cpp:var:: message TensorMetadata @@ @@ Metadata for a tensor. @@

Used in: ModelMetadataResponse

message ModelMetrics

model_config.proto:1897

@@ @@ .. cpp:var:: message ModelMetrics @@ @@ The metrics setting of this model. @@ NOTE: Consider reusing this message body for backend metric custom @@ configuration. @@

Used in: ModelConfig

message ModelMetrics.MetricControl

model_config.proto:1904

@@ @@ .. cpp:var:: message MetricControl @@ @@ Override metrics settings of this model. @@

Used in: ModelMetrics

message ModelMetrics.MetricControl.HistogramOptions

model_config.proto:1927

@@ .. cpp:var:: message HistogramOptions @@ @@ Histogram metrics options. @@

Used in: MetricControl

message ModelMetrics.MetricControl.MetricIdentifier

model_config.proto:1911

@@ @@ .. cpp:var:: message MetricIdentifier @@ @@ Specify metrics to be overridden with metric_option. @@

Used in: MetricControl

message ModelOperations

model_config.proto:1803

@@ @@ .. cpp:var:: message ModelOperations @@ @@ The metadata of libraries providing custom operations for this model. @@

Used in: ModelConfig

message ModelOptimizationPolicy

model_config.proto:708

@@ @@.. cpp:var:: message ModelOptimizationPolicy @@ @@ Optimization settings for a model. These settings control if/how a @@ model is optimized and prioritized by the backend framework when @@ it is loaded. @@

Used in: ModelConfig

message ModelOptimizationPolicy.Cuda

model_config.proto:766

@@ @@ .. cpp:var:: message Cuda @@ @@ CUDA-specific optimization settings. @@

Used in: ModelOptimizationPolicy

message ModelOptimizationPolicy.Cuda.GraphSpec

model_config.proto:772

@@ .. cpp:var:: message GraphSpec @@ @@ Specification of the CUDA graph to be captured. @@

Used in: Cuda

message ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound

model_config.proto:787

Used in: GraphSpec

message ModelOptimizationPolicy.Cuda.GraphSpec.Shape

model_config.proto:778

@@ .. cpp:var:: message Shape @@ @@ Specification of tensor dimension. @@

Used in: GraphSpec, LowerBound

message ModelOptimizationPolicy.ExecutionAccelerators

model_config.proto:890

@@ @@ .. cpp:var:: message ExecutionAccelerators @@ @@ Specify the preferred execution accelerators to be used to execute @@ the model. Currently only recognized by ONNX Runtime backend and @@ TensorFlow backend. @@ @@ For ONNX Runtime backend, it will deploy the model with the execution @@ accelerators by priority, the priority is determined based on the @@ order that they are set, i.e. the provider at the front has highest @@ priority. Overall, the priority will be in the following order: @@ <gpu_execution_accelerator> (if instance is on GPU) @@ CUDA Execution Provider (if instance is on GPU) @@ <cpu_execution_accelerator> @@ Default CPU Execution Provider @@

Used in: ModelOptimizationPolicy

message ModelOptimizationPolicy.ExecutionAccelerators.Accelerator

model_config.proto:899

@@ @@ .. cpp:var:: message Accelerator @@ @@ Specify the accelerator to be used to execute the model. @@ Accelerator with the same name may accept different parameters @@ depending on the backends. @@

Used in: ExecutionAccelerators

message ModelOptimizationPolicy.Graph

model_config.proto:720

@@ @@ .. cpp:var:: message Graph @@ @@ Enable generic graph optimization of the model. If not specified @@ the framework's default level of optimization is used. Supports @@ TensorFlow graphdef and savedmodel and ONNX models. For TensorFlow, this @@ causes XLA to be enabled/disabled for the model. For ONNX, the default @@ enables all optimizations, -1 enables only basic optimizations, and @@ +1 enables only basic and extended optimizations. @@

Used in: ModelOptimizationPolicy

enum ModelOptimizationPolicy.ModelPriority

model_config.proto:741

@@ @@ .. cpp:enum:: ModelPriority @@ @@ Model priorities. A model will be given scheduling and execution @@ preference over models at lower priorities. Current model @@ priorities only work for TensorRT models. @@

Used in: ModelOptimizationPolicy

message ModelOptimizationPolicy.PinnedMemoryBuffer

model_config.proto:978

@@ @@ .. cpp:var:: message PinnedMemoryBuffer @@ @@ Specify whether to use a pinned memory buffer when transferring data @@ between non-pinned system memory and GPU memory. Using a pinned @@ memory buffer for system from/to GPU transfers will typically provide @@ increased performance. For example, in the common use case where the @@ request provides inputs and delivers outputs via non-pinned system @@ memory, if the model instance accepts GPU IOs, the inputs will be @@ processed by two copies: from non-pinned system memory to pinned @@ memory, and from pinned memory to GPU memory. Similarly, pinned @@ memory will be used for delivering the outputs. @@

Used in: ModelOptimizationPolicy

message ModelOutput

model_config.proto:429

@@ @@.. cpp:var:: message ModelOutput @@ @@ An output produced by the model. @@

Used in: ModelConfig

message ModelParameter

model_config.proto:1687

@@ @@.. cpp:var:: message ModelParameter @@ @@ A model parameter. @@

Used in: ModelConfig

message ModelQueuePolicy

model_config.proto:1056

@@ @@.. cpp:var:: message ModelQueuePolicy @@ @@ Queue policy for inference requests. @@

Used in: ModelDynamicBatching

enum ModelQueuePolicy.TimeoutAction

model_config.proto:1063

@@ @@ .. cpp:enum:: TimeoutAction @@ @@ The action applied to timed-out requests. @@

Used in: ModelQueuePolicy

message ModelRateLimiter

model_config.proto:87

@@ @@ .. cpp:var:: message ModelRateLimiter @@ @@ The specifications required by the rate limiter to properly @@ schedule the inference requests across the different models @@ and their instances. @@

Used in: ModelInstanceGroup

message ModelRateLimiter.Resource

model_config.proto:93

@@ .. cpp:var:: message Resource @@ @@ The resource property. @@

Used in: ModelRateLimiter

message ModelRepositoryAgents

model_config.proto:1838

@@ @@.. cpp:var:: message ModelRepositoryAgents @@ @@ The repository agents for the model. @@

Used in: ModelConfig

message ModelRepositoryAgents.Agent

model_config.proto:1846

@@ @@ .. cpp:var:: message Agent @@ @@ A repository agent that should be invoked for the specified @@ repository actions for this model. @@

Used in: ModelRepositoryAgents

message ModelRepositoryParameter

grpc_service.proto:1241

@@ @@.. cpp:var:: message ModelRepositoryParameter @@ @@ A model repository parameter value. @@

Used in: RepositoryModelLoadRequest, RepositoryModelUnloadRequest

message ModelResponseCache

model_config.proto:1876

@@ @@.. cpp:var:: message ModelResponseCache @@ @@ The response cache setting for the model. @@

Used in: ModelConfig

message ModelSequenceBatching

model_config.proto:1198

@@ @@.. cpp:var:: message ModelSequenceBatching @@ @@ Sequence batching configuration. These settings control how sequence @@ batching operates for the model. @@

Used in: ModelConfig

message ModelSequenceBatching.Control

model_config.proto:1205

@@ .. cpp:var:: message Control @@ @@ A control is a signal that the sequence batcher uses to @@ communicate with a backend. @@

Used in: ControlInput

enum ModelSequenceBatching.Control.Kind

model_config.proto:1212

@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the control. @@

Used in: Control

message ModelSequenceBatching.ControlInput

model_config.proto:1302

@@ .. cpp:var:: message ControlInput @@ @@ The sequence control values to communicate by a model input. @@

Used in: ModelSequenceBatching

message ModelSequenceBatching.InitialState

model_config.proto:1323

@@ @@ .. cpp:var:: message InitialState @@ @@ Settings used to initialize data for implicit state. @@

Used in: State

message ModelSequenceBatching.State

model_config.proto:1373

@@ .. cpp:var:: message State @@ @@ An input / output pair of tensors that carry state for the sequence. @@

Used in: ModelSequenceBatching

message ModelSequenceBatching.StrategyDirect

model_config.proto:1448

@@ .. cpp:var:: message StrategyDirect @@ @@ The sequence batcher uses a specific, unique batch @@ slot for each sequence. All inference requests in a @@ sequence are directed to the same batch slot in the same @@ model instance over the lifetime of the sequence. This @@ is the default strategy. @@

Used in: ModelSequenceBatching

message ModelSequenceBatching.StrategyOldest

model_config.proto:1485

@@ .. cpp:var:: message StrategyOldest @@ @@ The sequence batcher maintains up to 'max_candidate_sequences' @@ candidate sequences. 'max_candidate_sequences' can be greater @@ than the model's 'max_batch_size'. For inferencing the batcher @@ chooses from the candidate sequences up to 'max_batch_size' @@ inference requests. Requests are chosen in an oldest-first @@ manner across all candidate sequences. A given sequence is @@ not guaranteed to be assigned to the same batch slot for @@ all inference requests of that sequence. @@

Used in: ModelSequenceBatching

message ModelStatistics

grpc_service.proto:1134

@@ @@.. cpp:var:: message ModelStatistics @@ @@ Statistics for a specific model and version. @@

Used in: ModelStatisticsResponse

message ModelTensorReshape

model_config.proto:304

@@ @@.. cpp:var:: message ModelTensorReshape @@ @@ Reshape specification for input and output tensors. @@

Used in: ModelInput, ModelOutput

message ModelTransactionPolicy

model_config.proto:1819

@@ @@ .. cpp:var:: message ModelTransactionPolicy @@ @@ The specification that describes the nature of transactions @@ to be expected from the model. @@

Used in: ModelConfig

message ModelVersionPolicy

model_config.proto:636

@@ @@.. cpp:var:: message ModelVersionPolicy @@ @@ Policy indicating which versions of a model should be made @@ available by the inference server. @@

Used in: ModelConfig

message ModelVersionPolicy.All

model_config.proto:659

@@ .. cpp:var:: message All @@ @@ Serve all versions of the model. @@

Used in: ModelVersionPolicy

(message has no fields)

message ModelVersionPolicy.Latest

model_config.proto:643

@@ .. cpp:var:: message Latest @@ @@ Serve only the latest version(s) of a model. This is @@ the default policy. @@

Used in: ModelVersionPolicy

message ModelVersionPolicy.Specific

model_config.proto:665

@@ .. cpp:var:: message Specific @@ @@ Serve only specific versions of the model. @@

Used in: ModelVersionPolicy

message ModelWarmup

model_config.proto:1701

@@ @@.. cpp:var:: message ModelWarmup @@ @@ Settings used to construct the request sample for model warmup. @@

Used in: ModelConfig

message ModelWarmup.Input

model_config.proto:1708

@@ @@ .. cpp:var:: message Input @@ @@ Metadata associated with an input. @@

Used in: ModelWarmup

message RepositoryIndexResponse.ModelIndex

grpc_service.proto:1309

@@ @@ .. cpp:var:: message ModelIndex @@ @@ Index entry for a model. @@

Used in: RepositoryIndexResponse

message StatisticDuration

grpc_service.proto:905

@@ @@.. cpp:var:: message StatisticDuration @@ @@ Statistic recording a cumulative duration metric. @@

Used in: InferBatchStatistics, InferResponseStatistics, InferStatistics

message SystemSharedMemoryStatusResponse.RegionStatus

grpc_service.proto:1443

@@ @@ .. cpp:var:: message RegionStatus @@ @@ Status for a shared memory region. @@

Used in: SystemSharedMemoryStatusResponse

message TraceSettingRequest.SettingValue

grpc_service.proto:1683

@@ @@ .. cpp:var:: message SettingValue @@ @@ The values to be associated with a trace setting. @@ If no value is provided, the setting will be cleared and @@ the global setting value will be used. @@

Used in: TraceSettingRequest

message TraceSettingResponse.SettingValue

grpc_service.proto:1721

@@ @@ .. cpp:var:: message SettingValue @@ @@ The values to be associated with a trace setting. @@

Used in: TraceSettingResponse