package nvidia.inferenceserver

Mouse Melon logo — Get desktop application:
View/edit binary Protocol Buffers messages

service GRPCInferenceService

grpc_service_v2.proto:40

@@ @@.. cpp:var:: service GRPCInferenceService @@ @@ Inference Server GRPC endpoints. @@

service GRPCService

grpc_service.proto:42

@@ @@.. cpp:var:: service GRPCService @@ @@ Inference Server GRPC endpoints. @@

message CudaSharedMemoryStatusResponse.RegionStatus

grpc_service_v2.proto:1168

@@ @@ .. cpp:var:: message RegionStatus @@ @@ Status for a shared memory region. @@

Used in: CudaSharedMemoryStatusResponse

enum DataType

model_config.proto:40

@@ @@.. cpp:enum:: DataType @@ @@ Data types supported for input and output tensors. @@

Used in: InferResponseHeader.Output, ModelInput, ModelOutput, ModelSequenceBatching.Control, ModelWarmup.Input

message HealthRequestStats

server_status.proto:75

@@ @@.. cpp:var:: message HealthRequestStats @@ @@ Statistics collected for Health requests. @@

Used in: ServerStatus

message InferParameter

grpc_service_v2.proto:427

@@ @@.. cpp:var:: message InferParameter @@ @@ An inference parameter value. @@

Used in: ModelInferRequest, ModelInferRequest.InferInputTensor, ModelInferRequest.InferRequestedOutputTensor, ModelInferResponse

message InferRequest

grpc_service.proto:425

@@ @@.. cpp:var:: message InferRequest @@ @@ Request message for Infer gRPC endpoint. @@

Used as request type in: GRPCService.Infer, GRPCService.StreamInfer

message InferRequestHeader

api.proto:70

@@ @@.. cpp:var:: message InferRequestHeader @@ @@ Meta-data for an inferencing request. The actual input data is @@ delivered separate from this header, in the HTTP body for an HTTP @@ request, or in the :cpp:var:`InferRequest` message for a gRPC request. @@

Used in: InferRequest

enum InferRequestHeader.Flag

api.proto:78

@@ .. cpp:enum:: Flag @@ @@ Flags that can be associated with an inference request. @@ All flags are packed bitwise into the 'flags' field and @@ so the value of each must be a power-of-2. @@

message InferRequestHeader.Input

api.proto:103

@@ .. cpp:var:: message Input @@ @@ Meta-data for an input tensor provided as part of an inferencing @@ request. @@

Used in: InferRequestHeader

message InferRequestHeader.Output

api.proto:142

@@ .. cpp:var:: message Output @@ @@ Meta-data for a requested output tensor as part of an inferencing @@ request. @@

Used in: InferRequestHeader

message InferRequestHeader.Output.Class

api.proto:154

@@ .. cpp:var:: message Class @@ @@ Options for an output returned as a classification. @@

Used in: Output

message InferRequestStats

server_status.proto:139

@@ @@.. cpp:var:: message InferRequestStats @@ @@ Statistics collected for Infer requests. @@

Used in: ModelVersionStatus

message InferResponse

grpc_service.proto:459

@@ @@.. cpp:var:: message InferResponse @@ @@ Response message for Infer gRPC endpoint. @@

Used as response type in: GRPCService.Infer, GRPCService.StreamInfer

message InferResponseHeader

api.proto:260

@@ @@.. cpp:var:: message InferResponseHeader @@ @@ Meta-data for the response to an inferencing request. The actual output @@ data is delivered separate from this header, in the HTTP body for an HTTP @@ request, or in the :cpp:var:`InferResponse` message for a gRPC request. @@

Used in: InferResponse

message InferResponseHeader.Output

api.proto:267

@@ .. cpp:var:: message Output @@ @@ Meta-data for an output tensor requested as part of an inferencing @@ request. @@

Used in: InferResponseHeader

message InferResponseHeader.Output.Class

api.proto:306

@@ .. cpp:var:: message Class @@ @@ Information about each classification for this output. @@

Used in: Classes

message InferResponseHeader.Output.Classes

api.proto:333

@@ .. cpp:var:: message Classes @@ @@ Meta-data for an output tensor being returned as classifications. @@

Used in: Output

message InferResponseHeader.Output.Raw

api.proto:285

@@ .. cpp:var:: message Raw @@ @@ Meta-data for an output tensor being returned as raw data. @@

Used in: Output

message InferSharedMemory

api.proto:40

@@.. cpp:var:: message InferSharedMemory @@ @@ The meta-data for the shared memory from which to read the input @@ data and/or write the output data. @@

Used in: InferRequestHeader.Input, InferRequestHeader.Output

message InferStatistics

grpc_service_v2.proto:847

@@ @@.. cpp:var:: message InferStatistics @@ @@ Inference statistics. @@

Used in: ModelStatisticsResponse

message InferTensorContents

grpc_service_v2.proto:464

@@ @@.. cpp:var:: message InferTensorContents @@ @@ The data contained in a tensor. For a given data type the @@ tensor contents can be represented in "raw" bytes form or in @@ the repeated type that matches the tensor's data type. Protobuf @@ oneof is not used because oneofs cannot contain repeated fields. @@

Used in: ModelInferRequest.InferInputTensor, ModelInferResponse.InferOutputTensor

message ModelConfig

model_config.proto:1137

@@ @@.. cpp:var:: message ModelConfig @@ @@ A model configuration. @@

Used in: ModelConfigResponse, ModelStatus

enum ModelControlRequest.Type

grpc_service.proto:186

@@ .. cpp:enum:: Type @@ @@ Types of control operation @@

Used in: ModelControlRequest

message ModelControlRequestStats

server_status.proto:90

@@ @@.. cpp:var:: message ModelControlRequestStats @@ @@ Statistics collected for ModelControl requests. @@

Used in: ServerStatus

message ModelDynamicBatching

model_config.proto:700

@@ @@.. cpp:var:: message ModelDynamicBatching @@ @@ Dynamic batching configuration. These settings control how dynamic @@ batching operates for the model. @@

Used in: ModelConfig

message ModelEnsembling

model_config.proto:982

@@ @@.. cpp:var:: message ModelEnsembling @@ @@ Model ensembling configuration. These settings specify the models that @@ compose the ensemble and how data flows between the models. @@

Used in: ModelConfig

message ModelEnsembling.Step

model_config.proto:990

@@ .. cpp:var:: message Step @@ @@ Each step specifies a model included in the ensemble, @@ maps ensemble tensor names to the model input tensors, @@ and maps model output tensors to ensemble tensor names. @@

Used in: ModelEnsembling

message ModelInferRequest

grpc_service_v2.proto:559

@@ @@.. cpp:var:: message ModelInferRequest @@ @@ Request message for ModelInfer. @@

Used as request type in: GRPCInferenceService.ModelInfer, GRPCInferenceService.ModelStreamInfer

message ModelInferRequest.InferInputTensor

grpc_service_v2.proto:566

@@ @@ .. cpp:var:: message InferInputTensor @@ @@ An input tensor for an inference request. @@

Used in: ModelInferRequest

message ModelInferRequest.InferRequestedOutputTensor

grpc_service_v2.proto:607

@@ @@ .. cpp:var:: message InferRequestedOutputTensor @@ @@ An output tensor requested for an inference request. @@

Used in: ModelInferRequest

message ModelInferResponse

grpc_service_v2.proto:671

@@ @@.. cpp:var:: message ModelInferResponse @@ @@ Response message for ModelInfer. @@

Used as response type in: GRPCInferenceService.ModelInfer

Used as field type in: ModelStreamInferResponse

message ModelInferResponse.InferOutputTensor

grpc_service_v2.proto:678

@@ @@ .. cpp:var:: message InferOutputTensor @@ @@ An output tensor returned for an inference request. @@

Used in: ModelInferResponse

message ModelInput

model_config.proto:190

@@ @@.. cpp:var:: message ModelInput @@ @@ An input required by the model. @@

Used in: ModelConfig

enum ModelInput.Format

model_config.proto:197

@@ @@ .. cpp:enum:: Format @@ @@ The format for the input. @@

Used in: ModelInput

message ModelInstanceGroup

model_config.proto:82

@@ @@.. cpp:var:: message ModelInstanceGroup @@ @@ A group of one or more instances of a model and resources made @@ available for those instances. @@

Used in: ModelConfig

enum ModelInstanceGroup.Kind

model_config.proto:89

@@ @@ .. cpp:enum:: Kind @@ @@ Kind of this instance group. @@

Used in: ModelInstanceGroup

message ModelMetadataResponse.TensorMetadata

grpc_service_v2.proto:361

@@ @@ .. cpp:var:: message TensorMetadata @@ @@ Metadata for a tensor. @@

Used in: ModelMetadataResponse

message ModelOptimizationPolicy

model_config.proto:410

@@ @@.. cpp:var:: message ModelOptimizationPolicy @@ @@ Optimization settings for a model. These settings control if/how a @@ model is optimized and prioritized by the backend framework when @@ it is loaded. @@

Used in: ModelConfig

message ModelOptimizationPolicy.Cuda

model_config.proto:468

@@ @@ .. cpp:var:: message Cuda @@ @@ CUDA-specific optimization settings. @@

Used in: ModelOptimizationPolicy

message ModelOptimizationPolicy.ExecutionAccelerators

model_config.proto:495

@@ @@ .. cpp:var:: message ExecutionAccelerators @@ @@ Specify the preferred execution accelerators to be used to execute @@ the model. Currently only recognized by ONNX Runtime backend and @@ TensorFlow backend. @@ @@ For ONNX Runtime backend, it will deploy the model with the execution @@ accelerators by priority, the priority is determined based on the @@ order that they are set, i.e. the provider at the front has highest @@ priority. Overall, the priority will be in the following order: @@ <gpu_execution_accelerator> (if instance is on GPU) @@ CUDA Execution Provider (if instance is on GPU) @@ <cpu_execution_accelerator> @@ Default CPU Execution Provider @@

Used in: ModelOptimizationPolicy

message ModelOptimizationPolicy.ExecutionAccelerators.Accelerator

model_config.proto:504

@@ @@ .. cpp:var:: message Accelerator @@ @@ Specify the accelerator to be used to execute the model. @@ Accelerator with the same name may accept different parameters @@ depending on the backends. @@

Used in: ExecutionAccelerators

message ModelOptimizationPolicy.Graph

model_config.proto:422

@@ @@ .. cpp:var:: message Graph @@ @@ Enable generic graph optimization of the model. If not specified @@ the framework's default level of optimization is used. Supports @@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow @@ causes XLA to be enabled/disabled for the model. For Onnx defaults @@ to enabling all optimizations, -1 enables only basic optimizations, @@ +1 enables only basic and extended optimizations. @@

Used in: ModelOptimizationPolicy

enum ModelOptimizationPolicy.ModelPriority

model_config.proto:443

@@ @@ .. cpp:enum:: ModelPriority @@ @@ Model priorities. A model will be given scheduling and execution @@ preference over models at lower priorities. Current model @@ priorities only work for TensorRT models. @@

Used in: ModelOptimizationPolicy

message ModelOptimizationPolicy.PinnedMemoryBuffer

model_config.proto:578

@@ @@ .. cpp:var:: message PinnedMemoryBuffer @@ @@ Specify whether to use a pinned memory buffer when transferring data @@ between non-pinned system memory and GPU memory. Using a pinned @@ memory buffer for system from/to GPU transfers will typically provide @@ increased performance. For example, in the common use case where the @@ request provides inputs and delivers outputs via non-pinned system @@ memory, if the model instance accepts GPU IOs, the inputs will be @@ processed by two copies: from non-pinned system memory to pinned @@ memory, and from pinned memory to GPU memory. Similarly, pinned @@ memory will be used for delivering the outputs. @@

Used in: ModelOptimizationPolicy

message ModelOutput

model_config.proto:285

@@ @@.. cpp:var:: message ModelOutput @@ @@ An output produced by the model. @@

Used in: ModelConfig

message ModelParameter

model_config.proto:1038

@@ @@.. cpp:var:: message ModelParameter @@ @@ A model parameter. @@

Used in: ModelConfig

message ModelQueuePolicy

model_config.proto:633

@@ @@.. cpp:var:: message ModelQueuePolicy @@ @@ Queue policy for inference requests. @@

Used in: ModelDynamicBatching

enum ModelQueuePolicy.TimeoutAction

model_config.proto:640

@@ @@ .. cpp:enum:: TimeoutAction @@ @@ The action applied to timed-out requests. @@

Used in: ModelQueuePolicy

enum ModelReadyState

server_status.proto:199

@@ @@.. cpp:enum:: ModelReadyState @@ @@ Readiness status for models. @@

Used in: ModelVersionStatus

message ModelReadyStateReason

server_status.proto:241

@@ @@.. cpp:var:: message ModelReadyStateReason @@ @@ Detail associated with a model's readiness status. @@

Used in: ModelVersionStatus

message ModelRepositoryIndex

server_status.proto:527

@@ @@.. cpp:var:: message ModelRepositoryIndex @@ @@ Index of the model repository monitored by the inference server. @@

Used in: RepositoryResponse

message ModelRepositoryIndex.ModelEntry

server_status.proto:534

@@ @@ .. cpp:var:: message ModelEntry @@ @@ The basic information for a model. @@

Used in: ModelRepositoryIndex

message ModelSequenceBatching

model_config.proto:775

@@ @@.. cpp:var:: message ModelSequenceBatching @@ @@ Sequence batching configuration. These settings control how sequence @@ batching operates for the model. @@

Used in: ModelConfig

message ModelSequenceBatching.Control

model_config.proto:782

@@ .. cpp:var:: message Control @@ @@ A control is a signal that the sequence batcher uses to @@ communicate with a backend. @@

Used in: ControlInput

enum ModelSequenceBatching.Control.Kind

model_config.proto:789

@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the control. @@

Used in: Control

message ModelSequenceBatching.ControlInput

model_config.proto:868

@@ .. cpp:var:: message ControlInput @@ @@ The sequence control values to communicate by a model input. @@

Used in: ModelSequenceBatching

message ModelSequenceBatching.StrategyDirect

model_config.proto:892

@@ .. cpp:var:: message StrategyDirect @@ @@ The sequence batcher uses a specific, unique batch @@ slot for each sequence. All inference requests in a @@ sequence are directed to the same batch slot in the same @@ model instance over the lifetime of the sequence. This @@ is the default strategy. @@

Used in: ModelSequenceBatching

(message has no fields)

message ModelSequenceBatching.StrategyOldest

model_config.proto:905

@@ .. cpp:var:: message StrategyOldest @@ @@ The sequence batcher maintains up to 'max_candidate_sequences' @@ candidate sequences. 'max_candidate_sequences' can be greater @@ than the model's 'max_batch_size'. For inferencing the batcher @@ chooses from the candidate sequences up to 'max_batch_size' @@ inference requests. Requests are chosen in an oldest-first @@ manner across all candidate sequences. A given sequence is @@ not guaranteed to be assigned to the same batch slot for @@ all inference requests of that sequence. @@

Used in: ModelSequenceBatching

message ModelStatus

server_status.proto:310

@@ @@.. cpp:var:: message ModelStatus @@ @@ Status for a model. @@

Used in: ServerStatus

message ModelTensorReshape

model_config.proto:176

@@ @@.. cpp:var:: message ModelTensorReshape @@ @@ Reshape specification for input and output tensors. @@

Used in: ModelInput, ModelOutput

message ModelVersionPolicy

model_config.proto:338

@@ @@.. cpp:var:: message ModelVersionPolicy @@ @@ Policy indicating which versions of a model should be made @@ available by the inference server. @@

Used in: ModelConfig

message ModelVersionPolicy.All

model_config.proto:361

@@ .. cpp:var:: message All @@ @@ Serve all versions of the model. @@

Used in: ModelVersionPolicy

(message has no fields)

message ModelVersionPolicy.Latest

model_config.proto:345

@@ .. cpp:var:: message Latest @@ @@ Serve only the latest version(s) of a model. This is @@ the default policy. @@

Used in: ModelVersionPolicy

message ModelVersionPolicy.Specific

model_config.proto:367

@@ .. cpp:var:: message Specific @@ @@ Serve only specific versions of the model. @@

Used in: ModelVersionPolicy

message ModelVersionStatus

server_status.proto:256

@@ @@.. cpp:var:: message ModelVersionStatus @@ @@ Status for a version of a model. @@

Used in: ModelStatus

message ModelWarmup

model_config.proto:1052

@@ @@.. cpp:var:: message ModelWarmup @@ @@ Settings used to construct the request sample for model warmup. @@

Used in: ModelConfig

message ModelWarmup.Input

model_config.proto:1059

@@ @@ .. cpp:var:: message Input @@ @@ Meta-data associated with an input. @@

Used in: ModelWarmup

message RepositoryIndexResponse.ModelIndex

grpc_service_v2.proto:934

@@ @@ .. cpp:var:: message ModelIndex @@ @@ Index entry for a model. @@

Used in: RepositoryIndexResponse

message RepositoryRequestStats

server_status.proto:124

@@ @@.. cpp:var:: message RepositoryRequestStats @@ @@ Statistics collected for Repository requests. @@

Used in: ServerStatus

message RequestStatus

request_status.proto:105

@@ @@.. cpp:var:: message RequestStatus @@ @@ Status returned for all inference server requests. The @@ RequestStatus provides a :cpp:enum:`RequestStatusCode`, an @@ optional status message, and server and request IDs. @@

Used in: HealthResponse, InferResponse, ModelControlResponse, RepositoryResponse, SharedMemoryControlResponse, StatusResponse

enum RequestStatusCode

request_status.proto:40

@@ @@.. cpp:enum:: RequestStatusCode @@ @@ Status codes returned for inference server requests. The @@ :cpp:enumerator:`RequestStatusCode::SUCCESS` status code indicates @@ no error; all other codes indicate an error. @@

Used in: RequestStatus

enum ServerReadyState

server_status.proto:334

@@ @@.. cpp:enum:: ServerReadyState @@ @@ Readiness status for the inference server. @@

Used in: ServerStatus

message ServerStatus

server_status.proto:442

@@ @@.. cpp:var:: message ServerStatus @@ @@ Status for the inference server. @@

Used in: StatusResponse

message SharedMemoryControlRequest.Register

grpc_service.proto:242

@@ .. cpp:var:: message Register @@ @@ Register a shared memory region. @@

Used in: SharedMemoryControlRequest

message SharedMemoryControlRequest.Register.CUDASharedMemoryIdentifier

grpc_service.proto:279

@@ @@ .. cpp:var:: message CUDASharedMemoryIdentifier @@ @@ The identifier for this CUDA shared memory region. @@

Used in: Register

message SharedMemoryControlRequest.Register.SystemSharedMemoryIdentifier

grpc_service.proto:256

@@ @@ .. cpp:var:: message SystemSharedMemoryIdentifier @@ @@ The identifier for this system shared memory region. @@

Used in: Register

message SharedMemoryControlRequest.Status

grpc_service.proto:346

@@ .. cpp:var:: message Status @@ @@ Get the status of all active shared memory regions. @@

Used in: SharedMemoryControlRequest

(message has no fields)

message SharedMemoryControlRequest.Unregister

grpc_service.proto:326

@@ .. cpp:var:: message Unregister @@ @@ Unregister a specified shared memory region. @@

Used in: SharedMemoryControlRequest

message SharedMemoryControlRequest.UnregisterAll

grpc_service.proto:340

@@ .. cpp:var:: message UnregisterAll @@ @@ Unregister all shared memory regions. @@

Used in: SharedMemoryControlRequest

(message has no fields)

message SharedMemoryControlRequestStats

server_status.proto:108

@@ @@.. cpp:var:: message SharedMemoryControlRequestStats @@ @@ Statistics for SharedMemoryControl requests. @@ @@ [DEPRECATED] The message has been deprecated and will @@ always report 0. @@

Used in: ServerStatus

message SharedMemoryControlResponse.Status

grpc_service.proto:392

@@ @@.. cpp:var:: message Status @@ @@ Status of all active shared memory regions. @@

Used in: SharedMemoryControlResponse

message SharedMemoryRegion

server_status.proto:372

@@.. cpp:var:: message SharedMemoryRegion @@ @@ The meta-data for the shared memory region registered in the inference @@ server. @@

Used in: SharedMemoryControlResponse.Status, SharedMemoryStatus

message SharedMemoryRegion.CudaSharedMemory

server_status.proto:399

Used in: SharedMemoryRegion

message SharedMemoryRegion.SystemSharedMemory

server_status.proto:381

Used in: SharedMemoryRegion

message SharedMemoryStatus

server_status.proto:511

@@ @@.. cpp:var:: message SharedMemoryStatus @@ @@ Shared memory status for the inference server. @@

message StatDuration

server_status.proto:40

@@ @@.. cpp:var:: message StatDuration @@ @@ Statistic collecting a duration metric. @@

Used in: HealthRequestStats, InferRequestStats, ModelControlRequestStats, RepositoryRequestStats, SharedMemoryControlRequestStats, StatusRequestStats

message StatisticDuration

grpc_service_v2.proto:827

@@ @@.. cpp:var:: message StatisticDuration @@ @@ Statistic recording a cumulative duration metric. @@

Used in: InferStatistics

message StatusRequestStats

server_status.proto:60

@@ @@.. cpp:var:: message StatusRequestStats @@ @@ Statistics collected for Status requests. @@

Used in: ServerStatus

message SystemSharedMemoryStatusResponse.RegionStatus

grpc_service_v2.proto:1036

@@ @@ .. cpp:var:: message RegionStatus @@ @@ Status for a shared memory region. @@

Used in: SystemSharedMemoryStatusResponse