package inference

Get desktop application:
View/edit binary Protocol Buffers messages

Inference Server GRPC endpoints.

rpc ServerLive (ServerLiveRequest, ServerLiveResponse)
dataplane.proto:14
Check liveness of the inference server.
message ServerLiveRequest
dataplane.proto:51
ServerLive messages.
(message has no fields)
message ServerLiveResponse
dataplane.proto:53
- bool live = 1
  True if the inference server is live, false if not live.
rpc ServerReady (ServerReadyRequest, ServerReadyResponse)
dataplane.proto:17
Check readiness of the inference server.
message ServerReadyRequest
dataplane.proto:62
ServerReady messages.
(message has no fields)
message ServerReadyResponse
dataplane.proto:64
- bool ready = 1
  True if the inference server is ready, false if not ready.
rpc ModelReady (ModelReadyRequest, ModelReadyResponse)
dataplane.proto:20
Check readiness of a model in the inference server.
message ModelReadyRequest
dataplane.proto:73
ModelReady messages.
- string name = 1
  The name of the model to check for readiness.
- string version = 2
  The version of the model to check for readiness. If not given, the server will choose a version based on the model and internal policy.
message ModelReadyResponse
dataplane.proto:83
- bool ready = 1
  True if the model is ready, false if not ready.
rpc ServerMetadata (ServerMetadataRequest, ServerMetadataResponse)
dataplane.proto:23
Get server metadata.
message ServerMetadataRequest
dataplane.proto:92
ServerMetadata messages.
(message has no fields)
message ServerMetadataResponse
dataplane.proto:94
- string name = 1
  The server name.
- string version = 2
  The server version.
- repeated string extensions = 3
  The extensions supported by the server.
rpc ModelMetadata (ModelMetadataRequest, ModelMetadataResponse)
dataplane.proto:26
Get model metadata.
message ModelMetadataRequest
dataplane.proto:109
ModelMetadata messages.
- string name = 1
  The name of the model.
- string version = 2
  The version of the model whose metadata is requested. If not given, the server will choose a version based on the model and internal policy.
message ModelMetadataResponse
dataplane.proto:119
- string name = 1
  The model name.
- repeated string versions = 2
  The versions of the model available on the server.
- string platform = 3
  The model's platform. See Platforms.
- repeated ModelMetadataResponse.TensorMetadata inputs = 4
  The model's inputs.
- repeated ModelMetadataResponse.TensorMetadata outputs = 5
  The model's outputs.
- map<string, InferParameter> parameters = 6
  Optional default parameters for the request / response. NOTE: This is an extension to the standard.
rpc ModelInfer (ModelInferRequest, ModelInferResponse)
dataplane.proto:29
Perform inference using a specific model.
rpc ModelStreamInfer (stream ModelInferRequest, stream ModelInferResponse)
dataplane.proto:32
Perform stream inference using a specific model.
rpc RepositoryIndex (RepositoryIndexRequest, RepositoryIndexResponse)
dataplane.proto:35
Get the index of model repository contents.
message RepositoryIndexRequest
dataplane.proto:397
- string repository_name = 1
  The name of the repository. If empty, the index is returned for all repositories.
- bool ready = 2
  If true return only models currently ready for inferencing.
message RepositoryIndexResponse
dataplane.proto:407
- repeated RepositoryIndexResponse.ModelIndex models = 1
  An index entry for each model.
rpc RepositoryModelLoad (RepositoryModelLoadRequest, RepositoryModelLoadResponse)
dataplane.proto:39
Load or reload a model from a repository.
message RepositoryModelLoadRequest
dataplane.proto:428
- string repository_name = 1
  The name of the repository to load from. If empty, the model is loaded from any repository.
- string model_name = 2
  The name of the model to load, or reload.
- map<string, ModelRepositoryParameter> parameters = 3
  Optional model repository request parameters.
message RepositoryModelLoadResponse
dataplane.proto:441
(message has no fields)
rpc RepositoryModelUnload (RepositoryModelUnloadRequest, RepositoryModelUnloadResponse)
dataplane.proto:43
Unload a model.
message RepositoryModelUnloadRequest
dataplane.proto:445
- string repository_name = 1
  The name of the repository from which the model was originally loaded. If empty, the repository is not considered.
- string model_name = 2
  The name of the model to unload.
- map<string, ModelRepositoryParameter> parameters = 3
  Optional model repository request parameters.
message RepositoryModelUnloadResponse
dataplane.proto:458
(message has no fields)

An inference parameter value.

Used in: ModelInferRequest, ModelInferRequest.InferInputTensor, ModelInferRequest.InferRequestedOutputTensor, ModelInferResponse, ModelInferResponse.InferOutputTensor, ModelMetadataResponse, ModelMetadataResponse.TensorMetadata

oneof parameter_choice
The parameter value can be a string, an int64, a boolean or a message specific to a predefined parameter.
- bool bool_param = 1
  A boolean parameter value.
- int64 int64_param = 2
  An int64 parameter value.
- string string_param = 3
  A string parameter value.

The data contained in a tensor. For a given data type the tensor contents can be represented in "raw" bytes form or in the repeated type that matches the tensor's data type. Protobuf oneof is not used because oneofs cannot contain repeated fields.

Used in: ModelInferRequest.InferInputTensor, ModelInferResponse.InferOutputTensor

repeated bool bool_contents = 1
Representation for BOOL data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated int32 int_contents = 2
Representation for INT8, INT16, and INT32 data types. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated int64 int64_contents = 3
Representation for INT64 data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated uint32 uint_contents = 4
Representation for UINT8, UINT16, and UINT32 data types. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated uint64 uint64_contents = 5
Representation for UINT64 data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated float fp32_contents = 6
Representation for FP32 data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated double fp64_contents = 7
Representation for FP64 data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated bytes bytes_contents = 8
Representation for BYTES data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.

ModelInfer messages.

Used as request type in: GRPCInferenceService.ModelInfer, GRPCInferenceService.ModelStreamInfer

string model_name = 1
The name of the model to use for inferencing.
string model_version = 2
The version of the model to use for inference. If not given, the server will choose a version based on the model and internal policy.
string id = 3
Optional identifier for the request. If specified, it will be returned in the response.
map<string, InferParameter> parameters = 4
Optional inference parameters.
repeated ModelInferRequest.InferInputTensor inputs = 5
The input tensors for the inference.
repeated ModelInferRequest.InferRequestedOutputTensor outputs = 6
The requested output tensors for the inference. Optional; if not specified, all outputs produced by the model will be returned.
repeated bytes raw_input_contents = 7
The data contained in an input tensor can be represented in "raw" bytes form or in the repeated type that matches the tensor's data type. Using the "raw" bytes form will typically allow higher performance due to the way protobuf allocation and reuse interacts with GRPC. For example, see https://github.com/grpc/grpc/issues/23231. To use the raw representation 'raw_input_contents' must be initialized with data for each tensor in the same order as 'inputs'. For each tensor, the size of this content must match what is expected by the tensor's shape and data type. The raw data must be the flattened, one-dimensional, row-major order of the tensor elements without any stride or padding between the elements. Note that the FP16 and BF16 data types must be represented as raw content as there is no specific data type for a 16-bit float type. If this field is specified then InferInputTensor::contents must not be specified for any input tensor.

An input tensor for an inference request.

Used in: ModelInferRequest

string name = 1
The tensor name.
string datatype = 2
The tensor data type.
repeated int64 shape = 3
The tensor shape.
map<string, InferParameter> parameters = 4
Optional inference input tensor parameters.
optional InferTensorContents contents = 5
The input tensor data. This field must not be specified if tensor contents are being specified in ModelInferRequest.raw_input_contents.

An output tensor requested for an inference request.

Used in: ModelInferRequest

string name = 1
The tensor name.
map<string, InferParameter> parameters = 2
Optional requested output tensor parameters.

Used as response type in: GRPCInferenceService.ModelInfer, GRPCInferenceService.ModelStreamInfer

string model_name = 1
The name of the model used for inference.
string model_version = 2
The version of the model used for inference.
string id = 3
The id of the inference request if one was specified.
map<string, InferParameter> parameters = 4
Optional inference response parameters.
repeated ModelInferResponse.InferOutputTensor outputs = 5
The output tensors holding inference results.
repeated bytes raw_output_contents = 6
The data contained in an output tensor can be represented in "raw" bytes form or in the repeated type that matches the tensor's data type. Using the "raw" bytes form will typically allow higher performance due to the way protobuf allocation and reuse interacts with GRPC. For example, see https://github.com/grpc/grpc/issues/23231. To use the raw representation 'raw_output_contents' must be initialized with data for each tensor in the same order as 'outputs'. For each tensor, the size of this content must match what is expected by the tensor's shape and data type. The raw data must be the flattened, one-dimensional, row-major order of the tensor elements without any stride or padding between the elements. Note that the FP16 and BF16 data types must be represented as raw content as there is no specific data type for a 16-bit float type. If this field is specified then InferOutputTensor::contents must not be specified for any output tensor.

An output tensor returned for an inference request.

Used in: ModelInferResponse

string name = 1
The tensor name.
string datatype = 2
The tensor data type.
repeated int64 shape = 3
The tensor shape.
map<string, InferParameter> parameters = 4
Optional output tensor parameters.
optional InferTensorContents contents = 5
The output tensor data. This field must not be specified if tensor contents are being specified in ModelInferResponse.raw_output_contents.

Metadata for a tensor.

Used in: ModelMetadataResponse

string name = 1
The tensor name.
string datatype = 2
The tensor data type.
repeated int64 shape = 3
The tensor shape. A variable-size dimension is represented by a -1 value.
map<string, InferParameter> parameters = 4
Optional default parameters for input. NOTE: This is an extension to the standard.

A model repository parameter value.

Used in: RepositoryModelLoadRequest, RepositoryModelUnloadRequest

oneof parameter_choice
The parameter value can be a string, an int64, a boolean or bytes.
- bool bool_param = 1
  A boolean parameter value.
- int64 int64_param = 2
  An int64 parameter value.
- string string_param = 3
  A string parameter value.
- bytes bytes_param = 4
  A bytes parameter value.

Index entry for a model.

Used in: RepositoryIndexResponse

string name = 1
The name of the model.
string version = 2
The version of the model.
string state = 3
The state of the model.
string reason = 4
The reason, if any, that the model is in the given state.

package inference

service GRPCInferenceService

rpc ServerLive (ServerLiveRequest, ServerLiveResponse)

message ServerLiveRequest

message ServerLiveResponse

bool live = 1

rpc ServerReady (ServerReadyRequest, ServerReadyResponse)

message ServerReadyRequest

message ServerReadyResponse

bool ready = 1

rpc ModelReady (ModelReadyRequest, ModelReadyResponse)

message ModelReadyRequest

string name = 1

string version = 2

message ModelReadyResponse

bool ready = 1

rpc ServerMetadata (ServerMetadataRequest, ServerMetadataResponse)

message ServerMetadataRequest

message ServerMetadataResponse

string name = 1

string version = 2

repeated string extensions = 3

rpc ModelMetadata (ModelMetadataRequest, ModelMetadataResponse)

message ModelMetadataRequest

string name = 1

string version = 2

message ModelMetadataResponse

string name = 1

repeated string versions = 2

string platform = 3

repeated ModelMetadataResponse.TensorMetadata inputs = 4

repeated ModelMetadataResponse.TensorMetadata outputs = 5

map<string, InferParameter> parameters = 6

rpc ModelInfer (ModelInferRequest, ModelInferResponse)

rpc ModelStreamInfer (stream ModelInferRequest, stream ModelInferResponse)

rpc RepositoryIndex (RepositoryIndexRequest, RepositoryIndexResponse)

message RepositoryIndexRequest

string repository_name = 1

bool ready = 2

message RepositoryIndexResponse

repeated RepositoryIndexResponse.ModelIndex models = 1

rpc RepositoryModelLoad (RepositoryModelLoadRequest, RepositoryModelLoadResponse)

message RepositoryModelLoadRequest

string repository_name = 1

string model_name = 2

map<string, ModelRepositoryParameter> parameters = 3

message RepositoryModelLoadResponse

rpc RepositoryModelUnload (RepositoryModelUnloadRequest, RepositoryModelUnloadResponse)

message RepositoryModelUnloadRequest

string repository_name = 1

string model_name = 2

map<string, ModelRepositoryParameter> parameters = 3

message RepositoryModelUnloadResponse

message InferParameter

oneof parameter_choice

bool bool_param = 1

int64 int64_param = 2

string string_param = 3

message InferTensorContents

repeated bool bool_contents = 1

repeated int32 int_contents = 2

repeated int64 int64_contents = 3

repeated uint32 uint_contents = 4

repeated uint64 uint64_contents = 5

repeated float fp32_contents = 6

repeated double fp64_contents = 7

repeated bytes bytes_contents = 8

message ModelInferRequest

string model_name = 1

string model_version = 2

string id = 3

map<string, InferParameter> parameters = 4

repeated ModelInferRequest.InferInputTensor inputs = 5

repeated ModelInferRequest.InferRequestedOutputTensor outputs = 6

repeated bytes raw_input_contents = 7

message ModelInferRequest.InferInputTensor

string name = 1

string datatype = 2

repeated int64 shape = 3

map<string, InferParameter> parameters = 4