package inference

Get desktop application:
View/edit binary Protocol Buffers messages

Inference Server gRPC endpoints.

rpc ServerLive (ServerLiveRequest, ServerLiveResponse)
kfs_inference_v2.proto:10
The ServerLive API indicates if the inference server is able to receive and respond to metadata and inference requests.
message ServerLiveRequest
kfs_inference_v2.proto:34
(message has no fields)
message ServerLiveResponse
kfs_inference_v2.proto:36
- bool live = 1
  True if the inference server is live, false if not live.
rpc ServerReady (ServerReadyRequest, ServerReadyResponse)
kfs_inference_v2.proto:13
The ServerReady API indicates if the server is ready for inferencing.
message ServerReadyRequest
kfs_inference_v2.proto:42
(message has no fields)
message ServerReadyResponse
kfs_inference_v2.proto:44
- bool ready = 1
  True if the inference server is ready, false if not ready.
rpc ModelReady (ModelReadyRequest, ModelReadyResponse)
kfs_inference_v2.proto:16
The ModelReady API indicates if a specific model is ready for inferencing.
message ModelReadyRequest
kfs_inference_v2.proto:50
- string name = 1
  The name of the model to check for readiness.
- string version = 2
  The version of the model to check for readiness. If not given the server will choose a version based on the model and internal policy.
message ModelReadyResponse
kfs_inference_v2.proto:60
- bool ready = 1
  True if the model is ready, false if not ready.
rpc ServerMetadata (ServerMetadataRequest, ServerMetadataResponse)
kfs_inference_v2.proto:21
The ServerMetadata API provides information about the server. Errors are indicated by the google.rpc.Status returned for the request. The OK code indicates success and other codes indicate failure.
message ServerMetadataRequest
kfs_inference_v2.proto:66
(message has no fields)
message ServerMetadataResponse
kfs_inference_v2.proto:68
- string name = 1
  The server name.
- string version = 2
  The server version.
- repeated string extensions = 3
  The extensions supported by the server.
rpc ModelMetadata (ModelMetadataRequest, ModelMetadataResponse)
kfs_inference_v2.proto:26
The per-model metadata API provides information about a model. Errors are indicated by the google.rpc.Status returned for the request. The OK code indicates success and other codes indicate failure.
message ModelMetadataRequest
kfs_inference_v2.proto:80
- string name = 1
  The name of the model.
- string version = 2
  The version of the model to return metadata for. If not given, the server will choose a version based on the model and internal policy.
message ModelMetadataResponse
kfs_inference_v2.proto:90
- string name = 1
  The model name.
- repeated string versions = 2
  The versions of the model available on the server.
- string platform = 3
  The model's platform. See Platforms.
- repeated ModelMetadataResponse.TensorMetadata inputs = 4
  The model's inputs.
- repeated ModelMetadataResponse.TensorMetadata outputs = 5
  The model's outputs.
rpc ModelInfer (ModelInferRequest, ModelInferResponse)
kfs_inference_v2.proto:31
The ModelInfer API performs inference using the specified model. Errors are indicated by the google.rpc.Status returned for the request. The OK code indicates success and other codes indicate failure.
message ModelInferRequest
kfs_inference_v2.proto:122
- string model_name = 1
  The name of the model to use for inferencing.
- string model_version = 2
  The version of the model to use for inference. If not given the server will choose a version based on the model and internal policy.
- string id = 3
  Optional identifier for the request. If specified, it will be returned in the response.
- map<string, InferParameter> parameters = 4
  Optional inference parameters.
- repeated ModelInferRequest.InferInputTensor inputs = 5
  The input tensors for the inference.
- repeated ModelInferRequest.InferRequestedOutputTensor outputs = 6
  The requested output tensors for the inference. Optional, if not specified all outputs produced by the model will be returned.
- repeated bytes raw_input_contents = 7
  The data contained in an input tensor can be represented in "raw" bytes form or in the repeated type that matches the tensor's data type. To use the raw representation 'raw_input_contents' must be initialized with data for each tensor in the same order as 'inputs'. For each tensor, the size of this content must match what is expected by the tensor's shape and data type. The raw data must be the flattened, one-dimensional, row-major order of the tensor elements without any stride or padding between the elements. Note that the FP16 data type must be represented as raw content as there is no specific data type for a 16-bit float type. If this field is specified then InferInputTensor::contents must not be specified for any input tensor.
message ModelInferResponse
kfs_inference_v2.proto:193
- string model_name = 1
  The name of the model used for inference.
- string model_version = 2
  The version of the model used for inference.
- string id = 3
  The id of the inference request if one was specified.
- map<string, InferParameter> parameters = 4
  Optional inference response parameters.
- repeated ModelInferResponse.InferOutputTensor outputs = 5
  The output tensors holding inference results.
- repeated bytes raw_output_contents = 6
  The data contained in an output tensor can be represented in "raw" bytes form or in the repeated type that matches the tensor's data type. To use the raw representation 'raw_output_contents' must be initialized with data for each tensor in the same order as 'outputs'. For each tensor, the size of this content must match what is expected by the tensor's shape and data type. The raw data must be the flattened, one-dimensional, row-major order of the tensor elements without any stride or padding between the elements. Note that the FP16 data type must be represented as raw content as there is no specific data type for a 16-bit float type. If this field is specified then InferOutputTensor::contents must not be specified for any output tensor.

An inference parameter value. Parameters are carried as “name”/“value” pairs, where the “name” is the name of the parameter (the string key of the enclosing parameters map) and the “value” is the boolean, integer, or string held by this message.

Used in: ModelInferRequest, ModelInferRequest.InferInputTensor, ModelInferRequest.InferRequestedOutputTensor, ModelInferResponse, ModelInferResponse.InferOutputTensor

oneof parameter_choice
The parameter value can be a string, an int64, a boolean or a message specific to a predefined parameter.
- bool bool_param = 1
  A boolean parameter value.
- int64 int64_param = 2
  An int64 parameter value.
- string string_param = 3
  A string parameter value.

The data contained in a tensor represented by the repeated type that matches the tensor's data type. Protobuf oneof is not used because oneofs cannot contain repeated fields.

Used in: ModelInferRequest.InferInputTensor, ModelInferResponse.InferOutputTensor

repeated bool bool_contents = 1
Representation for BOOL data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated int32 int_contents = 2
Representation for INT8, INT16, and INT32 data types. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated int64 int64_contents = 3
Representation for INT64 data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated uint32 uint_contents = 4
Representation for UINT8, UINT16, and UINT32 data types. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated uint64 uint64_contents = 5
Representation for UINT64 data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated float fp32_contents = 6
Representation for FP32 data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated double fp64_contents = 7
Representation for FP64 data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.
repeated bytes bytes_contents = 8
Representation for BYTES data type. The size must match what is expected by the tensor's shape. The contents must be the flattened, one-dimensional, row-major order of the tensor elements.

An input tensor for an inference request.

Used in: ModelInferRequest

string name = 1
The tensor name.
string datatype = 2
The tensor data type.
repeated int64 shape = 3
The tensor shape.
map<string, InferParameter> parameters = 4
Optional inference input tensor parameters.
optional InferTensorContents contents = 5
The tensor contents using a data-type format. This field must not be specified if "raw" tensor contents are being used for the inference request.

An output tensor requested for an inference request.

Used in: ModelInferRequest

string name = 1
The tensor name.
map<string, InferParameter> parameters = 2
Optional requested output tensor parameters.

An output tensor returned for an inference request.

Used in: ModelInferResponse

string name = 1
The tensor name.
string datatype = 2
The tensor data type.
repeated int64 shape = 3
The tensor shape.
map<string, InferParameter> parameters = 4
Optional output tensor parameters.
optional InferTensorContents contents = 5
The tensor contents using a data-type format. This field must not be specified if "raw" tensor contents are being used for the inference response.

Metadata for a tensor.

Used in: ModelMetadataResponse

string name = 1
The tensor name.
string datatype = 2
The tensor data type.
repeated int64 shape = 3
The tensor shape. A variable-size dimension is represented by a -1 value.

package inference

service GRPCInferenceService

rpc ServerLive (ServerLiveRequest, ServerLiveResponse)

message ServerLiveRequest

message ServerLiveResponse

bool live = 1

rpc ServerReady (ServerReadyRequest, ServerReadyResponse)

message ServerReadyRequest

message ServerReadyResponse

bool ready = 1

rpc ModelReady (ModelReadyRequest, ModelReadyResponse)

message ModelReadyRequest

string name = 1

string version = 2

message ModelReadyResponse

bool ready = 1

rpc ServerMetadata (ServerMetadataRequest, ServerMetadataResponse)

message ServerMetadataRequest

message ServerMetadataResponse

string name = 1

string version = 2

repeated string extensions = 3

rpc ModelMetadata (ModelMetadataRequest, ModelMetadataResponse)

message ModelMetadataRequest

string name = 1

string version = 2

message ModelMetadataResponse

string name = 1

repeated string versions = 2

string platform = 3

repeated ModelMetadataResponse.TensorMetadata inputs = 4

repeated ModelMetadataResponse.TensorMetadata outputs = 5

rpc ModelInfer (ModelInferRequest, ModelInferResponse)

message ModelInferRequest

string model_name = 1

string model_version = 2

string id = 3

map<string, InferParameter> parameters = 4

repeated ModelInferRequest.InferInputTensor inputs = 5

repeated ModelInferRequest.InferRequestedOutputTensor outputs = 6

repeated bytes raw_input_contents = 7

message ModelInferResponse

string model_name = 1

string model_version = 2

string id = 3

map<string, InferParameter> parameters = 4

repeated ModelInferResponse.InferOutputTensor outputs = 5

repeated bytes raw_output_contents = 6

message InferParameter

oneof parameter_choice

bool bool_param = 1

int64 int64_param = 2

string string_param = 3

message InferTensorContents

repeated bool bool_contents = 1

repeated int32 int_contents = 2

repeated int64 int64_contents = 3

repeated uint32 uint_contents = 4

repeated uint64 uint64_contents = 5

repeated float fp32_contents = 6

repeated double fp64_contents = 7

repeated bytes bytes_contents = 8

message ModelInferRequest.InferInputTensor

string name = 1

string datatype = 2

repeated int64 shape = 3

map<string, InferParameter> parameters = 4

optional InferTensorContents contents = 5

message ModelInferRequest.InferRequestedOutputTensor

string name = 1

map<string, InferParameter> parameters = 2

message ModelInferResponse.InferOutputTensor

string name = 1

string datatype = 2

repeated int64 shape = 3

map<string, InferParameter> parameters = 4

optional InferTensorContents contents = 5

message ModelMetadataResponse.TensorMetadata

string name = 1

string datatype = 2