package mmesh

Get desktop application:
View/edit binary Protocol Buffers messages

this is a gRPC version of the external model-mesh interface for managing and serving models

rpc registerModel (RegisterModelRequest, ModelStatusInfo)
model-mesh-external.proto:33
Registers a trained model to this model-mesh cluster
message RegisterModelRequest
model-mesh-external.proto:65
- string modelId = 1
- optional ModelInfo modelInfo = 2
- bool loadNow = 3
  whether the model should be loaded immediately
- bool sync = 4
  if loadNow is true, whether this method should block until the load completes
- uint64 lastUsedTime = 5
  OPTIONAL, ADVANCED - lastUsed timestamp to assign to newly registered model, for initial priority in cache. This should not typically be set (defaults to "recent")
rpc unregisterModel (UnregisterModelRequest, UnregisterModelResponse)
model-mesh-external.proto:36
Unregisters (deletes) a model from this model-mesh cluster, has no effect if the specified model isn't found
message UnregisterModelRequest
model-mesh-external.proto:124
- string modelId = 1
message UnregisterModelResponse
model-mesh-external.proto:128
(message has no fields)
rpc getModelStatus (GetStatusRequest, ModelStatusInfo)
model-mesh-external.proto:39
Returns the status of the specified model. See the ModelStatus enum
message GetStatusRequest
model-mesh-external.proto:130
- string modelId = 1
rpc ensureLoaded (EnsureLoadedRequest, ModelStatusInfo)
model-mesh-external.proto:41
Ensures the model with the specified id is loaded in this model-mesh cluster
message EnsureLoadedRequest
model-mesh-external.proto:134
- string modelId = 1
- uint64 lastUsedTime = 2
  timestamp to use when touching the model, 0 for "now" (default)
- bool sync = 4
  whether to block until specified model completes loading
rpc setVModel (SetVModelRequest, VModelStatusInfo)
model-mesh-external.proto:49
Creates a new vmodel id (alias) which maps to a new or existing concrete model, or sets the target model for an existing vmodel to a new or existing concrete model
message SetVModelRequest
model-mesh-external.proto:190
- string vModelId = 1
- string owner = 10
  if set and the vmodel does not already exist, it will be created with this owner. if set and the vmodel already exists, the existing vmodel's owner must match or else the call will fail with an ALREADY_EXISTS error
- string targetModelId = 2
- bool updateOnly = 3
  if true, the request will fail with NOT_FOUND if the vmodel does not already exist; if false, non-existent vmodel ids will be created
- optional ModelInfo modelInfo = 4
  optional ModelInfo for target model - if provided then target model will be created, otherwise it's expected to already exist
- bool autoDeleteTargetModel = 5
  whether the newly created target model should be automatically deleted once no longer referenced by any vmodel(s); applies only if modelInfo is provided
- bool loadNow = 6
  whether the new target model should be loaded immediately, even if the current active model isn't loaded (otherwise the target model will be loaded to the same scale as the current active model before it becomes the active model)
- bool force = 7
  if true, the active model will be updated immediately, regardless of the relative states of the target and currently-active models
- bool sync = 8
  whether this method should block until the transition completes. if the vmodel didn't already exist and loadNow is set to true, this will cause the method to block until the target of the newly created vmodel has completed loading
- string expectedTargetModelId = 9
  if provided, the request will only succeed (atomically) if the value matches the vmodel's current targetModelId. If the provided value is equal to the targetModelId in this same request message, the request will succeed only if the vmodel doesn't already exist *or* exists with the same targetModelId (in the latter case having no effect)
rpc deleteVModel (DeleteVModelRequest, DeleteVModelResponse)
model-mesh-external.proto:53
Deletes a vmodel, optionally deleting any referenced concrete models at the same time
message DeleteVModelRequest
model-mesh-external.proto:181
- string vModelId = 1
- string owner = 2
  if provided the specified vmodel will be deleted only if its owner matches
message DeleteVModelResponse
model-mesh-external.proto:188
(message has no fields)
rpc getVModelStatus (GetVModelStatusRequest, VModelStatusInfo)
model-mesh-external.proto:58
Gets the status of a vmodel, including associated target/active model ids. If the vmodel is not found, the returned VModelStatusInfo will have empty active and target model ids and an active model status of NOT_FOUND
message GetVModelStatusRequest
model-mesh-external.proto:228
- string vModelId = 1
- string owner = 2
  if provided the specified vmodel must have matching owner or else the returned response will indicate not found

this is the internal "sidecar" API for interfacing with a colocated model runtime container

rpc loadModel (LoadModelRequest, LoadModelResponse)
model-runtime.proto:39
Load a model, return when model is fully loaded. Include size of loaded model in response if no additional cost. A gRPC error code of FAILED_PRECONDITION or INVALID_ARGUMENT should be returned if no attempt to load the model was made (so can be sure that no space remains used). Note that the RPC may be cancelled by model-mesh prior to completion, after which an unloadModel call will immediately be sent for the same model. To avoid state inconsistency and "leaking" memory, implementors should ensure that this case is properly handled, i.e. that the model doesn't remain loaded after returning successfully from this unloadModel call.
message LoadModelRequest
model-runtime.proto:69
- string modelId = 1
- string modelType = 2
- string modelPath = 3
- string modelKey = 4
message LoadModelResponse
model-runtime.proto:77
- uint64 sizeInBytes = 1
  OPTIONAL - If nontrivial cost is involved in determining the size, return 0 here and do the sizing in the modelSize function
- uint32 maxConcurrency = 2
  EXPERIMENTAL - Applies only if limitModelConcurrency = true was returned from runtimeStatus rpc. See RuntimeStatusResponse.limitModelConcurrency for more detail
rpc unloadModel (UnloadModelRequest, UnloadModelResponse)
model-runtime.proto:43
Unload a previously loaded (or failed) model. Return when model is fully unloaded, or immediately if not found/loaded.
message UnloadModelRequest
model-runtime.proto:89
- string modelId = 1
message UnloadModelResponse
model-runtime.proto:93
(message has no fields)
rpc predictModelSize (PredictModelSizeRequest, PredictModelSizeResponse)
model-runtime.proto:49
Predict size of not-yet-loaded model - must return almost immediately. Should not perform expensive computation or remote lookups. Should be a conservative estimate. NOTE: Implementation of this RPC is optional.
message PredictModelSizeRequest
model-runtime.proto:95
- string modelId = 1
- string modelType = 2
- string modelPath = 3
- string modelKey = 4
message PredictModelSizeResponse
model-runtime.proto:103
- uint64 sizeInBytes = 1
rpc modelSize (ModelSizeRequest, ModelSizeResponse)
model-runtime.proto:56
Calculate size (memory consumption) of currently-loaded model. NOTE: Implementation of this RPC is only required if models' size is not returned in the response to loadModel. If the size computation takes a nontrivial amount of time, it's better to return from loadModel immediately and implement this to perform the sizing separately.
message ModelSizeRequest
model-runtime.proto:107
- string modelId = 1
message ModelSizeResponse
model-runtime.proto:111
- uint64 sizeInBytes = 1
rpc runtimeStatus (RuntimeStatusRequest, RuntimeStatusResponse)
model-runtime.proto:65
Provide basic runtime status and parameters; called only during startup. Before returning a READY status, implementations should check for and purge any/all currently-loaded models. Since this is only called during startup, there should very rarely be any, but if there are it implies the model-mesh container restarted unexpectedly and such a purge must be done to ensure continued consistency of state and avoid over-committing resources.
message RuntimeStatusRequest
model-runtime.proto:115
(message has no fields)
message RuntimeStatusResponse
model-runtime.proto:118
- RuntimeStatusResponse.Status status = 1
- uint64 capacityInBytes = 2
  memory capacity for static loaded models, in bytes
- uint32 maxLoadingConcurrency = 3
  maximum number of model loads that can be in-flight at the same time
- uint32 modelLoadingTimeoutMs = 4
  timeout for model loads in milliseconds
- uint64 defaultModelSizeInBytes = 5
  conservative "default" model size, such that "most" models are smaller than this
- string runtimeVersion = 6
  version string for this model server code
- uint64 numericRuntimeVersion = 7
  DEPRECATED - the value of this field is not used, it will be removed in a future update
- map<string, RuntimeStatusResponse.MethodInfo> methodInfos = 8
  Map containing information about specific inferencing gRPC methods exposed by this runtime, such as a path within the protobuf message indicating where the model id should be injected. If non-empty, and allowAnyMethod is not set to true, only RPCs of inference methods contained in this map will be forwarded to the runtime (acts as an allow-list). The method name keys in the map must be fully qualified, including the service name, i.e. "package.ServiceName/MethodName"
- bool limitModelConcurrency = 9
  EXPERIMENTAL - Set to true to enable the mode where each loaded model reports a maximum inferencing concurrency via the maxConcurrency field of the LoadModelResponse message. Additional requests are queued in the modelmesh framework. Turning this on will also enable latency-based autoscaling for the models, which attempts to minimize request queueing time and requires no other configuration/tuning.
- bool allowAnyMethod = 10
  If true, any/all RPCs will be forwarded to the runtime irrespective of the service/method name. Otherwise, only those present in the methodInfos map will be permitted. NOTE that this will default to being effectively true if the methodInfos map is empty.

Parameters holding information necessary to locate and load a given model, optional and for use only by your model runtime logic - they are passed to the model runtime loadModel api each time the model is loaded. These should *not* be used to store large amounts of data - the size of the strings should be as small as possible.

Used in: RegisterModelRequest, SetVModelRequest

string type = 1
arbitrary model metadata parameter, must be non-empty
string path = 2
arbitrary model metadata parameter
string key = 3
arbitrary model metadata parameter

Used as response type in: ModelMesh.ensureLoaded, ModelMesh.getModelStatus, ModelMesh.registerModel

Used as field type in: VModelStatusInfo

ModelStatusInfo.ModelStatus status = 1
repeated string errors = 2
repeated ModelStatusInfo.ModelCopyInfo modelCopyInfos = 3
Internal state of individual copies of this model - intended for debugging/advanced uses only. The top-level model status field should be sufficient for most cases. Arranged in reverse chronological order.

Used in: ModelStatusInfo

string location = 1
id of instance in which the model copy resides
ModelStatus copyStatus = 2
status of this copy, one of LOADING, LOADED, LOADING_FAILED, UNKNOWN
uint64 time = 3
time of latest state change

Used in: ModelStatusInfo, ModelCopyInfo

NOT_FOUND = 0
model is not registered with the cluster
NOT_LOADED = 1
model is registered but not currently loaded anywhere
LOADING = 2
model is in the process of loading somewhere (and otherwise not loaded)
LOADED = 3
model is loaded in at least one cluster instance
LOADING_FAILED = 4
model loading failed; will be retried periodically
UNKNOWN = 5

Used in: RuntimeStatusResponse

repeated uint32 idInjectionPath = 1
Optional path of protobuf field numbers, pointing to a string field within the RPC's request message that should be replaced with the model id to which the request applies. All but the last field in the list must be of "embedded message" type; the last one must be of string type.

Used in: RuntimeStatusResponse

STARTING = 0
READY = 1
FAILING = 2
not used yet

Used as response type in: ModelMesh.getVModelStatus, ModelMesh.setVModel

VModelStatusInfo.VModelStatus status = 1
string activeModelId = 2
id of underlying model to which apply/prediction requests sent to this vmodel will be routed
string targetModelId = 3
if targetModelId is not equal to activeModelId then the vmodel is in a transitional state (waiting for the target model to be in an appropriate state before it's promoted to be the active model)
optional ModelStatusInfo activeModelStatus = 4
status of the currently active model
optional ModelStatusInfo targetModelStatus = 5
status of the target model, set only if targetModelId != activeModelId
string owner = 6
the owner of this vmodel, if any

Used in: VModelStatusInfo

NOT_FOUND = 0
vmodel is not registered with the cluster
DEFINED = 1
vmodel is registered and in a steady-state (activeModelId == targetModelId)
TRANSITIONING = 2
vmodel is waiting for a new target model to be ready before transitioning to it (activeModelId != targetModelId)
TRANSITION_FAILED = 3
the target model failed to load and so the transition is blocked; will be retried periodically so *may* automatically recover from this state
UNKNOWN = 5

package mmesh

service ModelMesh

rpc registerModel (RegisterModelRequest, ModelStatusInfo)

message RegisterModelRequest

string modelId = 1

optional ModelInfo modelInfo = 2

bool loadNow = 3

bool sync = 4

uint64 lastUsedTime = 5

rpc unregisterModel (UnregisterModelRequest, UnregisterModelResponse)

message UnregisterModelRequest

string modelId = 1

message UnregisterModelResponse

rpc getModelStatus (GetStatusRequest, ModelStatusInfo)

message GetStatusRequest

string modelId = 1

rpc ensureLoaded (EnsureLoadedRequest, ModelStatusInfo)

message EnsureLoadedRequest

string modelId = 1

uint64 lastUsedTime = 2

bool sync = 4

rpc setVModel (SetVModelRequest, VModelStatusInfo)

message SetVModelRequest

string vModelId = 1

string owner = 10

string targetModelId = 2

bool updateOnly = 3

optional ModelInfo modelInfo = 4

bool autoDeleteTargetModel = 5

bool loadNow = 6

bool force = 7

bool sync = 8

string expectedTargetModelId = 9

rpc deleteVModel (DeleteVModelRequest, DeleteVModelResponse)

message DeleteVModelRequest

string vModelId = 1

string owner = 2

message DeleteVModelResponse

rpc getVModelStatus (GetVModelStatusRequest, VModelStatusInfo)

message GetVModelStatusRequest

string vModelId = 1

string owner = 2

service ModelRuntime

rpc loadModel (LoadModelRequest, LoadModelResponse)

message LoadModelRequest

string modelId = 1

string modelType = 2

string modelPath = 3

string modelKey = 4

message LoadModelResponse

uint64 sizeInBytes = 1

uint32 maxConcurrency = 2

rpc unloadModel (UnloadModelRequest, UnloadModelResponse)

message UnloadModelRequest

string modelId = 1

message UnloadModelResponse

rpc predictModelSize (PredictModelSizeRequest, PredictModelSizeResponse)

message PredictModelSizeRequest

string modelId = 1

string modelType = 2

string modelPath = 3

string modelKey = 4

message PredictModelSizeResponse

uint64 sizeInBytes = 1

rpc modelSize (ModelSizeRequest, ModelSizeResponse)

message ModelSizeRequest

string modelId = 1

message ModelSizeResponse

uint64 sizeInBytes = 1

rpc runtimeStatus (RuntimeStatusRequest, RuntimeStatusResponse)

message RuntimeStatusRequest

message RuntimeStatusResponse

RuntimeStatusResponse.Status status = 1

uint64 capacityInBytes = 2

uint32 maxLoadingConcurrency = 3

uint32 modelLoadingTimeoutMs = 4

uint64 defaultModelSizeInBytes = 5

string runtimeVersion = 6

uint64 numericRuntimeVersion = 7

map<string, RuntimeStatusResponse.MethodInfo> methodInfos = 8