package sax

Get desktop application:
View/edit binary Protocol Buffers messages

////////////////////////////// Called by clients. //////////////////////////////

rpc Publish (PublishRequest, PublishResponse)
admin.proto:312
Starts serving a model on N model servers.
message PublishRequest
admin.proto:201
- optional Model model = 2
message PublishResponse
admin.proto:205
(message has no fields)
rpc Update (UpdateRequest, UpdateResponse)
admin.proto:315
Updates a published model.
message UpdateRequest
admin.proto:213
- optional Model model = 1
message UpdateResponse
admin.proto:217
(message has no fields)
rpc Unpublish (UnpublishRequest, UnpublishResponse)
admin.proto:318
Stops serving a model.
message UnpublishRequest
admin.proto:207
- string model_id = 1
message UnpublishResponse
admin.proto:211
(message has no fields)
rpc List (ListRequest, ListResponse)
admin.proto:321
Lists actively serving models.
message ListRequest
admin.proto:219
- string model_id = 1
  If empty, lists all actively serving models in the system.
message ListResponse
admin.proto:224
- repeated PublishedModel published_models = 1
rpc Stats (StatsRequest, StatsResponse)
admin.proto:324
Gets stats of a cell.
message StatsRequest
admin.proto:228
- string model_id = 1
  If empty, returns stats for all actively serving models in the system.
message StatsResponse
admin.proto:233
- repeated ModelServerTypeStat model_server_type_stats = 1
- map<string, int32> num_servers_by_servable_model_path = 2
  This counts the maximum number of servers that can be used to serve a servable model path. Clients can use this field to calculate their expected serving capacity.
rpc WatchLoc (WatchLocRequest, WatchLocResponse)
admin.proto:327
Watches for changes of model server address(es) for a given model.
message WatchLocRequest
admin.proto:242
- string model_id = 1
  An ID to identify the model. Must be globally unique, e.g., /sax/bar/lm_cloud_spmd_1024b
- string admin_server_id = 3
  Identifier of the admin server incarnation. If it does not match the current admin server's id, the server will send back a full set and the current admin server's id.
- int32 seqno = 2
  The client has synchronized its local state about addresses of servers serving this model right before 'seqno'.
message WatchLocResponse
admin.proto:277
- string admin_server_id = 2
- optional WatchResult result = 1
rpc WaitForReady (WaitForReadyRequest, WaitForReadyResponse)
admin.proto:330
Waits for a certain number of replicas to be ready for a given model.
message WaitForReadyRequest
admin.proto:282
- string model_id = 1
- int32 num_replicas = 2
message WaitForReadyResponse
admin.proto:287
(message has no fields)
rpc Join (JoinRequest, JoinResponse)
admin.proto:338
Periodically called by a model server to tell the admin server it has come/is online. The admin server keeps track of healthy model servers.
message JoinRequest
admin.proto:289
- string address = 1
  The network address and port identifying a model server, e.g., [1::2]:8888. An RPC server listens at this address for ModeletService.{Load, Unload, Status}, etc.
- string debug_address = 3
  If non-empty, the server has a status http server at this address for diagnosis purpose. Otherwise, uses 'address'.
- string data_address = 4
  Client connects to server at this address if non-empty. Otherwise, uses 'address'.
- optional ModelServer model_server = 2
message JoinResponse
admin.proto:304
(message has no fields)

rpc Load (LoadRequest, LoadResponse)
modelet.proto:211
Loads a model onto the model server.
message LoadRequest
modelet.proto:24
TODO(yuanzx): Add a way to override static model parameters.
- string model_key = 1
  Key identifying the model to load.
- string model_path = 2
  Path of the model in Sax's model registry linked in the server binary. This is the name used to locate a model in Sax, e.g., lingvo.lm.lm_cloud.LmCloudSpmd1024B
- string checkpoint_path = 3
  Path to checkpoint, e.g., gs://model/path/checkpoints/checkpoint_00050000
- optional AccessControlLists acls = 4
  ACLs protecting data methods supported by this model.
- map<string, string> overrides = 5
  model config overrides, e.g. BATCH_SIZE: 1
message LoadResponse
modelet.proto:45
(message has no fields)
rpc UpdateLoaded (UpdateLoadedRequest, UpdateLoadedResponse)
modelet.proto:214
Updates a model loaded on the model server.
message UpdateLoadedRequest
modelet.proto:47
- string model_key = 1
  Key identifying the model to load.
- optional AccessControlLists acls = 2
  ACLs protecting data methods supported by this model.
- string checkpoint_path = 3
  Checkpoint path.
message UpdateLoadedResponse
modelet.proto:58
(message has no fields)
rpc Unload (UnloadRequest, UnloadResponse)
modelet.proto:217
Unloads a model from the model server.
message UnloadRequest
modelet.proto:60
- string model_key = 1
message UnloadResponse
modelet.proto:64
(message has no fields)
rpc Export (ExportRequest, ExportResponse)
modelet.proto:220
Exports a method of a model.
message ExportRequest
modelet.proto:66
- string model_key = 1
  Key identifying the model to export.
- repeated string method_names = 6
  The names of the methods to export.
- repeated string signatures = 7
  The signatures of the exported methods. If unspecified, defaults to `serving_default`, which only works when exactly one method name is specified. If exporting multiple method names, the signatures need to be a list that corresponds to the method names, e.g., if we export with `method_names : [Generate, GenerateStream]`, the signatures here need to be: ['signature_for_generate', 'signature_for_generate_stream'].
- string export_path = 3
  Path in which to save the exported model.
- ExportRequest.SerializedModelFormat serialized_model_format = 4
  The format of the serialized model.
- ExportRequest.RngSeedMode rng_seed_mode = 5
  The RNG seed mode.
- bool enable_gpu_multi_device_execution = 8
  If true, enable the multi-device execution type for GPU.
message ExportResponse
modelet.proto:118
(message has no fields)
rpc GetStatus (GetStatusRequest, GetStatusResponse)
modelet.proto:223
Reports server status such as models loaded.
message GetStatusRequest
modelet.proto:131
- bool include_failure_reasons = 1
- bool include_method_stats = 2
message GetStatusResponse
modelet.proto:139
TODO(jiawenhao): Add MemoryStats and LoadStats. MemoryStats: Per-device/total used, free, etc. LoadStats: Per-model/method RPCs minute/hour/total.
- repeated GetStatusResponse.ModelWithStatus models = 1
- optional GetStatusResponse.ServerStatus server_status = 2
rpc Save (SaveRequest, SaveResponse)
modelet.proto:226
Saves checkpoint of a model.
message SaveRequest
modelet.proto:120
- string model_key = 1
  Key identifying the model to save.
- string checkpoint_path = 2
  Path to checkpoint, e.g., gs://model/path/checkpoints/checkpoint_00050000
message SaveResponse
modelet.proto:129
(message has no fields)
rpc WakeUp (WakeUpRequest, WakeUpResponse)
modelet.proto:229
Wakes up a dormant server.
message WakeUpRequest
modelet.proto:206
(message has no fields)
message WakeUpResponse
modelet.proto:207
(message has no fields)

Used in: LoadRequest, Model, UpdateLoadedRequest

map<string, string> items = 1
items[method] specifies the access control list name for the given method. A method corresponds to the model data method, e.g., lm.score, lm.generate, vm.classify. The ACL name is up to the implementation to interpret, but in general the ACL name is a group name. E.g., the following ACLs open the scoring method to all and restrict the generation method to the group foo. items { "lm.score" : "all" "lm.generate" : "foo" }

string fs_root = 1
The file system root under which all Sax cell states are stored, e.g., gs://sax-data/
string admin_acl = 2
ACL protecting admin methods running in this cell, including publish, update, and unpublish. The content is up to the implementation to interpret, but in general it is a group name.

Used in: ExportRequest

RNG_SEED_MODE_UNSPECIFIED = 0
If this is left unspecified, it is up to Sax to determine a mode that matches its native serving behavior. Currently, the native serving behavior is STATEFUL.
STATELESS = 1
The exported method takes a uint32 tensor of shape `[batch_size]` and named `rng_seed`. `rng_seed[0]` will be used as the seed for the whole batch and other entries in `rng_seed` are ignored.
STATEFUL = 2
The exported method uses an in-graph tf.random.uniform internally to generate the rng seed.
FIXED = 3
The exported method uses an in-graph tf.constant() as the random seed.

Used in: ExportRequest

SERIALIZED_MODEL_FORMAT_UNKNOWN = 0
Invalid.
TF_SAVEDMODEL_V0 = 1
The TensorFlow SavedModel format.

map<string, float> items = 1
items[input_key] specifies value set for an input_key. E.g., the following extra inputs will change input.temperature to 0.1 in sampling decode. items { "temperature" : "0.1" }
map<string, Tensor> tensors = 2
tensors[input_key] specifies tensors set for an input_key. E.g., the following extra inputs will change input.tensors as soft prompt. tensors { "prompt_embeddings" : [0.1, 0.2, 0.3, 0.4] } It is invalid for the same key to appear in both items and tensors.
map<string, string> strings = 3
strings[input_key] specifies value in string type set for an input_key. E.g., the following extra inputs will change input.strings as decoding constraint. strings { "regex" : "a*b*c*d*e*f*g*h*" } It is invalid if the same key has appeared in items and tensors.

Method stats shown on modelet home pages.

Used in: ModelWithStatus

string method = 1
The method name.
int64 pending_reqs = 9
The number of calls on this model/method waiting on the server.
float errors_per_second = 2
The QPS of failed requests in the past minute.
float successes_per_second = 3
The QPS of succeeded requests in the past minute.
optional float mean_latency_on_success_per_second = 4
The mean latency of succeeded requests in the past minute.
optional float p50_latency_on_success_per_second = 5
The 50 percentile latency of succeeded requests in the past minute.
optional float p95_latency_on_success_per_second = 6
The 95 percentile latency of succeeded requests in the past minute.
optional float p99_latency_on_success_per_second = 7
The 99 percentile latency of succeeded requests in the past minute.
repeated int32 recent_batch_sizes = 8
The recent 10 batch sizes.

Used in: GetStatusResponse

string model_key = 1
ModelStatus model_status = 2
string failure_reason = 3
Only filled in if requested.
repeated MethodStats method_stats = 4
Only filled if request.include_method_stats=true.

Used in: GetStatusResponse

optional ServerStatus.State state = 1
optional string explanation = 2
Optionally, human readable explanation for the server state.
optional ServerStatus.Stats stats = 3

Used in: ServerStatus

UNDEFINED = 0
ACTIVE = 1
The server is usable and ready to serve.
DORMANT = 2
The server is offline and unusable now, but the job can come back online and become active when needed.

Used in: ServerStatus

float early_rejection_errors_per_second = 1
The QPS of early rejected requests in the past 10s. Early rejected requests are requests that are rejected by the server with "kUnavailable" error before they are processed, e.g., because the server is dormant.

The state of a joined model server.

optional ModelServer model_server = 1
string address = 2
string debug_address = 6
string data_address = 7
int64 last_join_ms = 3
milliseconds since Unix epoch
map<string, ModelStatus> loaded_models = 4
model ID: status
map<string, string> failure_reasons = 5
model ID: error message
bool is_dormant = 11
state
int64 pending_requests = 12
stats
float errors_per_second = 8
float successes_per_second = 9
float mean_latency_in_seconds = 10

string location = 1
e.g. IP:port

The configuration of a model.

Used in: PublishRequest, PublishedModel, State, UpdateRequest

string model_id = 1
An ID to identify the model. Must be globally unique, e.g., /sax/test/lm_cloud_spmd_2b
string model_path = 2
Path to a model in Sax's model registry linked in the server binary, e.g., saxml.server.pax.lm.params.lm_cloud.LmCloudSpmd2B
string checkpoint_path = 3
Path to a checkpoint, e.g., gs://sax-data/checkpoints/checkpoint_00000000
int32 requested_num_replicas = 4
The number of model servers to serve this model on. The admin server periodically examines active/available model servers and tries its best to keep this many replicas active for this model.
optional AccessControlLists acls = 5
ACLs protecting data methods supported by this model.
string admin_acl = 6
ACL protecting admin methods running on this model, including update and unpublish. The content is up to the implementation to interpret, but in general it is a group name.
map<string, string> overrides = 7
model config overrides, e.g. BATCH_SIZE: 1
bytes uuid = 8
Identifies a specific deployment of the model to Sax. It spans the lifetime of the model from publish to unpublish. It's a random 128-bit number represented as an array of bytes.

The capabilities of a model server.

Used in: JoinRequest, JoinedModelServer

ModelServer.ChipType chip_type = 1
ModelServer.ChipTopology chip_topology = 2
repeated string servable_model_paths = 3
The server informs the admin which model paths it supports. Hence, it is expected that the admin will only ask this server to load models whose model paths are in this list. E.g., saxml.lm.params.Gemma7B
repeated string tags = 4
A set of strings associated with this server. Each tag is a free form string. The admin may use these tags during the model assignment.

Used in: ModelServer, ModelServerTypeStat

CHIP_TOPOLOGY_UNKNOWN = 0
CHIP_TOPOLOGY_1 = 22
CHIP_TOPOLOGY_2 = 24
CHIP_TOPOLOGY_4 = 23
CHIP_TOPOLOGY_8 = 25
CHIP_TOPOLOGY_16 = 26
CHIP_TOPOLOGY_1X1 = 1
CHIP_TOPOLOGY_2X2 = 2
CHIP_TOPOLOGY_2X4 = 28
CHIP_TOPOLOGY_4X2 = 21
CHIP_TOPOLOGY_4X4 = 3
CHIP_TOPOLOGY_4X8 = 4
CHIP_TOPOLOGY_8X8 = 5
CHIP_TOPOLOGY_8X16 = 6
CHIP_TOPOLOGY_16X16 = 7
CHIP_TOPOLOGY_16X32 = 8
CHIP_TOPOLOGY_32X32 = 9
CHIP_TOPOLOGY_1X1X1 = 10
CHIP_TOPOLOGY_1X2X1 = 11
CHIP_TOPOLOGY_2X2X1 = 12
CHIP_TOPOLOGY_2X2X2 = 13
CHIP_TOPOLOGY_2X2X4 = 14
CHIP_TOPOLOGY_2X4X4 = 15
CHIP_TOPOLOGY_4X4X4 = 16
CHIP_TOPOLOGY_4X4X8 = 17
CHIP_TOPOLOGY_4X4X16 = 18
CHIP_TOPOLOGY_4X8X8 = 19
CHIP_TOPOLOGY_4X8X16 = 33
CHIP_TOPOLOGY_4X16X16 = 31
CHIP_TOPOLOGY_4X16X32 = 30
CHIP_TOPOLOGY_8X8X8 = 27
CHIP_TOPOLOGY_8X8X12 = 20
CHIP_TOPOLOGY_8X8X16 = 32
CHIP_TOPOLOGY_8X16X16 = 29

Used in: ModelServer, ModelServerTypeStat

CHIP_TYPE_UNKNOWN = 0
CHIP_TYPE_TPU_V2 = 2
CHIP_TYPE_TPU_V3 = 4
CHIP_TYPE_TPU_V4 = 6
CHIP_TYPE_TPU_V4I = 5
CHIP_TYPE_TPU_V5E = 15
CHIP_TYPE_TPU_V6E = 17
CHIP_TYPE_GPU_P100 = 9
CHIP_TYPE_GPU_V100 = 10
CHIP_TYPE_GPU_T4 = 13
CHIP_TYPE_GPU_A100 = 11
CHIP_TYPE_GPU_H100 = 14
CHIP_TYPE_GPU_L4 = 16
CHIP_TYPE_CPU = 12

Used in: StatsResponse

ModelServer.ChipType chip_type = 1
ModelServer.ChipTopology chip_topology = 2
int32 num_replicas = 3
number of active replicas

Used in: GetStatusResponse.ModelWithStatus, JoinedModelServer

NONE = 0
Unused: unloaded models are removed from responses.
LOADING = 1
This model is being loaded and can't serve yet.
LOADED = 2
This model is loaded and ready to serve.
FAILED = 3
This model failed to load or unload.
UNLOADING = 4
This model is being unloaded and can't serve anymore.

The state of a published model.

Used in: ListResponse

optional Model model = 1
repeated string modelet_addresses = 2

repeated Model models = 1
int32 last_generation = 2

Used in: ExtraInputs

repeated float values = 1
Tensors in float flattened to 1d. Reshaping information can be inferred from model attributes or other extra inputs.

Used in: TestRequestWithExtraInput

string model_key = 1
string text = 2

optional TestRequest test_request = 1
optional ExtraInputs extra_inputs = 3

Used in: WatchLocResponse

int32 next_seqno = 1
The seqno the client should use for the next Watch call.
bool has_fullset = 2
If has_fullset is true, the server sends back the complete set in 'values' together with a sequence of changes in 'changelog'. If has_fullset is false, 'changelog' contains mutations within [req.seqno .. next_seqno).
repeated string values = 3
repeated WatchResult.Mutation changelog = 4

Used in: WatchResult

oneof kind
- string addition = 1
- string deletion = 2

package sax

service Admin

rpc Publish (PublishRequest, PublishResponse)

message PublishRequest

optional Model model = 2

message PublishResponse

rpc Update (UpdateRequest, UpdateResponse)

message UpdateRequest

optional Model model = 1

message UpdateResponse

rpc Unpublish (UnpublishRequest, UnpublishResponse)

message UnpublishRequest

string model_id = 1

message UnpublishResponse

rpc List (ListRequest, ListResponse)

message ListRequest

string model_id = 1

message ListResponse

repeated PublishedModel published_models = 1

rpc Stats (StatsRequest, StatsResponse)

message StatsRequest

string model_id = 1

message StatsResponse

repeated ModelServerTypeStat model_server_type_stats = 1

map<string, int32> num_servers_by_servable_model_path = 2

rpc WatchLoc (WatchLocRequest, WatchLocResponse)

message WatchLocRequest

string model_id = 1

string admin_server_id = 3

int32 seqno = 2

message WatchLocResponse

string admin_server_id = 2

optional WatchResult result = 1

rpc WaitForReady (WaitForReadyRequest, WaitForReadyResponse)

message WaitForReadyRequest

string model_id = 1

int32 num_replicas = 2

message WaitForReadyResponse

rpc Join (JoinRequest, JoinResponse)

message JoinRequest

string address = 1

string debug_address = 3

string data_address = 4

optional ModelServer model_server = 2

message JoinResponse

service Modelet

rpc Load (LoadRequest, LoadResponse)

message LoadRequest

string model_key = 1

string model_path = 2

string checkpoint_path = 3

optional AccessControlLists acls = 4

map<string, string> overrides = 5

message LoadResponse

rpc UpdateLoaded (UpdateLoadedRequest, UpdateLoadedResponse)

message UpdateLoadedRequest

string model_key = 1

optional AccessControlLists acls = 2

string checkpoint_path = 3

message UpdateLoadedResponse

rpc Unload (UnloadRequest, UnloadResponse)

message UnloadRequest

string model_key = 1

message UnloadResponse

rpc Export (ExportRequest, ExportResponse)

message ExportRequest

string model_key = 1

repeated string method_names = 6

repeated string signatures = 7

string export_path = 3

ExportRequest.SerializedModelFormat serialized_model_format = 4

ExportRequest.RngSeedMode rng_seed_mode = 5

bool enable_gpu_multi_device_execution = 8

message ExportResponse

rpc GetStatus (GetStatusRequest, GetStatusResponse)

message GetStatusRequest

bool include_failure_reasons = 1

bool include_method_stats = 2

message GetStatusResponse

repeated GetStatusResponse.ModelWithStatus models = 1