/////////////////////// Global data requests
Unregisters a global allocation. If the handle given is not currently allocated, a NOT_FOUND status is returned.
(message has no fields)
Deconstructs a tuple. Returns a newly created GlobalDataHandle for each element in the tuple.
Unpack requests that a global data handle, with a tuple shape, has global data handles created for each of its constituent members. This is the equivalent of the "destructuring assignment" present in various programming languages.
Requests the shape of the referenced global data.
Requests the statistics of the given computation.
Loads a variable number of values with a given element type from ColumnIO.
Describes the path of the ColumnIO tablet to load.
Describes the field to load within the ColumnIO tablet.
Individual element shape, excluding rows.
Warning: ColumnIO does not support random-access, so use offset with caution in performance-critical scenarios.
Maximum number of elements (with shape element_shape) to load.
If more than one item is requested (via limit > 1), then this request attribute zips together the produced vectors.
Transfers the given global data to the client in the form of a Literal.
This optional field directs the service to return the literal in this layout. A shape is used to hold the layout to accommodate tuples.
Transfers the given literal to the server to be stored in a global allocation, which is returned.
Transfers the given literal to the Infeed buffer of the device.
(message has no fields)
Transferred literal from the Outfeed buffer of the device.
This optional field directs the service to return the literal in this layout. A shape is used to hold the layout to accommodate tuples.
Resets the device, clearing all existing state on the device.
(message has no fields)
Computes the value of a constant expression. The request contains the computation graph for the constant expression.
A LiteralProto is returned directly for this request.
Requests one or more device handles from the target. The returned device handles can be used to specify the device on which to execute computations or transfer data.
Creates a channel handle that can be used to transfer data between two computations via a pair of Send and Recv instructions.
Compiles the provided computation into an executable. Returns the handle of the executable.
The graph to be compiled.
Options that affect how XLA compiles code to service this request.
The layouts of the input arguments. If not set, the default layout will be used. Although the real arguments are not needed in compilation, the layouts of the arguments can affect the compilation.
The handle to the executable.
Invokes the provided executable with the provided global data passed as immutable arguments. The request contains the handle to the executable. Returns global data output and execution timing.
The shape and layout of the arguments must be the same as those of the executable's parameters.
Invokes the provided list of computations in parallel with the provided global data for each computation. Returns a list of global data output and execution timing.
Waits until the given execution (asynchronously launched) is complete, and returns the global data output.
Serialization of BufferAllocation.
Used in:
Assigned represents a single LogicalBuffer that is assigned to this BufferAllocation.
Used in:
Serialization of BufferAssignment.
Used in:
Alias represents a source LogicalBuffer, and the buffer location that aliases it.
Used in:
Handle given to a user to represent a channel between two computations via a Send and Recv instruction pair. Channels are unbuffered, so Send instructions will be blocked until the data is transferred.
Used in:
Used in:
Invalid primitive type to serve as default.
A channel for sending data between devices.
A channel for sending data from the device to the host. Can only be used with a Send operation.
A channel for sending data from the host to the device. Can only be used with a Recv operation.
Used in:
If true, uses the lower triangle of `a`. If false, uses the upper triangle of `a`.
Statistics of a computation.
Used in:
The number of floating point operations in the computation.
The number of transcendental operations (e.g., exp) in the computation.
Used in:
The number of the dimension that represents batch in the input.
The number of the dimension that represents features in the input.
The dimension numbers for the spatial dimensions that the window moves through in the input.
The number of the dimension that represents input features in the convolutional kernel (rhs).
The number of the dimension that represents output features in the convolutional kernel (rhs).
The dimension numbers for the spatial dimensions that the window moves through in the kernel (rhs). window.strides(0) is the stride in the kernel_spatial_dimensions(0) dimension.
The number of the dimension that represents batch in the output.
The number of the dimension that represents features in the output.
The dimension numbers for the spatial dimensions that the window moves through in the output.
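For orientation, a conventional NHWC-input / HWIO-kernel / NHWC-output convolution would map onto these dimension-number fields roughly as in the Python sketch below; the record and its values are illustrative only, not taken from the proto.

  # Hypothetical NHWC input, HWIO kernel, NHWC output convolution expressed as
  # ConvolutionDimensionNumbers-style fields (illustrative values only).
  nhwc_hwio_dnums = dict(
      input_batch_dimension=0,
      input_feature_dimension=3,
      input_spatial_dimensions=[1, 2],    # H, W of the input
      kernel_input_feature_dimension=2,   # I of HWIO
      kernel_output_feature_dimension=3,  # O of HWIO
      kernel_spatial_dimensions=[0, 1],   # H, W of the kernel
      output_batch_dimension=0,
      output_feature_dimension=3,
      output_spatial_dimensions=[1, 2],   # H, W of the output
  )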
Debugging options for XLA. These options may change at any time - there are no guarantees about backward or forward compatibility for these fields.
Used in:
Show addresses of HLO ops in graph dump.
Instrument the computation to collect per-HLO cycle counts.
List of HLO passes to disable/enable. These names must exactly match the pass names as specified by the HloPassInterface::name() method. At least one of xla_disable_hlo_passes and xla_enable_hlo_passes_only must be empty.
Disables all HLO passes. Note that some passes are necessary for correctness, and the invariants that must be satisfied by "fully optimized" HLO are different for different devices and may change over time. The only "guarantee", such as it is, is that if you compile XLA and dump the optimized HLO for some graph, you should be able to run it again on the same device with the same build of XLA.
Numerical optimization level for the XLA compiler backend; the specific interpretation of this value is left to the backends.
Embed the compiler IR as a string in the executable.
Eliminate implicit broadcasts when lowering user computations to HLO instructions; use explicit broadcast instead.
When generating calls to Eigen in the CPU backend, use multi-threaded Eigen mode.
Path to directory with cuda/ptx tools and libraries.
Enable flush-to-zero semantics in the GPU backend.
Disable multi-streaming in the GPU backend.
If true, in LLVM-based backends, emit !alias.scope metadata in generated IR.
If true, in LLVM-based backends, emit !noalias metadata in the generated IR.
If true, in LLVM-based backends, emit !invariant.load metadata in the generated IR.
If true, a set of expensive LLVM optimization passes will not be run.
Options for inserting reduce-precision operations for numerical experimentation. This is a repeated field, as we may want to have multiple passes with different parameters.
This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the computation will run n! times with all permutations of layouts for the output shape in rank n. For example, with a 3D shape, all permutations of the set {0, 1, 2} are tried.
This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the computation will run for all permutations of layouts of all input arguments. For example, with 2 input arguments in 2D and 4D shapes, the computation will run 2! * 4! times.
Assign colors based on sharding information when generating the Graphviz HLO graph.
If true, the GPU backend is free to use cudnn for HLO batch normalization ops.
Generate calls to MKL-DNN in the CPU backend.
Maximum kernel unroll factor for the GPU backend.
When true, "unsafe" mathematical optimizations are enabled. These transformations include but are not limited to: - Reducing the precision of operations (e.g. using an approximate sin function, or transforming x/y into x * (1/y)). - Assuming that operations never produce or consume NaN or +/- Inf (this behavior can be adjusted using xla_cpu_fast_math_allow_{nans|infs}). - Assuming that +0 and -0 are indistinguishable.
When xla_cpu_enable_fast_math is true then this controls whether we allow operations to produce NaNs. Ignored when xla_cpu_enable_fast_math is false.
When xla_cpu_enable_fast_math is true then this controls whether we allow operations to produce infinities. Ignored when xla_cpu_enable_fast_math is false.
When xla_cpu_enable_fast_math is true then this controls whether we forbid using the reciprocal of an argument in place of division. Ignored when xla_cpu_enable_fast_math is false.
When xla_cpu_enable_fast_math is true then this controls whether we forbid approximating calculations for functions. Ignored when xla_cpu_enable_fast_math is false.
When true we lower the Minimum and Maximum hlos in the GPU backend such that Min(NotNaN, NaN) = Min(NaN, NotNaN) = NotNaN. In other words, if this flag is true we don't propagate NaNs through Min and Max.
Allows XLA to increase the output precision of floating point operations.
Crashes the program when any kind of verification fails, instead of just logging the failures. One example is cross checking of convolution results among different algorithms.
Disable GEMM and Convolution auto-tuning.
Force the host platform to pretend that there are this many host "devices". All these devices are backed by the same threadpool. Defaults to 1. Setting this to anything other than 1 can increase overhead from context switching, but we let the user override this behavior to help run tests on the host that run models in parallel across multiple devices.
If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3).
Enable fast math with eigen in the HLO evaluator.
Temporary option to allow support for both the R1 and the scalar index versions of DynamicSlice and DynamicUpdateSlice. Only used for testing.
Option to emit a target-specific marker to indicate the start of a training step. The location of the marker (if any) is determined by the option value.
Directory to dump into.
If specified, will only dump modules which match this regexp.
If this flag is specified, will also dump HLO before and after passes that match this regular expression. Set to .* to dump before/after all passes.
Specifies the format that HLO is dumped in. Multiple of these may be specified.
Dump HLO graphs as HTML (DOT rendered to SVG and inlined in HTML).
If true, every time an HLO module is run, we will dump an HloSnapshot (essentially, a serialized module plus its inputs) to the --xla_dump_to directory.
Paths to files with ptx code.
Blacklist for cuDNN convolutions.
Extra options to pass to the compilation backend (e.g. LLVM); specific interpretation of these values is left to the backend.
Used in:
Generate a step marker at the program entry. This handles the case where each step is done by one or multiple program execution(s). Only the first program will be tagged for generating a step marker at the program entry. This is the default.
Generate a step marker at each iteration of the top level while loop, which is assumed to be a training loop.
Generate a step marker at each iteration of the second level while loops, which is assumed to be a training or eval loop.
No step marker generated.
DeviceAssignmentProto is a serialized form of DeviceAssignment class, which represents the device ids assigned to a set of replicated computations. See xla::DeviceAssignment class comment for more details.
Used in:
Each logical computation runs on replica_count physical devices. ComputationDevice represents the device ids assigned to the replicas.
Used in:
Handle given to a user that represents a replicated virtual device. Each replicated device represents N physical devices for execution where N is the number of replicas.
Used in:
The number of model-parallel virtual devices that communicate via XLA Send/Recv instructions.
Used in:
The dimension numbers that represent the 'lhs' contracting dimensions.
The dimension numbers that represent the 'rhs' contracting dimensions.
The dimension numbers that represent the 'lhs' batch dimensions.
The dimension numbers that represent the 'rhs' batch dimensions.
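As an illustration, an ordinary [m, k] x [k, n] matrix multiply contracts lhs dimension 1 against rhs dimension 0 and has no batch dimensions; a hypothetical Python sketch of the corresponding field values (not taken from the proto):

  # Plain matrix multiply expressed as DotDimensionNumbers-style fields
  # (illustrative values only).
  matmul_dnums = dict(
      lhs_contracting_dimensions=[1],
      rhs_contracting_dimensions=[0],
      lhs_batch_dimensions=[],
      rhs_batch_dimensions=[],
  )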
Used in:
A list of bindings which indicates that the `target_dim_num` in the subshape `target_param_index` of parameter `target_param_num` is a dynamic dimension and its real dynamic size is represented by `dynamic_param_index` in parameter `dynamic_param_num`. As an example, imagine we have a program:

  ENTRY main {
    a = f32[] parameter(0)
    b = f32[10] parameter(1)
    ROOT root = (f32[], f32[10]) tuple(%a, %b)
  }

Let's say 'b' (param index 1) is a dynamic shape whose input has an upper bound of 10 and real size is determined at runtime. 'a' represents the real size of b's first dimension. In this case, the fields are set in the following way:

  dynamic_param_num = 1
  dynamic_param_index = {}
  target_param_num = 0
  target_param_index = {}
  target_param_dim = 0
Used in:
TODO(b/118493728): Remove this and ExecuteGraphParallelRequest and replace the uses with calls to Compile and Execute.
Used in:
Options that affect how XLA compiles and runs code to service this request.
Used as response type in: XlaService.Execute
Used as field type in:
Handle given to a user that represents an execution that the user launched asynchronously on the device.
Used in:
These settings control how XLA compiles and/or runs code. Not all settings will have an effect on every platform. When adding new fields, keep in mind that boolean fields default to false.
Used in:
This optional field's layout is used as a hint when storing the output of this computation. Subsequent transfers of this output array to the client may be faster when using this layout. We use a Shape here to accommodate computations that return a tuple.
Used to seed random-number generators used in this computation. If this is 0, we generate a seed ourselves. TODO(b/32083678): Changing the seed unnecessarily forces a recompilation.
This optional field specifies a particular set of devices to run the computation on. The computation will be partitioned across these devices. If not provided, the default device will be chosen.
Number of replicas of the computation to run. If zero, uses the default number of replicas for the XLA service.
This optional field specifies the device assignment if known at compile time.
Profile data from the execution of a computation.
Used in:
Whether the executable was read from the compilation cache.
The time in milliseconds spent to compile the computation. This is only set if the executable was not read from the compilation cache (compilation_cache_hit == false).
The number of cycles spent for the computation. This does not include the time taken for the data transfers between the host and the device. This is a target-dependent field and only used for debugging purposes.
The time in nanoseconds spent for the computation, without data transfer.
The time in nanoseconds spent for the entire computation, including the result data transfer time. Current implementation does not spend any cycles for the input data transfer since the memory is initialized with the proper values before the execution.
The size of the binary code in the executable.
Whether this profile was drawn from a cache of profiles instead of from execution on the hardware.
Used in:
Forward FFT; complex in, complex out.
Inverse FFT; complex in, complex out.
Forward real FFT; real in, fft_length / 2 + 1 complex out.
Inverse real FFT; fft_length / 2 + 1 complex in, fft_length real out.
A format specifies the method used by a layout to store an array in memory.
Used in:
TODO(b/120869032): Rename this to FORMAT_NONE or something else which better corresponds to its meaning.
The default layout, with exactly one storage location per element.
A sparsely encoded layout, providing only the index/value pairs of non-zero elements.
Describes the dimension numbers for a gather operation. See https://www.tensorflow.org/performance/xla/operation_semantics#gather for more details.
Used in:
"Window indices" is a term for a set of indices that index into the interior of a dynamic-slice from the input tensor, the starting indices for which were computed from output_gather_dims (see the operation semantic for how this is defined) and the start_indices tensor. The window indices for a specific output index Out is computed as: i = 0 for (k : [0, input_tensor_shape.rank)) window_indices[k] = if k in collapsed_slice_dims then 0 else Out[offset_dims[i++]]
This is interpreted as a map from i to start_index_map[i]. It transforms the gather index looked up from the start_indices tensor into the starting index in the input space.
The dimension in the start_indices input that contains the starting indices.
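A minimal Python sketch of the window-index pseudocode above, assuming Out is given as a sequence of output coordinates; the function and argument names are illustrative, not part of the proto.

  def window_indices(out_index, input_rank, collapsed_slice_dims, offset_dims):
      # Follows the pseudocode: collapsed dimensions contribute index 0,
      # other dimensions read the output coordinate at the next offset dim.
      indices = []
      i = 0
      for k in range(input_rank):
          if k in collapsed_slice_dims:
              indices.append(0)
          else:
              indices.append(out_index[offset_dims[i]])
              i += 1
      return indices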
Handle given to a user that represents a globally accessible allocation. Contrast this against a ComputationDataHandle, which is not globally accessible, since it only exists within a specific computation.
Used in:
A trace of a HeapSimulator run.
Used in:
The trace includes a list of events, where each event describes one action performed by the heap simulator.
Used in:
The id of the LogicalBuffer that the event applies to.
The HloInstruction that the simulation was processing that caused this event to occur, identified by its computation and instruction name. E.g. buffers defined by instruction A are allocated when processing A.
The id of the canonical LogicalBuffer that the buffer shares with. Only set for SHARE_WITH events.
Used in:
A memory region was allocated for the buffer.
A memory region was freed for the buffer.
A buffer was shared with another (canonical) buffer. This is similar to ALLOC, except that instead of allocating a new region of memory, the memory region of the canonical buffer is directly re-used. Multiple buffers may share with the same canonical buffer. The lifetime of the canonical buffer is extended to the union of all lifetimes.
Serialization of HloComputation.
Used in:
The array of instructions is always in a valid dependency order, where operands appear before their users.
The id of this computation.
The id of the root of the computation.
Used in:
The following proto describes a pair of an aliased input (described by parameter number and a ShapeIndex of the parameter) and an output (described by a ShapeIndex of the root instruction). For example:

  entry = {
    output_shape_index={1},
    parameter_number=0,
    parameter_shape_index={1, 2},
  }

This entry indicates that the first parameter's {1, 2} element is aliased with the {1} element of the root instruction.
Used in:
ShapeIndex of the root hlo.
Number of the parameter in entry computation.
ShapeIndex of the parameter instruction.
The kind of alias to be setup.
Used in:
Define a UNDEFINED_ALIAS equal to zero to get around the default-0 proto3 behavior and missing has_*() APIs.
An alias set up by the user as a must-alias. A use setting USER_ALIAS expects the designated output to be dropped over the given input parameter number+index.
An alias set up by the compiler as part of its optimizations.
Serialization of HloInstruction. Next ID: 68
Used in:
Literal, only present for kConstant.
Parameter number is only present for kParameter.
Fusion state, only present for kFusion.
Index for kGetTupleElement.
Dimensions present for some operations that require reshaping or broadcasting, including Reshape, Reduce, ReduceWindow, and Reverse.
Describes the window in a windowed operation such as convolution.
Describes the dimension numbers used for a convolution.
The number of feature groups. Used for a convolution. Must be a divisor of the input feature dimension and output feature dimension. If not specified, it will use a default value of 1.
The bit sizes for a reduce-precision operation.
Describes the [start, start + size) range size for a dynamic slice ('start' is specified dynamically in the second operand of the operation).
The padding configuration that describes the edge padding and interior padding of this pad instruction. Only set for pad instructions.
Outfeed configuration information, only present for kOutfeed.
The distribution requested for random number generation. Only present for kRng.
A small float number added to the variance to avoid divide-by-zero error. Only present for kBatchNormTraining.
An integer value representing the index of the feature dimension. Only present for kBatchNormTraining.
Represents a unique identifier for each Send/Recv instruction pair or optionally for collective instructions (AllReduce, CollectivePermute, AllToAll). Non-positive channel_id is equivalent to no channel id.
The string representation of the infeed configuration.
Name of an external target (e.g., global symbol) to call, only present for kCustomCall.
Shape of outfeed request.
Describes the dimension numbers used for a dot operation
FFT type (FFT, IFFT, etc).
FFT length.
Comparison direction only used for kCompare.
Gather dimension numbers.
Compute Host.
The id of this instruction.
Backend configuration for the instruction. Has backend-specific meaning.
Cross replica op fields.
Deprecated, but keeping it for backward compatibility. Use channel_id. Non-positive all_reduce_id is equivalent to no all_reduce_id.
Whether this Send/Recv instruction transfers data to/from the host. Only present for Send and Recv instructions and their SendDone and RecvDone partners.
Whether this Sort instruction should be stable.
Precision configuration for the instruction. Has backend-specific meaning.
Collective permute field.
Sharding for kDomain instructions.
For custom call this indicates that the layouts are constrained. If constrain_layout is true then the 'shape' field must contain a layout, and 'operand_shapes_with_layout' must contain a shape with layout for each operand.
Options for TriangularSolve
Options for Cholesky
Describes how parameters behave with regards to replicas.
If set, the given instruction is run in parallel on e.g. multiple CPU cores. The outermost dimension gets split up into outer_dimension_partitions[0] pieces, the next-outermost dim gets split into outer_dimension_partitions[1] pieces, etc. It's illegal to partition a dimension into more shards than there are elements in that dimension.
Whether the kCustomCall instruction has side-effects, only present for kCustomCall.
The delta value for kRngGetAndUpdateState.
Specifies if the gather/scatter indices are guaranteed to be sorted by the caller.
Describes the [begin, end) index range and stride for slices.
Used in:
An abstraction representing a set of HLO modules built to run concurrently across different devices.
Serialization of HloModule.
Used in:
The array of computations is always in a valid dependency order, where callees appear before their callers.
The host program shape (with layout) of the entry computation.
The id of this module.
The schedule for this module.
Describes alias information between inputs and outputs.
Describes how to pretty-print a profile counter array gathered for a specific HloModule.
HloComputationInfos for every HloComputation in the HloModule.
The size of the profile counters array we will pretty-print.
Maps extra metric name to the index into the profile counters array.
Name of the entry computation.
Pretty-printer information about an HloComputation.
Used in:
The index into the profile counters array for the HloComputation corresponding to this HloComputationInfo.
HloInstructionInfos for every HloInstruction in the HloComputation corresponding to this HloComputationInfo.
Pretty-printer information about an HloInstruction.
Used in:
Metrics computed by HloCostAnalysis.
The index into the profile counters array for the HloInstruction corresponding to this HloInstructionInfo.
Grouping message that contains all of the information above.
Used in:
Options for the HLO insert-reduce-precision-operations pass.
Used in:
Exponent and mantissa bit counts for the reduced precision.
Operations matching these opcodes should be suffixed with reduce-precision operations.
Operations with names containing these substrings should be suffixed with reduce-precision operations.
Where and when the reduce-precision operations will be added.
Used in:
Add reduce-precision operations to the inputs of selected instructions. This is done before any optimization occurs.
Add reduce-precision operations to the outputs of selected instructions. This is done before any optimization occurs.
After operation-fusion occurs, add reduce-precision operations to the outputs of any selected instructions that have not been fused into fusion instructions.
After operation-fusion occurs, add reduce-precision operations to the inputs of any fusion instructions that contain operations matching the selection criteria.
After operation-fusion occurs, add reduce-precision operations to the outputs of any fusion instructions that contain operations matching the selection criteria.
Serialization of an HLO schedule. An HLO schedule contains a total order of instructions for each non-fusion computation in the module.
Used in:
Map from computation id to sequence.
Used in:
Encapsulates HloProto together with the arguments, result, and execution_platform. This message is used for purposes such as analysis/replay/file-storage.
Used in:
The hlo graph.
The arguments passed to the graph.
The result of the graph.
The name of the platform used to run the graph.
A layout describes how the array is placed in (1D) memory space. This includes the minor-to-major ordering of dimensions within a shape. Clients must specify the layouts of input Literals to the computation. Layouts specified in interior operations which take Shapes (for example, Convert) are ignored. See the XLA documentation for more information on shapes and layouts. LINT.IfChange
Used in:
The method used to store the data in memory. The format determines which of the other fields are used by the layout.
Sequence of dimension numbers, from minor (fastest varying index) to major (slowest varying index). This field is required.
The maximum number of elements that can be stored for SPARSE formats. This can be used to determine the maximum size in bytes of arrays stored in memory. This field must be unset unless the format is SPARSE.
A sequence of tiles, starting from the tile that's applied first to the Shape. TODO(b/119839262): implement tiling in each backend or add Unimplemented error.
Bit size of each element. If the size is bigger than what the element type requires, the value is stored in the least significant bits and the additional most significant bits are filled with 0's. TODO(b/119839262): implement in each backend or add Unimplemented error.
Memory space where this array resides. The integer field is interpreted in a backend-specific manner.
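To make the minor_to_major field above concrete, a small Python sketch that derives element strides for a dense (non-sparse, untiled) layout; the helper name is hypothetical.

  def strides_from_minor_to_major(dims, minor_to_major):
      # The first entry of minor_to_major is the fastest-varying dimension.
      strides = [0] * len(dims)
      running = 1
      for dim in minor_to_major:
          strides[dim] = running
          running *= dims[dim]
      return strides

  # For a 2x3 array, minor_to_major = [1, 0] (row-major) gives strides [3, 1];
  # minor_to_major = [0, 1] (column-major) gives strides [1, 2].
  print(strides_from_minor_to_major([2, 3], [1, 0]))  # [3, 1]
  print(strides_from_minor_to_major([2, 3], [0, 1]))  # [1, 2]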
Literals are used when the server and client need to exchange materialized data / results. Literals are also used to describe constants used in computations. Transfers to/from the client are encoded in literal form, and the structure of the repeated fields is implied by the shape.
Used in:
Stored as interleaved real, imag floats.
Stored as interleaved real, imag doubles.
The F16s, BF16s, U16s and S16s are encoded in little endian byte order
Next = 19
Serialization of LogicalBuffer.
Used in:
The location where the buffer is defined.
Location represents an instruction and its shape index, which uniquely identifies a point where a buffer is needed.
Used in:
NOTE: module_name isn't necessary, since all LogicalBuffers are associated with a single HloModule.
Symbolization metadata for HLO Instructions. This metadata is used for debugging XLA code generation, as well as performance profiling of XLA-generated executables.
Used in:
The framework op name that generated this XLA op. Frameworks that build on top of XLA should mirror the names of their ops back to users by specifying the op_type. In this way, even if the framework's "ops" are implemented as multiple XLA HLO Ops, they can be grouped appropriately. (e.g. if a SoftMax layer is emitted into XLA as multiple ops, then each op should have the op_type be "SoftMax".)
The user-specified name of the op. This name is often unique within a computation. Note: some frameworks add auto-generated names if the user does not provide one.
Indicate a file and line that this op is associated to in a user's program. e.g. it could be the file and line of user code that generated the op.
Used in:
The shape of the sharded tile.
The shape of the tile assignment tensor - this must be the same rank as tile_shape and the product of its dimensions must equal tile_assignment_devices.size().
Flattened list of device IDs. The order of flattening is the same as used by IndexUtil::MultiToLinearIndex(tile_assignment_shape).
If type == TUPLE, the sub-shardings, one per leaf node in the tuple shape, in pre-order. The tuple shape could be nested; here we store just a flattened list of all leaves in the tuple shape. Note that the tuple shape is not stored here; shardings do not store the shapes to which they are applied, this is inferred from the instruction this sharding gets attached to.
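A small Python sketch of how a tile index could be mapped to a device id from these fields, assuming the flattening is plain row-major (last dimension fastest varying), which corresponds to the default layout; names are illustrative only.

  def device_for_tile(tile_index, tile_assignment_dims, tile_assignment_devices):
      # Linearize the tile index in row-major order, then look up the device.
      linear = 0
      for dim_size, idx in zip(tile_assignment_dims, tile_index):
          linear = linear * dim_size + idx
      return tile_assignment_devices[linear]

  # Example: a 2x2 tile assignment over devices [0, 1, 2, 3];
  # tile (1, 0) maps to device 2.
  print(device_for_tile((1, 0), [2, 2], [0, 1, 2, 3]))  # 2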
Used in:
This sharding is replicated across all devices (implies maximal, all other fields are unused).
This sharding is maximal - one device runs the entire operation.
This sharding is a tuple - only the tuple_shardings field is valid.
None of the above; tile_shape and tile_assignment are both used.
Describes the padding configuration for Pad operation. The padding amount on both edges as well as between the elements are specified for each dimension.
Used in:
The padding configuration for all dimensions.
Describes the padding configuration for a dimension.
Used in:
Padding amount on the low-end (next to the index 0). May be negative.
Padding amount on the high-end (next to the highest index). May be negative.
Padding amount between the elements. May not be negative.
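To make the arithmetic concrete, the resulting size of one padded dimension can be computed as in this sketch (hypothetical helper, not part of the proto): interior padding is inserted between elements, and edge padding is added on each side.

  def padded_extent(base_size, low, high, interior):
      # Interior padding goes between elements; negative edge padding removes
      # elements from the corresponding end.
      spaced = base_size + max(base_size - 1, 0) * interior
      return spaced + low + high

  # Padding a length-4 dimension with low=1, high=2, interior=1 gives 4 + 3 + 1 + 2 = 10.
  print(padded_extent(4, 1, 2, 1))  # 10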
Describes whether all data-parallelism replicas will receive the same parameter data at each buffer.
Used in:
A list of boolean values for the flattened leaf buffers. Each value indicates whether the corresponding leaf buffer is replicated. If this field is empty, it means no buffer is replicated. Otherwise, the number of elements in this field must match the number of leaf buffers in the HLO instruction's shape.
Used to indicate the precision configuration. It has backend specific meaning.
Used in:
Used in:
Primitive types are the individual values that can be held in rectangular multidimensional arrays. A description of the rectangular multidimensional array dimensions / primitive type is given by Shape, below.
Used in:
Invalid primitive type to serve as default.
Predicates are two-state booleans.
Signed integral values of fixed width.
Unsigned integral values of fixed width.
Floating-point values of fixed width. Note: if f16s are not natively supported on the device, they will be converted to f16 from f32 at arbitrary points in the computation.
Truncated 16 bit floating-point format. This is similar to IEEE's 16 bit floating-point format, but uses 1 bit for the sign, 8 bits for the exponent and 7 bits for the mantissa.
Complex values of fixed width.
Paired F32 (real, imag), as in std::complex<float>.
Paired F64 (real, imag), as in std::complex<double>.
A tuple is a polymorphic sequence; e.g. a shape that holds different sub-shapes. They are used for things like returning multiple values from a computation; e.g. a computation that returns weights and biases may have a signature that results in a tuple like (f32[784x2000], f32[2000]). If a shape proto has the tuple element type, it may not have any entries in the dimensions field.
An opaque type used for passing context-specific data to a custom operation. Shapes of this primitive type will have empty dimensions and tuple_shapes fields. (OPAQUE would be a better name for this identifier, but that conflicts with a macro defined in windows.h.)
A token type threaded between side-effecting operations. Shapes of this primitive type will have empty dimensions and tuple_shapes fields.
Shape of the parameters and output of a computation (like a traditional function signature).
Used in:
Used in:
Creates a uniform-distribution-generated random number on the semi-open interval [parameter[0], parameter[1]).
Creates a normal-distribution-generated random number with mean parameter[0] and standard deviation parameter[1].
Describes the replica groups in a cross replica op (e.g., all-reduce and all-to-all).
Used in:
The ids of the replicas that belong to the same group. The ordering of the ids matters in some ops (e.g., all-to-all).
Describes the dimension numbers for a scatter operation. All the fields are similar to the corresponding fields in GatherDimensionNumbers. Differences are noted below.
Used in:
The set of dimensions in the updates shape that are window dimensions.
The set of window dimensions that must be inserted into the updates shape.
A shape describes the number of dimensions in the array, the size of each dimension, and the primitive component type. Tuples are a special case in that they have rank zero and have tuple_shapes defined. See the XLA documentation for more information on shapes and layouts. LINT.IfChange
Used in:
The element type for this shape.
The size (number of elements) for each dimension, or an upper bound on the size if the dimension is dynamic. In XLA, dimensions are numbered from 0 to N-1 for an N-dimensional array. The first element of 'dimensions' is the size of dimension 0, the second element is the size of dimension 1, and so forth. Empty list indicates a scalar. If the respective element in 'is_dimension_dynamic' is true then the value in this field represents an upper bound on the size of the dimension.
For tuples only, the shapes of constituent shapes in the tuple sequence.
The layout used to back this shape.
For arrays, this indicates whether or not each dimension is dynamically-sized. The number of elements in this repeated field should be zero (indicating that no dimensions are dynamic) or equal to the number of elements in the 'dimensions' field.
Describes the source target pair in the collective permute op.
Used in:
Describes a tile used in tiling-based layout. Refer to g3doc/third_party/tensorflow/compiler/xla/g3doc/layout_with_tiling.md for details about tiling-based layout.
Used in:
Number of elements in each dimension of the tile. It's ordered from the most major dimension of the tile to the most minor dimension of the tile. The dimensions correspond to a suffix of the dimensions of the shape being tiled.
Used in:
If true, solves ax = b. If false, solves xa = b.
If true, 'a' is lower triangular. If false, 'a' is upper triangular.
If true, the diagonal elements of 'a' are assumed to be 1 and not accessed.
Should we transpose or use the adjoint of 'a'?
Used in:
Don't transpose 'a'.
Transpose 'a'.
Complex conjugate and transpose 'a'.
A backend-config for kWhile loops that stores the loop's trip count, if it is known. This is useful for backends that can implement a `for i in 0..N` loop more efficiently than a `while` loop. For example, on GPUs, we can implement a `for i in 0..N` loop by enqueueing the kernels for the loop body N times, whereas implementing a `while` loop requires a host-device sync on each iteration.
This indirection lets us distinguish between known-trip-count == 0 and unknown-trip-count.
Used in:
Describes the windowing in an operation such as convolution. The window is moved across a base area and for each position of the window a computation is performed. The field below describes the window and the movement of the window across a base area.
Used in:
Used in:
The size of the window in this dimension. For a rectangle, this would be the width or height.
The stride at which the window moves across the base area in this dimension. In other words, this is the spacing between different positions of the window in this dimension.
If positive, means the amount of padding to add to the base area at the low end of this dimension; if negative, its negative means the number of elements removed from the low end of this dimension. For example, in the horizontal dimension of a rectangle, this would be the number of padding values to pad on the left, given that indices increase when going right. The actual padding value depends upon the context. Convolution pads with zeros. ReduceWindow and SelectAndScatter pads with the reduce function's init value.
As padding_low, but on the high end of this dimension. For example, in the horizontal dimension of a rectangle, this would be the number of values to pad on the right, given that indices increase when going right.
Dilation factor of the sliding window in this dimension. A dilation factor of 1 means no dilation. window_dilation - 1 no-op entries ("holes") are implicitly placed between each kernel element. This value may not be less than 1. See documentation for convolution.
Dilation factor of the base area in this dimension. A dilation factor of 1 means no dilation. base_dilation - 1 no-op entries ("holes") are implicitly placed between each base area element. This value may not be less than 1. See documentation for convolution.
Window reversal means that this dimension was logically reversed before the operation.
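Putting the per-dimension fields together, the number of window positions along one dimension follows the usual windowed-operation size arithmetic; the sketch below uses hypothetical names and is not taken from the proto.

  def output_extent(base_size, size, stride, padding_low, padding_high,
                    window_dilation=1, base_dilation=1):
      # Dilate the base area and the window, apply edge padding, then count
      # how many stride steps fit.
      dilated_base = (base_size - 1) * base_dilation + 1 if base_size > 0 else 0
      padded_base = dilated_base + padding_low + padding_high
      dilated_window = (size - 1) * window_dilation + 1
      if padded_base < dilated_window:
          return 0
      return (padded_base - dilated_window) // stride + 1

  # A 10-element base, size-3 window, stride 2, no padding or dilation: 4 positions.
  print(output_extent(10, 3, 2, 0, 0))  # 4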