package tensorflow.data

Get desktop application:
View/edit binary Protocol Buffers messages

rpc ClientHeartbeat (ClientHeartbeatRequest, ClientHeartbeatResponse)
dispatcher.proto:216
Heartbeats from the client. This lets the dispatcher know that the client is still active, and gives the dispatcher a chance to notify the client of new tasks.
message ClientHeartbeatRequest
dispatcher.proto:135
Next tag: 5
- int64 job_client_id = 1
  The job client id to heartbeat for.
- oneof optional_current_round
  Reports which round the client is currently reading from when doing round-robin reads.
  - int64 current_round = 2
- oneof optional_blocked_round
  Reports whether the client has successfully blocked the indicated round from starting. This enables the dispatcher to add a new task in the blocked round or later.
  - int64 blocked_round = 4
message ClientHeartbeatResponse
dispatcher.proto:153
Next tag: 4
- repeated TaskInfo task_info = 1
  A list of all tasks that the client should read from.
- oneof optional_block_round
  Tells the client not to start the given round if possible.
  - int64 block_round = 3
- bool job_finished = 2
  Whether the job has finished.
rpc GetDatasetDef (GetDatasetDefRequest, GetDatasetDefResponse)
dispatcher.proto:187
Gets a dataset definition.
message GetDatasetDefRequest
dispatcher.proto:39
Next tag: 2
- int64 dataset_id = 1
message GetDatasetDefResponse
dispatcher.proto:44
Next tag: 2
- optional DatasetDef dataset_def = 1
rpc GetOrCreateJob (GetOrCreateJobRequest, GetOrCreateJobResponse)
dispatcher.proto:204
Gets a job if it already exists, otherwise creates it.
message GetOrCreateJobRequest
dispatcher.proto:91
Next tag: 8
- int64 dataset_id = 1
  The id of the dataset to create a job for.
- ProcessingModeDef processing_mode = 2
  A mode controlling how the tf.data service produces data for the job.
- optional JobKey job_key = 5
  Optional job key identifying a shared job. If not set, the RPC will always create a new job.
- oneof optional_num_consumers
  Optional number of consumers. If set, the job's tasks will provide their elements to consumers round-robin.
  - int64 num_consumers = 7
message GetOrCreateJobResponse
dispatcher.proto:108
Next tag: 2
- int64 job_client_id = 1
  An id for the client that will read from the job. When the client is done with the job, they should call ReleaseJobClient with this id.
rpc GetOrRegisterDataset (GetOrRegisterDatasetRequest, GetOrRegisterDatasetResponse)
dispatcher.proto:200
Registers a dataset with the server, or returns its id if it is already registered. The dataset is constructed in a new graph, so it must not refer to external resources or variables.
message GetOrRegisterDatasetRequest
dispatcher.proto:70
Next tag: 2
- optional DatasetDef dataset = 1
  The dataset to register.
message GetOrRegisterDatasetResponse
dispatcher.proto:76
Next tag: 2
- int64 dataset_id = 1
  The id for the registered dataset.
rpc GetSplit (GetSplitRequest, GetSplitResponse)
dispatcher.proto:190
Gets the next split for a given job.
message GetSplitRequest
dispatcher.proto:49
Next tag: 4
- int64 job_id = 1
- int64 repetition = 2
- int64 split_provider_index = 3
message GetSplitResponse
dispatcher.proto:56
Next tag: 3
- optional TensorProto split = 1
- bool end_of_splits = 2
rpc GetVersion (GetVersionRequest, GetVersionResponse)
dispatcher.proto:193
Returns the API version of the server.
message GetVersionRequest
dispatcher.proto:62
Next tag: 1
(message has no fields)
message GetVersionResponse
dispatcher.proto:65
Next tag: 2
- int64 version = 1
rpc GetWorkers (GetWorkersRequest, GetWorkersResponse)
dispatcher.proto:219
Reports a list of all workers registered with the dispatcher.
message GetWorkersRequest
dispatcher.proto:171
Next tag: 1
(message has no fields)
message GetWorkersResponse
dispatcher.proto:174
Next tag: 2
- repeated WorkerInfo workers = 1
  A list of all workers.
rpc MaybeRemoveTask (MaybeRemoveTaskRequest, MaybeRemoveTaskResponse)
dispatcher.proto:207
Attempts to remove a task from a round-robin read job.
message MaybeRemoveTaskRequest
dispatcher.proto:115
Next tag: 4
- int64 task_id = 1
- int64 consumer_index = 2
- int64 round = 3
message MaybeRemoveTaskResponse
dispatcher.proto:122
Next tag: 2
- bool removed = 1
rpc ReleaseJobClient (ReleaseJobClientRequest, ReleaseJobClientResponse)
dispatcher.proto:210
Releases a job client so that a job may eventually be cleaned up.
message ReleaseJobClientRequest
dispatcher.proto:127
Next tag: 2
- int64 job_client_id = 1
message ReleaseJobClientResponse
dispatcher.proto:132
Next tag: 1
(message has no fields)
rpc WorkerHeartbeat (WorkerHeartbeatRequest, WorkerHeartbeatResponse)
dispatcher.proto:181
Performs a periodic worker heartbeat.
message WorkerHeartbeatRequest
dispatcher.proto:17
Next tag: 4
- string worker_address = 1
- string transfer_address = 3
- repeated int64 current_tasks = 2
message WorkerHeartbeatResponse
dispatcher.proto:24
Next tag: 3
- repeated TaskDef new_tasks = 1
- repeated int64 tasks_to_delete = 2
rpc WorkerUpdate (WorkerUpdateRequest, WorkerUpdateResponse)
dispatcher.proto:184
Updates the dispatcher with information about the worker's state.
message WorkerUpdateRequest
dispatcher.proto:30
Next tag: 3
- string worker_address = 1
- repeated TaskProgress updates = 2
message WorkerUpdateResponse
dispatcher.proto:36
Next tag: 1
(message has no fields)

rpc GetElement (GetElementRequest, GetElementResponse)
worker.proto:59
Gets the next dataset element.
message GetElementRequest
worker.proto:14
- int64 task_id = 1
  The task to fetch an element from.
- oneof optional_consumer_index
  Optional index to identify the consumer.
  - int64 consumer_index = 2
- oneof optional_round_index
  Optional round index, indicating which round of round-robin the consumer wants to read from. This is used to keep consumers in sync.
  - int64 round_index = 3
- bool skipped_previous_round = 4
  Whether the previous round was skipped. This information is needed by the worker to recover after restarts.
- bool allow_skip = 5
  Whether to skip the round if data isn't ready fast enough.
message GetElementResponse
worker.proto:33
- oneof element
  The produced element.
  - CompressedElement compressed = 3
  - UncompressedElement uncompressed = 5
- int64 element_index = 6
  The element's index within the task it came from.
- bool end_of_sequence = 2
  Boolean to indicate whether the iterator has been exhausted.
- bool skip_task = 4
  Indicates whether the round was skipped.
rpc GetWorkerTasks (GetWorkerTasksRequest, GetWorkerTasksResponse)
worker.proto:62
Gets the tasks currently being executed by the worker.
message GetWorkerTasksRequest
worker.proto:48
Named GetWorkerTasks to avoid conflicting with GetTasks in dispatcher.proto
(message has no fields)
message GetWorkerTasksResponse
worker.proto:50
- repeated TaskInfo tasks = 1
rpc ProcessTask (ProcessTaskRequest, ProcessTaskResponse)
worker.proto:56
Processes a task for a dataset, making elements available to clients.
message ProcessTaskRequest
worker.proto:8
- optional TaskDef task = 1
message ProcessTaskResponse
worker.proto:12
(message has no fields)

Next tag: 3

Used in: Update

int64 job_id = 1
int64 job_client_id = 2

Represents the type of auto-sharding we enable.

Used in: DistributeOptions

AUTO = 0
AUTO: Attempts FILE-based sharding, falling back to DATA-based sharding.
FILE = 1
FILE: Shards by input files (i.e. each worker will get a set of files to process). When this option is selected, make sure that there are at least as many files as workers. If there are fewer input files than workers, a runtime error will be raised.
DATA = 2
DATA: Shards by elements produced by the dataset. Each worker will process the whole dataset and discard the portion that is not for itself. Note that for this mode to correctly partition the dataset elements, the dataset needs to produce elements in a deterministic order.
HINT = 3
HINT: Looks for the presence of `shard(SHARD_HINT, ...)` which is treated as a placeholder to replace with `shard(num_workers, worker_index)`.
OFF = -1
OFF: No sharding will be performed.

Updates dispatcher state based on a client heartbeat. Next tag: 4

Used in: Update

int64 job_client_id = 1
bool task_accepted = 2
optional TaskRejected task_rejected = 3

Metadata describing a compressed component of a dataset element.

Used in: CompressedElement

DataType dtype = 1
The dtype of the component tensor.
optional TensorShapeProto tensor_shape = 2
The shape of the component tensor.
int64 tensor_size_bytes = 3
Size of the uncompressed tensor bytes. For tensors serialized as TensorProtos, this is TensorProto::BytesAllocatedLong(). For raw Tensors, this is the size of the buffer underlying the Tensor.

Used in: GetElementResponse

bytes data = 1
Compressed tensor bytes for all components of the element.
repeated CompressedComponentMetadata component_metadata = 2
Metadata for the components of the element.

Next tag: 9

Used in: Update

int64 job_id = 1
int64 dataset_id = 2
ProcessingModeDef processing_mode = 3
int64 num_split_providers = 8
optional NamedJobKeyDef named_job_key = 4
Only some jobs have names, so this may be unset.
oneof optional_num_consumers
Optional number of consumers. If set, the job's tasks will provide their elements to consumers round-robin.
- int64 num_consumers = 7

Next tag: 6

Used in: Update

int64 task_id = 1
int64 job_id = 2
string worker_address = 3
string transfer_address = 4
int64 starting_round = 5

Next tag: 7

Used in: Update

int64 task_id = 1
int64 job_id = 2
string worker_address = 4
string transfer_address = 6

Next tag: 2

Used in: GetDatasetDefResponse, GetOrRegisterDatasetRequest, TaskDef

optional GraphDef graph = 1
We represent datasets as tensorflow GraphDefs which define the operations needed to create a tf.data dataset.

Used in: Options

AutoShardPolicy auto_shard_policy = 1
oneof optional_num_devices
The number of devices attached to this input pipeline.
- int32 num_devices = 2

Represents how to handle external state during serialization.

Used in: Options

POLICY_WARN = 0
POLICY_IGNORE = 1
POLICY_FAIL = 2

Next tag: 2

Used in: Update

int64 task_id = 1

Next tag: 2

Used in: Update

int64 job_id = 1

Next tag: 3

Used in: GetOrCreateJobRequest

string job_name = 1
A name for the job.
int64 job_name_index = 2
An index for the job. Multiple jobs can be created for the same name, if they have different indices.

Next tag: 3

Used in: CreateJobUpdate

string name = 1
int64 index = 2

Used in: Options

oneof optional_apply_default_optimizations
Whether to apply default graph optimizations. If False, only graph optimizations that have been explicitly enabled will be applied.
- bool apply_default_optimizations = 1
oneof optional_autotune
Whether to automatically tune performance knobs.
- bool autotune = 2
oneof optional_autotune_buffers
When autotuning is enabled (through autotune), determines whether to also autotune buffer sizes for datasets with parallelism.
- bool autotune_buffers = 3
oneof optional_autotune_cpu_budget
When autotuning is enabled (through autotune), determines the CPU budget to use. Values greater than the number of schedulable CPU cores are allowed but may result in CPU contention.
- int32 autotune_cpu_budget = 4
oneof optional_autotune_ram_budget
When autotuning is enabled (through autotune), determines the RAM budget to use. Values greater than the available RAM in bytes may result in OOM. If 0, defaults to half of the available RAM in bytes.
- int64 autotune_ram_budget = 5
oneof optional_filter_fusion
Whether to fuse filter transformations.
- bool filter_fusion = 6
oneof optional_map_and_batch_fusion
Whether to fuse map and batch transformations.
- bool map_and_batch_fusion = 9
oneof optional_map_and_filter_fusion
Whether to fuse map and filter transformations.
- bool map_and_filter_fusion = 10
oneof optional_map_fusion
Whether to fuse map transformations.
- bool map_fusion = 11
oneof optional_map_parallelization
Whether to parallelize stateless map transformations.
- bool map_parallelization = 12
oneof optional_noop_elimination
Whether to eliminate no-op transformations.
- bool noop_elimination = 14
oneof optional_parallel_batch
Whether to parallelize copying of batch elements. This optimization is highly experimental and can cause performance degradation (e.g. when the parallelization overhead exceeds the benefits of performing the data copies in parallel). You should only enable this optimization if a) your input pipeline is bottlenecked on batching and b) you have validated that this optimization improves performance.
- bool parallel_batch = 15
oneof optional_shuffle_and_repeat_fusion
Whether to fuse shuffle and repeat transformations.
- bool shuffle_and_repeat_fusion = 17

Message stored with Dataset objects to control how datasets are processed and optimized.

oneof optional_deterministic
Whether the outputs need to be produced in deterministic order.
- bool deterministic = 1
optional DistributeOptions distribute_options = 2
The distribution strategy options associated with the dataset.
optional OptimizationOptions optimization_options = 3
The optimization options associated with the dataset.
oneof optional_slack
Whether to introduce 'slack' in the last `prefetch` of the input pipeline, if it exists. This may reduce CPU contention with accelerator host-side activity at the start of a step. The slack frequency is determined by the number of devices attached to this input pipeline.
- bool slack = 4
optional ThreadingOptions threading_options = 5
The threading options associated with the dataset.
oneof optional_external_state_policy
This option can be used to override the default policy for how to handle external state when serializing a dataset or checkpointing its iterator. There are three settings available - IGNORE: External state is ignored without a warning; WARN: External state is ignored and a warning is logged; FAIL: External state results in an error.
- ExternalStatePolicy external_state_policy = 6

Next tag: 3

Used in: CreateJobUpdate, GetOrCreateJobRequest, TaskDef

INVALID = 0
PARALLEL_EPOCHS = 1
Each tf.data worker processes an entire epoch.
DISTRIBUTED_EPOCH = 2
Processing of an epoch is distributed across all tf.data workers.

Next tag: 5

Used in: Update

int64 job_id = 1
int64 repetition = 2
int64 split_provider_index = 4
bool finished = 3
Whether the split provider reached its end.

Next tag: 3

Used in: Update

int64 dataset_id = 1
uint64 fingerprint = 2

Next tag: 3

Used in: Update

string worker_address = 1
string transfer_address = 2

Next tag: 3

Used in: Update

int64 job_client_id = 1
int64 time_micros = 2
The time when the client was released, measured in microseconds since the epoch.

Next tag: 2

Used in: Update

int64 task_id = 1

Next tag: 10

Used in: ProcessTaskRequest, WorkerHeartbeatResponse

oneof dataset
The dataset to iterate over.
- DatasetDef dataset_def = 1
- string path = 2
int64 dataset_id = 3
int64 task_id = 4
int64 job_id = 5
int64 num_split_providers = 9
In distributed epoch processing mode, we use one split provider for each source that feeds into the dataset. In parallel_epochs mode, `num_split_providers` is always zero.
string worker_address = 8
Address of the worker that the task is assigned to.
ProcessingModeDef processing_mode = 6
oneof optional_num_consumers
Optional number of consumers. If set, the results of the task will be provided to consumers round-robin.
- int64 num_consumers = 7

Next tag: 6

Used in: ClientHeartbeatResponse, GetWorkerTasksResponse

string worker_address = 1
The address of the worker processing the task.
string transfer_address = 4
The transfer address of the worker processing the task.
int64 task_id = 2
The task id.
int64 job_id = 3
The id of the job that the task is part of.
int64 starting_round = 5
The round to start reading from the task in. For non-round-robin reads, this is always 0.

Next tag: 3

Used in: WorkerUpdateRequest

int64 task_id = 1
The task that this message is about.
bool completed = 2
Whether the task has completed.

Indicates that a client failed to block before reaching the target round. Next tag: 2

Used in: ClientHeartbeatUpdate

int64 new_target_round = 1
A new target round to try adding the task in.

message ThreadingOptions

dataset_options.proto:112

Used in: Options

oneof optional_max_intra_op_parallelism
If set, it overrides the maximum degree of intra-op parallelism.
- int32 max_intra_op_parallelism = 1
oneof optional_private_threadpool_size
If set, the dataset will use a private threadpool of the given size.
- int32 private_threadpool_size = 2

An uncompressed dataset element.

Used in: GetElementResponse

repeated TensorProto components = 1

Message representing journaled dispatcher metadata updates. When we apply one of these changes to the dispatcher's in-memory state, we also write an Update message to the journal. Next tag: 13

oneof update_type
- RegisterDatasetUpdate register_dataset = 1
- RegisterWorkerUpdate register_worker = 5
- CreateJobUpdate create_job = 2
- ProduceSplitUpdate produce_split = 8
- AcquireJobClientUpdate acquire_job_client = 6
- ReleaseJobClientUpdate release_job_client = 7
- GarbageCollectJobUpdate garbage_collect_job = 12
- RemoveTaskUpdate remove_task = 11
- CreatePendingTaskUpdate create_pending_task = 9
- ClientHeartbeatUpdate client_heartbeat = 10
- CreateTaskUpdate create_task = 3
- FinishTaskUpdate finish_task = 4

Next tag: 3

Used in: GetWorkersResponse

string address = 1
int64 id = 2

package tensorflow.data

service DispatcherService

rpc ClientHeartbeat (ClientHeartbeatRequest, ClientHeartbeatResponse)

message ClientHeartbeatRequest

int64 job_client_id = 1

oneof optional_current_round

int64 current_round = 2

oneof optional_blocked_round

int64 blocked_round = 4

message ClientHeartbeatResponse

repeated TaskInfo task_info = 1

oneof optional_block_round

int64 block_round = 3

bool job_finished = 2

rpc GetDatasetDef (GetDatasetDefRequest, GetDatasetDefResponse)

message GetDatasetDefRequest

int64 dataset_id = 1

message GetDatasetDefResponse

optional DatasetDef dataset_def = 1

rpc GetOrCreateJob (GetOrCreateJobRequest, GetOrCreateJobResponse)

message GetOrCreateJobRequest

int64 dataset_id = 1

ProcessingModeDef processing_mode = 2

optional JobKey job_key = 5

oneof optional_num_consumers

int64 num_consumers = 7

message GetOrCreateJobResponse

int64 job_client_id = 1

rpc GetOrRegisterDataset (GetOrRegisterDatasetRequest, GetOrRegisterDatasetResponse)

message GetOrRegisterDatasetRequest

optional DatasetDef dataset = 1

message GetOrRegisterDatasetResponse

int64 dataset_id = 1

rpc GetSplit (GetSplitRequest, GetSplitResponse)

message GetSplitRequest

int64 job_id = 1

int64 repetition = 2

int64 split_provider_index = 3

message GetSplitResponse

optional TensorProto split = 1

bool end_of_splits = 2

rpc GetVersion (GetVersionRequest, GetVersionResponse)

message GetVersionRequest

message GetVersionResponse

int64 version = 1

rpc GetWorkers (GetWorkersRequest, GetWorkersResponse)

message GetWorkersRequest

message GetWorkersResponse

repeated WorkerInfo workers = 1

rpc MaybeRemoveTask (MaybeRemoveTaskRequest, MaybeRemoveTaskResponse)

message MaybeRemoveTaskRequest

int64 task_id = 1

int64 consumer_index = 2

int64 round = 3

message MaybeRemoveTaskResponse

bool removed = 1

rpc ReleaseJobClient (ReleaseJobClientRequest, ReleaseJobClientResponse)

message ReleaseJobClientRequest

int64 job_client_id = 1

message ReleaseJobClientResponse

rpc WorkerHeartbeat (WorkerHeartbeatRequest, WorkerHeartbeatResponse)

message WorkerHeartbeatRequest

string worker_address = 1

string transfer_address = 3

repeated int64 current_tasks = 2

message WorkerHeartbeatResponse

repeated TaskDef new_tasks = 1

repeated int64 tasks_to_delete = 2

rpc WorkerUpdate (WorkerUpdateRequest, WorkerUpdateResponse)

message WorkerUpdateRequest

string worker_address = 1

repeated TaskProgress updates = 2

message WorkerUpdateResponse

service WorkerService

rpc GetElement (GetElementRequest, GetElementResponse)

message GetElementRequest

int64 task_id = 1

oneof optional_consumer_index

int64 consumer_index = 2

oneof optional_round_index