package google.cluster_data

Get desktop application:
View/edit binary Protocol Buffers messages

Collection events apply to the collection as a whole.

Common fields shared between instances and collections.

optional int64 time = 1
Timestamp, in microseconds since the start of the trace.
optional EventType type = 2
What type of event is this?
optional int64 collection_id = 3
The identity of the collection.
optional LatencySensitivity scheduling_class = 4
How latency-sensitive is the collection?
optional MissingType missing_type = 5
Was there any missing data? If so, why?
optional CollectionType collection_type = 6
What type of collection is this?
optional int32 priority = 7
Cluster-level scheduling priority for the collection.
optional int64 alloc_collection_id = 8
The ID of the alloc set that this job is to run in, or NO_ALLOC_COLLECTION (only for jobs).
optional string user = 9
The user who runs the collection.
optional string collection_name = 10
Obfuscated name of the collection.
optional string collection_logical_name = 11
Obfuscated logical name of the collection.
optional int64 parent_collection_id = 12
ID of the collection that this is a child of. (Used for stopping a collection when the parent terminates.)
repeated int64 start_after_collection_ids = 13
IDs of collections that must finish before this collection may start.
optional int32 max_per_machine = 14
Maximum number of instances of this collection that may be placed on one machine (or 0 if unlimited).
optional int32 max_per_switch = 15
Maximum number of instances of this collection that may be placed on machines connected to a single Top of Rack switch (or 0 if unlimited).
optional VerticalScalingSetting vertical_scaling = 16
How/whether vertical scaling should be done for this collection.
optional Scheduler scheduler = 17
The preferred cluster scheduler to use.

Collections are either jobs (which have tasks) or alloc sets (which have alloc instances).

Used in: CollectionEvent, InstanceEvent, InstanceUsage

JOB = 0
ALLOC_SET = 1

Values used to indicate "not present" for special cases.

NO_MACHINE = 0
The thing is not bound to a machine.
DEDICATED_MACHINE = -1
The thing is bound to a dedicated machine.
NO_ALLOC_COLLECTION = 0
The thing is not running in an alloc set.
NO_ALLOC_INDEX = -1
The thing does not have an alloc instance index.

This enum is used in the 'type' field of the CollectionEvent and InstanceEvent tables.

Used in: CollectionEvent, InstanceEvent

SUBMIT = 0
The collection or instance was submitted to the scheduler for scheduling.
QUEUE = 1
The collection or instance was marked not eligible for scheduling by the batch scheduler.
ENABLE = 2
The collection or instance became eligible for scheduling.
SCHEDULE = 3
The collection or instance started running.
EVICT = 4
The collection or instance was descheduled because of a higher priority collection or instance, or because the scheduler overcommitted resources.
FAIL = 5
The collection or instance was descheduled due to a failure.
FINISH = 6
The collection or instance completed normally.
KILL = 7
The collection or instance was cancelled by the user or because a depended-upon collection died.
LOST = 8
The collection or instance was presumably terminated, but due to missing data there is insufficient information to identify when or how.
UPDATE_PENDING = 9
The collection or instance was updated (scheduling class or resource requirements) while it was waiting to be scheduled.
UPDATE_RUNNING = 10
The collection or instance was updated while it was scheduled somewhere.

Instance and collection events both share a common prefix, followed by specific fields. Information about an instance event (task or alloc instance).

Common fields shared between instances and collections.

optional int64 time = 1
Timestamp, in microseconds since the start of the trace.
optional EventType type = 2
What type of event is this?
optional int64 collection_id = 3
The identity of the collection that this instance is part of.
optional LatencySensitivity scheduling_class = 4
How latency-sensitive is the instance?
optional MissingType missing_type = 5
Was there any missing data? If so, why?
optional CollectionType collection_type = 6
What type of collection this instance belongs to.
optional int32 priority = 7
Cluster-level scheduling priority for the instance.
optional int64 alloc_collection_id = 8
(Tasks only) The ID of the alloc set that this task is running in, or NO_ALLOC_COLLECTION if it is not running in an alloc.
optional int32 instance_index = 9
(Begin: fields specific to instances.) The index of the instance in its collection (starts at 0).
optional int64 machine_id = 10
The ID of the machine on which this instance is placed (or NO_MACHINE if not placed on one, or DEDICATED_MACHINE if it's on a dedicated machine).
optional int32 alloc_instance_index = 11
(Tasks only) The index of the alloc instance that this task is running in, or NO_ALLOC_INDEX if it is not running in an alloc.
optional Resources resource_request = 12
The resources requested when the instance was submitted or last updated.
repeated MachineConstraint constraint = 13
Currently active scheduling constraints.

Information about resource consumption (usage) during a sample window (which is typically 300s, but may be shorter if the instance started and/or ended during a measurement window).

optional int64 start_time = 1
Sample window end points, in microseconds since the start of the trace.
optional int64 end_time = 2
optional int64 collection_id = 3
ID of collection that this instance belongs to.
optional int32 instance_index = 4
Index of this instance's position in that collection (starts at 0).
optional int64 machine_id = 5
Unique ID of the machine on which the instance has been placed.
optional int64 alloc_collection_id = 6
ID and index of the alloc collection + instance in which this instance is running, or NO_ALLOC_COLLECTION / NO_ALLOC_INDEX if it is not running inside an alloc.
optional int64 alloc_instance_index = 7
optional CollectionType collection_type = 8
Type of the collection that this instance belongs to.
optional Resources average_usage = 9
Average (mean) usage over the measurement period.
optional Resources maximum_usage = 10
Observed maximum usage over the measurement period. This measurement may be fully or partially missing in some cases.
optional Resources random_sample_usage = 11
Observed CPU usage during a randomly-sampled second within the measurement window. (No memory data is provided here.)
optional float assigned_memory = 12
The memory limit imposed on this instance; normally, it will not be allowed to exceed this amount of memory.
optional float page_cache_memory = 13
Amount of memory that is used for the instance's file page cache in the OS kernel.
optional float cycles_per_instruction = 14
Average (mean) number of processor and memory cycles per instruction.
optional float memory_accesses_per_instruction = 15
optional float sample_rate = 16
The average (mean) number of data samples collected per second (e.g., sample_rate=0.5 means a sample every 2 seconds on average).
repeated float cpu_usage_distribution = 17
CPU usage percentile data. The cpu_usage_distribution vector contains 11 elements, representing 0%ile (aka min), 10%ile, 20%ile, ... 90%ile, 100%ile (aka max) of the normalized CPU usage in NCUs. Note that the 100%ile may not exactly match the maximum_usage value because of interpolation effects.
repeated float tail_cpu_usage_distribution = 18
The tail_cpu_usage_distribution vector contains 9 elements, representing 91%ile, 92%ile, 93%ile, ... 98%ile, 99%ile of the normalized CPU resource usage in NCUs.

How latency-sensitive a thing is to CPU scheduling delays when running on a machine, in increasing-sensitivity order. Note that this is _not_ the same as the thing's cluster-scheduling priority although latency-sensitive things do tend to have higher priorities.

Used in: CollectionEvent, InstanceEvent

MOST_INSENSITIVE = 0
Also known as "best effort".
INSENSITIVE = 1
Often used for batch jobs.
SENSITIVE = 2
Used for latency-sensitive jobs.
MOST_SENSITIVE = 3
Used for the most latency-sensitive jobs.

A machine attribute update or (if time = 0) its initial value.

optional int64 time = 1
Timestamp, in microseconds since the start of the trace. [key]
optional int64 machine_id = 2
Unique ID of the machine within the cluster. [key]
optional string name = 3
Obfuscated unique name of the attribute (unique across all clusters). [key]
optional string value = 4
Value of the attribute. If this is unset, then 'deleted' must be true.
optional bool deleted = 5
True if the attribute is being deleted at this time.

A constraint represents a request for a thing to be placed on a machine (or machines) with particular attributes.

Used in: InstanceEvent

optional string name = 1
Obfuscated name of the constraint.
optional string value = 2
Target value for the constraint (e.g., a minimum or equality).
optional MachineConstraint.Relation relation = 3
Comparison operator.

Comparison operation between the supplied value and the machine's value. For EQUAL and NOT_EQUAL relationships, the comparison is a string comparison; for LESS_THAN, GREATER_THAN, etc., the values are converted to floating point numbers first; for PRESENT and NOT_PRESENT, the test is merely whether the supplied attribute exists for the machine in question, and the value field of the constraint is ignored.

Used in: MachineConstraint

EQUAL = 0
NOT_EQUAL = 1
LESS_THAN = 2
GREATER_THAN = 3
LESS_THAN_EQUAL = 4
GREATER_THAN_EQUAL = 5
PRESENT = 6
NOT_PRESENT = 7

Machine events describe the addition, removal, or update (change) of a machine in the cluster at a particular time.

optional int64 time = 1
Timestamp, in microseconds since the start of the trace. [key]
optional int64 machine_id = 2
Unique ID of the machine within the cluster. [key]
optional MachineEvent.EventType type = 3
Specifies the type of event.
optional string switch_id = 4
Obfuscated name of the Top of Rack switch that this machine is attached to.
optional Resources capacity = 5
Available resources that the machine supplies. (Note: may be smaller than the physical machine's raw capacity.)
optional string platform_id = 6
An obfuscated form of the machine platform (microarchitecture + motherboard design).
optional MachineEvent.MissingDataReason missing_data_reason = 7
Did we detect possibly-missing data?

Used in: MachineEvent

EVENT_TYPE_UNKNOWN = 0
Should never happen :-).
ADD = 1
Machine added to the cluster.
REMOVE = 2
Machine removed from cluster (usually due to failure or repairs).
UPDATE = 3
Machine capacity updated (while not removed).

If we detect that data is missing, how do we know this?

Used in: MachineEvent

MISSING_DATA_REASON_NONE = 0
No data is missing.
SNAPSHOT_BUT_NO_TRANSITION = 1
We observed that a change to the state of a machine must have occurred from an internal state snapshot, but did not see a corresponding transition event during the trace.

Represents reasons why we synthesized a scheduler event to replace apparently missing data.

Used in: CollectionEvent, InstanceEvent

MISSING_TYPE_NONE = 0
No data was missing.
SNAPSHOT_BUT_NO_TRANSITION = 1
NO_SNAPSHOT_OR_TRANSITION = 2
EXISTS_BUT_NO_CREATION = 3
TRANSITION_MISSING_STEP = 4
TOO_MANY_EVENTS = 5

A common structure for CPU and memory resource units. All resource measurements are normalized and scaled.

Used in: InstanceEvent, InstanceUsage, MachineEvent

optional float cpus = 1
Normalized GCUs (NCUs).
optional float memory = 2
Normalized RAM bytes.

Represents the type of scheduler that is handling a job.

Used in: CollectionEvent

SCHEDULER_DEFAULT = 0
Handled by the default cluster scheduler.
SCHEDULER_BATCH = 1
Handled by a secondary scheduler, optimized for batch loads.

How the collection is vertically auto-scaled.

Used in: CollectionEvent

VERTICAL_SCALING_SETTING_UNKNOWN = 0
We were unable to determine the setting.
VERTICAL_SCALING_OFF = 1
Vertical scaling was disabled, e.g., in the collection creation request.
VERTICAL_SCALING_CONSTRAINED = 2
Vertical scaling was enabled, with user-supplied lower and/or upper bounds for GCU and/or RAM.
VERTICAL_SCALING_FULLY_AUTOMATED = 3
Vertical scaling was enabled, with no user-provided bounds.

package google.cluster_data

message CollectionEvent

optional int64 time = 1

optional EventType type = 2

optional int64 collection_id = 3

optional LatencySensitivity scheduling_class = 4

optional MissingType missing_type = 5

optional CollectionType collection_type = 6

optional int32 priority = 7

optional int64 alloc_collection_id = 8

optional string user = 9

optional string collection_name = 10

optional string collection_logical_name = 11

optional int64 parent_collection_id = 12

repeated int64 start_after_collection_ids = 13

optional int32 max_per_machine = 14

optional int32 max_per_switch = 15

optional VerticalScalingSetting vertical_scaling = 16

optional Scheduler scheduler = 17

enum CollectionType

JOB = 0

ALLOC_SET = 1

enum Constants

NO_MACHINE = 0

DEDICATED_MACHINE = -1

NO_ALLOC_COLLECTION = 0

NO_ALLOC_INDEX = -1

enum EventType

SUBMIT = 0

QUEUE = 1

ENABLE = 2

SCHEDULE = 3

EVICT = 4

FAIL = 5

FINISH = 6

KILL = 7

LOST = 8

UPDATE_PENDING = 9

UPDATE_RUNNING = 10

message InstanceEvent

optional int64 time = 1

optional EventType type = 2

optional int64 collection_id = 3

optional LatencySensitivity scheduling_class = 4

optional MissingType missing_type = 5

optional CollectionType collection_type = 6

optional int32 priority = 7

optional int64 alloc_collection_id = 8

optional int32 instance_index = 9

optional int64 machine_id = 10

optional int32 alloc_instance_index = 11

optional Resources resource_request = 12

repeated MachineConstraint constraint = 13

message InstanceUsage

optional int64 start_time = 1

optional int64 end_time = 2

optional int64 collection_id = 3

optional int32 instance_index = 4

optional int64 machine_id = 5

optional int64 alloc_collection_id = 6

optional int64 alloc_instance_index = 7

optional CollectionType collection_type = 8

optional Resources average_usage = 9

optional Resources maximum_usage = 10

optional Resources random_sample_usage = 11

optional float assigned_memory = 12

optional float page_cache_memory = 13

optional float cycles_per_instruction = 14

optional float memory_accesses_per_instruction = 15

optional float sample_rate = 16

repeated float cpu_usage_distribution = 17

repeated float tail_cpu_usage_distribution = 18

enum LatencySensitivity

MOST_INSENSITIVE = 0

INSENSITIVE = 1

SENSITIVE = 2

MOST_SENSITIVE = 3

message MachineAttribute

optional int64 time = 1

optional int64 machine_id = 2