package mediapipe

Get desktop application:
View/edit binary Protocol Buffers messages

Affine according to $\begin{bmatrix} a & b \\ c & d \end{bmatrix} x + \begin{bmatrix} dx \\ dy \end{bmatrix}$

optional float dx = 1
optional float dy = 2
optional float a = 3
optional float b = 4
optional float c = 5
optional float d = 6

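Transforms a 3D color vector x = (c1, c2, c3) according to $\begin{bmatrix} g_{00} & g_{01} & g_{02} & g_{03} \\ g_{10} & g_{11} & g_{12} & g_{13} \\ g_{20} & g_{21} & g_{22} & g_{23} \end{bmatrix} \begin{bmatrix} c1 \\ c2 \\ c3 \\ 1 \end{bmatrix}$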

Used in: MixtureAffineToneModel, ToneChange

optional float g_00 = 1
optional float g_01 = 2
optional float g_02 = 3
optional float g_03 = 4
optional float g_10 = 5
optional float g_11 = 6
optional float g_12 = 7
optional float g_13 = 8
optional float g_20 = 9
optional float g_21 = 10
optional float g_22 = 11
optional float g_23 = 12

The anchor representation for object detection.

required float x_center = 1
Encoded anchor box center.
required float y_center = 2
required float h = 3
Encoded anchor box height.
required float w = 4
Encoded anchor box width.

Options for the AnnotationOverlayCalculator.

optional int32 canvas_width_px = 2
The canvas width and height in pixels, and the background color. These options are used only if an input stream of ImageFrame isn't provided to the renderer calculator. If an input stream of ImageFrame is provided, then the calculator renders the annotations on top of the provided image, else a canvas is created with the dimensions and background color specified in these options and the annotations are rendered on top of this canvas.
optional int32 canvas_height_px = 3
optional Color canvas_color = 4
optional bool flip_text_vertically = 5
Whether text should be rendered upside down. When it's set to false, text is rendered normally assuming the underlying image has its origin at the top-left corner. Therefore, for images with the origin at the bottom-left corner this should be set to true.
optional bool gpu_uses_top_left_origin = 6
Whether input stream IMAGE_GPU (OpenGL texture) has bottom-left or top-left origin. (Historically, OpenGL uses bottom left origin, but most MediaPipe examples expect textures to have top-left origin.)
optional float gpu_scale_factor = 7
Scale factor for intermediate image for GPU rendering. This can be used to speed up annotation by drawing the annotation on an intermediate image with a reduced scale, e.g. 0.5 (of the input image width and height), before resizing and overlaying it on top of the input image.

optional float min_similarity_threshold = 1

repeated AudioStreamOptions audio_stream = 1
optional double start_time = 2
The start time in seconds to decode.
optional double end_time = 3
The end time in seconds to decode (inclusive).

Used in: AudioDecoderOptions

optional int64 stream_index = 1
The stream to decode. Stream indexes start from 0 (audio and video are handled separately).
optional bool allow_missing = 2
Process the file despite this stream not being present.
optional bool ignore_decode_failures = 3
If true, failures to decode a frame of data will be ignored.
optional bool output_regressing_timestamps = 4
Output packets with regressing timestamps. By default those packets are dropped.
optional bool correct_pts_for_rollover = 5
MPEG PTS timestamps roll over back to 0 after 26.5h. If this flag is set we detect any rollover and continue incrementing timestamps past this point. Set this flag if you want non-regressing timestamps for MPEG content where the PTS may roll over.

optional float sigma_color = 1
Max variance in color allowed, based on normalized color values.
optional float sigma_space = 2
Window radius. Results in a '(sigma_space*2+1) x (sigma_space*2+1)' size kernel. This should be set based on output image pixel space.

Binary feature descriptor for a particular feature. For example: orb http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.370.4395&rep=rep1&type=pdf

Used in: BoxDetectorIndex.BoxEntry.FrameEntry, RegionFlowFeature, TrackingData.MotionData

optional bytes data = 1

TrackingData in compressed binary format. Obtainable via FlowPackager::EncodeTrackingData. Details of binary encode are below.

TrackingContainer::header = "TRAK"

Used in: TrackingContainerProto

optional bytes data = 1

A representation of a bounding box.

Used in: Locus

optional int32 left_x = 1
optional int32 upper_y = 2
optional int32 right_x = 3
optional int32 lower_y = 4

optional BoxDetectorOptions detector_options = 1
repeated string index_proto_filename = 2
File path to the template index files.

Proto to hold BoxDetector's internal search index.

repeated BoxDetectorIndex.BoxEntry box_entry = 1

message BoxDetectorIndex.BoxEntry

box_detector.proto:83

Message to hold keypoints and descriptors for each box.

Used in: BoxDetectorIndex

repeated BoxEntry.FrameEntry frame_entry = 1

message BoxDetectorIndex.BoxEntry.FrameEntry

box_detector.proto:87

Message to hold keypoints and descriptors for each appearance. One box could have multiple appearances to account for shape and perspective change, etc..

Used in: BoxEntry

optional TimedBoxProto box = 1
repeated float keypoints = 2
repeated BinaryFeatureDescriptor descriptors = 3

Used in: BoxDetectorCalculatorOptions

optional BoxDetectorOptions.IndexType index_type = 1
optional int32 detect_every_n_frame = 2
Decides whether to force a detector run every N frames. 0 means detection will never be called, 1 means detect every frame, 2 means detect every other frame, etc. Currently only applied to image query mode.
optional bool detect_out_of_fov = 4
Enable box detection when tracked boxes are out of FOV. Detection will cease after the detector successfully re-acquires the box.
optional BoxDetectorOptions.ImageQuerySettings image_query_settings = 3
Options for detection function with image query.
optional int32 descriptor_dims = 5
Dimensions (number of elements) for feature descriptor.
optional int32 min_num_correspondence = 6
Minimum number of correspondence to go through RANSAC.
optional float ransac_reprojection_threshold = 7
Reprojection threshold for RANSAC to find inliers.
optional float max_match_distance = 8
Max distance to match 2 NIMBY features.
optional float max_perspective_factor = 9
Max perspective change factor.

Options only for detection from image queries.

Used in: BoxDetectorOptions

optional int32 pyramid_bottom_size = 1
Resize the input image's longer edge to this size. Skip resizing if the input size is already smaller than this size.
optional float pyramid_scale_factor = 2
Scale factor between adjacent pyramid levels.
optional int32 max_pyramid_levels = 3
Maximum number of pyramid levels.
optional int32 max_features = 4
Max number of features the detector uses.

Available types of detector's index and search structure.

Used in: BoxDetectorOptions

INDEX_UNSPECIFIED = 0
OPENCV_BF = 1
BFMatcher from OpenCV

optional BoxTrackerOptions tracker_options = 1
optional TimedBoxProtoList initial_position = 2
Initial position to be tracked. Can also be supplied as side packet or as input stream.
optional bool visualize_tracking_data = 3
If set and VIZ stream is present, renders tracking data into the visualization.
optional bool visualize_state = 4
If set and VIZ stream is present, renders the box state into the visualization.
optional bool visualize_internal_state = 5
If set and VIZ stream is present, renders the internal box state into the visualization.
optional int32 streaming_track_data_cache_size = 6
Size of the track data cache during streaming mode. This allows to buffer track_data's for fast forward tracking, i.e. any TimedBox received via input stream START_POS can be tracked towards the current track head (i.e. last received TrackingData). Measured in number of frames.
optional int32 start_pos_transition_frames = 7
Add a transition period of N frames to smooth the jump from original tracking to reset start pos with motion compensation. The transition will be a linear decay of original tracking result. 0 means no transition.

Used in: BoxTrackerCalculatorOptions

optional int32 caching_chunk_size_msec = 1
Chunk size for caching files. Should be equal to those written by the FlowPackagerCalculator.
optional string cache_file_format = 2
Chunk file format.
optional int32 num_tracking_workers = 3
Number of simultaneous tracking requests.
optional int32 read_chunk_timeout_msec = 4
Maximum waiting time for next chunk, till function times out.
optional bool record_path_states = 5
If set, box tracker will record the state for each computed TimedBox across all paths.
optional TrackStepOptions track_step_options = 6
Actual tracking options to be used for every step.

optional double test_field = 1

Describes the topology and function of a MediaPipe Graph. The graph of Nodes must be a Directed Acyclic Graph (DAG) except as annotated by "back_edge" in InputStreamInfo. Use a mediapipe::CalculatorGraph object to run the graph.

Used in: CalculatorGraphTemplate, GraphProfile

repeated CalculatorGraphConfig.Node node = 1
The nodes.
repeated PacketFactoryConfig packet_factory = 6
Create a side packet using a PacketFactory. This side packet is created as close to the worker that does the work as possible. A PacketFactory is basically a PacketGenerator that takes no input side packets and produces a single output side packet.
repeated PacketGeneratorConfig packet_generator = 7
Configs for PacketGenerators. Generators take zero or more input side packets and produce any number of output side packets. For example, MediaDecoderCalculator takes an input side packet with type DeletingFile. However, most users want to specify videos by ContentIdHex (i.e. video id). By using the VideoIdToLocalFileGenerator, a user can specify a video id (as a string) and obtain a DeletingFile to use with the decoder. PacketGenerators can take as an input side packet the output side packet of another PacketGenerator. The graph of PacketGenerators must be a directed acyclic graph.
int32 num_threads = 8
Number of threads for running calculators in multithreaded mode. If not specified, the scheduler will pick an appropriate number of threads depending on the number of available processors. To run on the calling thread, specify "ApplicationThreadExecutor" see: http://g3doc/mediapipe/g3doc/running.md.
repeated StatusHandlerConfig status_handler = 9
Configs for StatusHandlers that will be called after each call to Run() on the graph. StatusHandlers take zero or more input side packets and the absl::Status returned by a graph run. For example, a StatusHandler could store information about graph failures and their causes for later monitoring. Note that graph failures during initialization may cause required input side packets (created by a PacketFactory or PacketGenerator) to be missing. In these cases, the handler with missing input side packets will be skipped.
repeated string input_stream = 10
Specify input streams to the entire graph. Streams specified here may have packets added to them using CalculatorGraph::AddPacketToInputStream. This works much like a source calculator, except that the source is outside of the mediapipe graph.
repeated string output_stream = 15
Output streams for the graph when used as a subgraph.
repeated string input_side_packet = 16
Input side packets for the graph when used as a subgraph.
repeated string output_side_packet = 17
Output side packets for the graph when used as a subgraph.
int32 max_queue_size = 11
Maximum queue size of any input stream in the graph. This can be used to control the memory usage of a MediaPipe graph by preventing fast sources from flooding the graph with packets. Any source that is connected to an input stream that has hit its maximum capacity will not be scheduled until the queue size falls under the specified limits, or if the scheduler queue is empty and no other nodes are running (to prevent possible deadlocks due to an incorrectly specified value). This global parameter is set to 100 packets by default to enable pipelining. If any node indicates that it buffers packets before emitting them, then the max(node_buffer_size, max_queue_size) is used. Set this parameter to -1 to disable throttling (i.e. the graph will use as much memory as it requires). If not specified, the limit is 100 packets.
bool report_deadlock = 21
If true, the graph run fails with an error when throttling prevents all calculators from running. If false, max_queue_size for an input stream is adjusted when throttling prevents all calculators from running.
optional InputStreamHandlerConfig input_stream_handler = 12
Config for this graph's InputStreamHandler. If unspecified, the framework will automatically install the default handler, which works as follows. The calculator's Process() method is called for timestamp t when: - at least one stream has a packet available at t; and, - all other streams either have packets at t, or it is known that they will not have packets at t (i.e. their next timestamp bound is greater than t). The handler then provides all available packets with timestamp t, with no preprocessing.
optional OutputStreamHandlerConfig output_stream_handler = 13
Config for this graph's OutputStreamHandler. If unspecified, the default output stream handler will be automatically installed by the framework which does not modify any outgoing packets.
repeated ExecutorConfig executor = 14
Configs for Executors. The names of the executors must be distinct. The default executor, whose name is the empty string, is predefined. The num_threads field of the CalculatorGraphConfig specifies the number of threads in the default executor. If the config for the default executor is specified, the CalculatorGraphConfig must not have the num_threads field.
optional ProfilerConfig profiler_config = 18
The default profiler-config for all calculators. If set, this defines the profiling settings such as num_histogram_intervals for every calculator in the graph. Each of these settings can be overridden by the |profiler_config| specified for a node.
string package = 19
The namespace used for class name lookup within this graph. An unqualified or partially qualified class name is looked up in this namespace first and then in enclosing namespaces.
string type = 20
The type name for the graph config, used for registering and referencing the graph config.
optional MediaPipeOptions options = 1001
The types and default values for graph options, in proto2 syntax.
repeated google.protobuf.Any graph_options = 1002
The types and default values for graph options, in proto3 syntax.

A single node in the DAG.

Used in: CalculatorGraphConfig, SwitchContainerOptions

string name = 1
The name of the node. This field is optional and doesn't generally need to be specified, but does improve error messaging.
string calculator = 2
The registered type of a calculator (provided via REGISTER_CALCULATOR), or of a subgraph (via REGISTER_MEDIAPIPE_GRAPH).
A Calculator can choose to access its input streams, output streams, and input side packets either by tag or by index. If the calculator chooses indexes then it will receive the streams or side packets in the same order as they are specified in this proto. If the calculator chooses to use tags then it must specify a tag along with each name. The field is given as "TAG:name". Meaning a tag name followed by a colon followed by the name. Tags use only upper case letters, numbers, and underscores, whereas names use only lower case letters, numbers, and underscores. Example: Node { calculator: "SomeAudioVideoCalculator" # This calculator accesses its inputs by index (no tag needed). input_stream: "combined_input" # This calculator accesses its outputs by tags, so all # output_streams must specify a tag. output_stream: "AUDIO:audio_stream" output_stream: "VIDEO:video_stream" # This calculator accesses its input side packets by tag. input_side_packet: "MODEL:model_01" }
repeated string input_stream = 3
String(s) representing "TAG:name" of the stream(s) from which the current node will get its inputs. "TAG:" part is optional, see above. A calculator with no input stream is a source.
repeated string output_stream = 4
String(s) representing "TAG:name" of the stream(s) produced by this node. "TAG:" part is optional, see above. These must be different from any other output_streams specified for other nodes in the graph.
repeated string input_side_packet = 5
String(s) representing "TAG:name" of the input side packet(s). "TAG:" part is optional, see above.
repeated string output_side_packet = 6
String(s) representing "TAG:name" of the output side packet(s). Only used by subgraphs. "TAG:" part is optional, see above.
optional CalculatorOptions options = 7
The options passed to the Calculator, in proto2 syntax.
repeated google.protobuf.Any node_options = 8
The options passed to the Calculator, in proto3 syntax. Each node_options message must have a different message type. If the same message type is specified in |options| and |node_options|, only the message in |options| is used.
int32 source_layer = 9
For a Source Calculator (i.e. a calculator with no inputs), this is the "layer" on which the calculator is executed. For a non-source calculator (i.e. a calculator with one or more input streams) this field has no effect. The sources on each layer are completely exhausted before Process() is called on any source calculator on a higher numbered layer. Example: Decoder -> Median Frame (requires all frames) -> Image Subtraction ---------------------------------------> The entire video will be buffered on the edge from the decoder to the Image subtraction. To fix this problem, layers can be used. Decoder (layer 0) -> Median Frame -> Image Subtraction Decoder (layer 1) -----------------> The frames from layer 0 will no longer be buffered, but the video will be decoded again instead. Note, that different options can be used in the second decoder.
int32 buffer_size_hint = 10
Optional parameter that allows the user to indicate to the scheduler that this node has a buffering behavior (i.e. waits for a bunch of packets before emitting any) and specify the size of the buffer that is built up. The scheduler will then try to keep the maximum size of any input queues in the graph to remain below the maximum of all buffer_size_hints and max_queue_size (if specified). The ideal value is typically something larger than the actual number of buffered packets to maintain pipelining. The default value 0 indicates that the node has no buffering behavior.
optional InputStreamHandlerConfig input_stream_handler = 11
Config for this node's InputStreamHandler. If unspecified, the graph-level input stream handler will be used.
optional OutputStreamHandlerConfig output_stream_handler = 12
Config for this node's OutputStreamHandler. If unspecified, the graph-level output stream handler will be used.
repeated InputStreamInfo input_stream_info = 13
Additional information about an input stream. The |name| field of the InputStreamInfo must match an input_stream.
string executor = 14
Set the executor which the calculator will execute on.
optional ProfilerConfig profiler_config = 15
TODO: Remove from Node when switched to Profiler. DEPRECATED: Configs for the profiler.
int32 max_in_flight = 16
The maximum number of invocations that can be executed in parallel. If not specified, the limit is one invocation.
repeated string external_input = 1005
DEPRECATED: For backwards compatibility we allow users to specify the old name for "input_side_packet" in proto configs. These are automatically converted to input_side_packets during config canonicalization.

A protobuf extension defining a list of template rules.

optional CalculatorGraphConfig config = 1
The base configuration.
repeated TemplateExpression rule = 2
The list of template rules.

Options for Calculators. Each Calculator implementation should have its own options proto, which should look like this: message MyCalculatorOptions { extend CalculatorOptions { optional MyCalculatorOptions ext = <unique id, e.g. the CL#>; } optional string field_needed_by_my_calculator = 1; optional int32 another_field = 2; // etc }

Used in: CalculatorGraphConfig.Node

optional bool merge_fields = 1
If true, this proto specifies a subset of field values, which should override corresponding field values.

Stores the profiling information for a calculator node. All the times are in microseconds.

Used in: GraphProfile

optional string name = 1
The calculator name.
optional int64 open_runtime = 2
Total time the calculator spent on Open (in microseconds).
optional int64 close_runtime = 3
Total time the calculator spent on Close (in microseconds).
optional TimeHistogram process_runtime = 4
Total and histogram of the time that the calculator spent on the Process() (in microseconds).
optional TimeHistogram process_input_latency = 5
Total and histogram of the input latency, i.e. the difference between the input timestamp and the process call time (in microseconds).
optional TimeHistogram process_output_latency = 6
Total and histogram of the output latency, i.e. the difference between the input timestamp and the process finished time.
repeated StreamProfile input_stream_profiles = 7
Total and histogram of the time that input streams of this calculator took.

optional CallbackPacketCalculatorOptions.PointerType type = 1
The type of the data pointer that the callback will put data into.
optional bytes pointer = 2
The location of the data stored as a string printed with snprintf(address, sizeof(address), "%p", pointer). This calculator only produces a reasonable callback if it is constructed on the same machine as the original pointer was created on and that pointer is still alive.

Used in: CallbackPacketCalculatorOptions

UNKNOWN = 0
VECTOR_PACKET = 1
POST_STREAM_PACKET = 2

Next tag: 33

Used in: FrameSelectionResult

optional TranslationModel translation = 1
Background motion expressed in various models. These are per-frame pair motions (from current to previous frame). Models are expressed in the un-normalized domain frame_width x frame_height that is passed to MotionEstimation (stored below).
optional SimilarityModel similarity = 2
optional LinearSimilarityModel linear_similarity = 3
optional AffineModel affine = 4
optional Homography homography = 5
optional MixtureHomography mixture_homography = 8
optional float frame_width = 31
Frame dimensions camera motion was computed over.
optional float frame_height = 32
repeated MixtureHomography mixture_homography_spectrum = 23
Mixture homographies computed w.r.t. exponentially increasing regularizers. Above mixture_homography member is selected from spectrum based on amount of rolling shutter present in the video.
optional float mixture_row_sigma = 10
Relative row sigma w.r.t. frame_height for mixture models.
optional float average_magnitude = 24
Average of all motion vector magnitudes (without accounting for any motion model), within 10th to 90th percentile (to remove outliers).
optional float translation_variance = 25
Inlier-weighted variance of the translation model. Specified, w.r.t. unnormalized video domain that motion models are computed for.
optional float similarity_inlier_ratio = 29
Ratio of inliers w.r.t. regular and stricter thresholds. In [0, 1].
optional float similarity_strict_inlier_ratio = 30
optional float average_homography_error = 11
Average registration error of homography in pixels. Note: These two parameters default to zero in-case homographies have not been estimated.
optional float homography_inlier_coverage = 12
Fraction, in [0,1], of homography inliers.
optional float homography_strict_inlier_coverage = 22
Same as above but with stricter threshold. (For details, see: MotionEstimationOptions::strict_coverage_scale). Coverage is designed to measure the amount of significant outliers, which can affect the validity of the estimated homography. However, it does not discount small outliers, which occur in case of small rolling shutter wobbles. For this a stricter version of coverage is needed, which is essential for computing the rolling_shutter_guess, i.e. the increase in coverage by using mixtures vs. homographies.
repeated float mixture_inlier_coverage = 13
Per-block inlier fraction for mixtures.
optional float rolling_shutter_guess = 14
Set based on stability analysis indicating if frame is likely to originate from a rolling shutter camera. (-1 is used to indicate frame was not tested, e.g. due to mixture deemed unstable for analysis). Guess is a scalar indicating by how much the mixture models (suitable for rolling shutter distortions) increased inlier coverage compared to a single homography. For example, a value of 1.3 indicates that the mixture models increased inlier coverage by 30%. If not -1, range is in [0, inf] (values slightly smaller than 1 are possible due to suppression of noisy feature tracks during estimation).
optional int32 rolling_shutter_motion_index = 16
Indicating if CameraMotion is deemed to originate from rolling shutter camera (index >= 0), and if so, denotes the index in the mixture_homography_spectrum, where higher indices correspond to heavier regularized motions. If motion is not deemed to originate from a rolling shutter camera, index is set to -1.
repeated int32 overlay_indices = 17
List of overlay indices (cell locations in column major format) over domain of size overlay_domain x overlay_domain, where overlay_domain is set by MotionEstimation to MotionEstimationOptions::OverlayDetectionOptions::analysis_mask_size. Overlay analysis is performed over chunk of frames, as specified by MotionEstimationOptions::overlay_analysis_chunk_size, with the resulting overlay indices being assigned to each frame of the chunk. Consequently it suffices to store the result only for the first frame of every chunk. Subsequent frames store a single negative index relative to the first chunk frame indicating where to locate the overlay indices. Specifically if for frame f, overlay_indices(0) == -2, overlay indices for corresponding chunk can be found at frame f - 2. For details about how overlay indices are used to flag a frame to contain an overlay, see MotionFilterOptions::OverlayOptions.
optional int32 overlay_domain = 18
optional CameraMotion.Type type = 6
optional CameraMotion.Type overridden_type = 15
If set, stores original type in case it was overridden (by filtering functions, etc.).
optional int32 flags = 19
optional float blur_score = 20
Same as in RegionFlowFeatureList (from region_flow.proto), measures blur as average cornerness over textured areas. As it depends on the image content, should only be used relative.
optional float bluriness = 21
Quantifies amount of blur. Specified as ratio w.r.t. sharpest matching frame, i.e. 1 indicates no blur, values > 1 amount of blur w.r.t. sharpest frame.
optional float frac_long_features_rejected = 26
Same as in RegionFlowFeatureList (from region_flow.proto). Stores fraction of long feature tracks that got rejected for this frame.
optional int64 timestamp_usec = 27
Same as in RegionFlowFeatureList (from region_flow.proto). Timestamp in micro seconds of the underlying frame.
optional int32 match_frame = 28
Same as in RegionFlowFeatureList (from region_flow.proto). Denotes the frame that motion was computed w.r.t., locally to the current frame. Values < 0 indicate backward tracking, while values > 0 indicate forward tracking. For example, match_frame = -1, indicates tracking is from current to previous frame.

Set of optional *bit* flags set for various purposes.

FLAG_SHOT_BOUNDARY = 1
Set to indicate presence of a shot boundary.
FLAG_BLURRY_FRAME = 2
FLAG_MAJOR_OVERLAY = 4
FLAG_SHARP_FRAME = 8
Set if frame is considered sharp in a neighborhood of frames.
FLAG_SINGULAR_ESTIMATION = 16
Indicates that estimation resulted in singular optimization problem. Used internally by MotionEstimation.
FLAG_SHOT_FADE = 32
Indicates if shot boundary is part of a fade. If so, all frames of the fade will be labeled with the FLAG but only the begin and end of the fade will have the FLAG_SHOT_BOUNDARY set.
FLAG_DUPLICATED = 64
Set if frame is exact duplicate of previous frame.
FLAG_CENTER_FRAME = 128
Indicates this frame is at the … (description truncated in the source listing)

CameraMotion type indicates whether highest degree of freedom (DOF) model estimation was deemed stable, in which case CameraMotion::Type is set to VALID. If a model was deemed not stable (according to *StabilityBounds in MotionEstimationOptions), it is set to the lower dof type which was deemed stable.

Used in: CameraMotion

VALID = 0
All requested motion models estimated reliably.
UNSTABLE_HOMOG = 1
Fallback to homographies, mixture unreliable.
UNSTABLE_SIM = 2
Fallback to similarity model, homography unreliable.
UNSTABLE = 3
Fallback to translation model, similarity unreliable, legacy naming.
INVALID = 4
Identity model, translation unreliable.

Used in: ClassificationList

optional int32 index = 1
The index of the class in the corresponding label map.
optional float score = 2
The probability score for this class.
optional string label = 3
Label or name of the class.
optional string display_name = 4
Optional human-readable string for display purposes.

Group of Classification protos.

Used in: ConstantSidePacketCalculatorOptions.ConstantSidePacket

repeated Classification classification = 1

Used in: ToneEstimationOptions

optional float min_exposure = 1
Over/Under exposure setting. Pixels that are clipped due to limited dynamic range are masked out from analysis. Values specified w.r.t. [0, 1] range.
optional float max_exposure = 2
optional int32 max_clipped_channels = 4
A pixel can have clipped color values in at most max_clipped_channels before it will be labeled as clipped.
optional int32 clip_mask_diameter = 5
Over-exposure tends to show blooming (neighboring pixels are affected by over-exposure as well). For robustness mask of clipped pixels is dilated with structuring element of diameter clip_mask_diam.

optional int32 min_size = 1
The minimum size an input iterable collection should have for the calculator to output true.

Used in: AnnotationOverlayCalculatorOptions, ColorMap, DetectionsToRenderDataCalculatorOptions, LabelsToRenderDataCalculatorOptions, LandmarksToRenderDataCalculatorOptions, RecolorCalculatorOptions, RectToRenderDataCalculatorOptions, RenderAnnotation, RenderAnnotation.FilledOval, RenderAnnotation.FilledRectangle, RenderAnnotation.FilledRoundedRectangle, RenderAnnotation.GradientLine, TimedBoxListToRenderDataCalculatorOptions

optional int32 r = 1
optional int32 g = 2
optional int32 b = 3

Mapping from string label to a color.

map<string, Color> label_to_color = 1

repeated ConstantSidePacketCalculatorOptions.ConstantSidePacket packet = 1

Used in: ConstantSidePacketCalculatorOptions

oneof value
- int32 int_value = 1
- float float_value = 2
- bool bool_value = 3
- string string_value = 4
- uint64 uint64_value = 5
- ClassificationList classification_list_value = 6

optional CopyCalculatorOptions.Rotation rotation = 1

Used in: CopyCalculatorOptions

NONE = 0
CCW = 1
rotate 90 degrees counterclockwise
CCW_FLIP = 2
hack to rectify convfloat

See DefaultInputStreamHandler for documentation.

optional int32 batch_size = 1
batch_size determines how many input packets should be collected before a calculator can process them. Once there are enough packets, Process method of the Calculator is called sequentially. Currently, batching is not supported for source nodes but it may be supported in the future. Therefore, this field should not be specified for source nodes.

optional float max_quantized_value = 1
optional float min_quantized_value = 2

Used in: DetectionList

repeated string label = 1
i-th label or label_id has a score encoded by the i-th element in score.
repeated int32 label_id = 2
repeated float score = 3
optional LocationData location_data = 4
Location data corresponding to all detected labels above.
optional string feature_tag = 5
Optional string to indicate the feature generation method. Useful in associating a name to the pipeline used to generate this detection.
optional string track_id = 6
Optional string to specify track_id if detection is part of a track.
optional int64 detection_id = 7
Optional unique id to help associate different Detections to each other.
repeated Detection.AssociatedDetection associated_detections = 8
repeated string display_name = 9
Human-readable string for display, intended for debugging purposes. The display name corresponds to the label (or label_id). This is optional.
optional int64 timestamp_usec = 10
The timestamp (in microseconds) *at which* this detection was created/detected.

Useful for associating a detection with other detections based on the detection_id. For example, this could be used to associate a face detection with a body detection when they belong to the same person.

Used in: Detection

optional int32 id = 1
optional float confidence = 2

message DetectionLabelIdToTextCalculatorOptions

detection_label_id_to_text_calculator.proto:21

optional string label_map_path = 1
Path to a label map file for getting the actual name of detected classes.
repeated string label = 2
Alternative way to specify label map label: "label for id 0" label: "label for id 1" ...
optional bool keep_label_id = 3
By default, the `label_id` field from the input is stripped if a text label could be found. By setting this field to true, it is always copied to the output detections.

Group of Detection protos.

repeated Detection detection = 1

optional int32 rotation_vector_start_keypoint_index = 1
Specify the rotation angle of the output rect with a vector formed by connecting two keypoints in the detection, together with the target angle (can be in radians or in degrees) of that vector after rotation. The target angle is counter-clockwise starting from the positive x-axis.
optional int32 rotation_vector_end_keypoint_index = 2
optional float rotation_vector_target_angle = 3
In radians.
optional float rotation_vector_target_angle_degrees = 4
In degrees.
optional bool output_zero_rect_for_empty_detections = 5
Whether to output a zero-rect (with origin and size both zero) when the input detection vector is empty.
optional DetectionsToRectsCalculatorOptions.ConversionMode conversion_mode = 6

Used in: DetectionsToRectsCalculatorOptions

DEFAULT = 0
USE_BOUNDING_BOX = 1
USE_KEYPOINTS = 2

optional bool produce_empty_packet = 1
If true, produces a RenderData packet with no annotation when the input packet has no detection. Otherwise, it won't produce any packet. Please note, regardless of this flag nothing will be produced if there is no input packet for a timestamp.
optional string text_delimiter = 2
The delimiter to separate label(_id) and score.
optional bool one_label_per_line = 3
If true, each "label(_id),score" will be on a separate line. Otherwise, all "label(_id),score" will be concatenated when the detection has more than one label.
optional RenderAnnotation.Text text = 4
Rendering options for the label.
optional double thickness = 5
Thickness for drawing the label(s) and the location_data(box).
optional Color color = 6
Color for drawing the label(s), feature_tag, and the location_data(box).
optional string scene_class = 7
An optional string that identifies this class of annotations for the render data output this calculator produces. If multiple instances of this calculator are present in the graph, this value should be unique among them.
optional bool render_detection_id = 8
If true, renders the detection id in the first line before the labels.

Describes a MediaPipe Executor.

Used in: CalculatorGraphConfig

string name = 1
The name of the executor (used by a CalculatorGraphConfig::Node or PacketGeneratorConfig to specify which executor it will execute on). This field must be unique within a CalculatorGraphConfig. If this field is omitted or is an empty string, the ExecutorConfig describes the default executor. NOTE: The names "default" and "gpu" are reserved and must not be used.
string type = 2
The registered type of the executor. For example: "ThreadPoolExecutor". The framework will create an executor of this type (with the options in the options field) for the CalculatorGraph. The ExecutorConfig for the default executor may omit this field and let the framework choose an appropriate executor type. Note: If the options field is used in this case, it should contain the ThreadPoolExecutorOptions. If the ExecutorConfig for an additional (non-default) executor omits this field, the executor must be created outside the CalculatorGraph and passed to the CalculatorGraph for use.
optional MediaPipeOptions options = 3
The options passed to the Executor. The extension in the options field must match the type field. For example, if the type field is "ThreadPoolExecutor", then the options field should contain the ThreadPoolExecutorOptions.

Describes a field within a message.

(message has no fields)

Used in: TemplateExpression

TYPE_INVALID = 0
0 is reserved for errors.
TYPE_DOUBLE = 1
Order is weird for historical reasons.
TYPE_FLOAT = 2
TYPE_INT64 = 3
Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT64 if negative values are likely.
TYPE_UINT64 = 4
TYPE_INT32 = 5
Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT32 if negative values are likely.
TYPE_FIXED64 = 6
TYPE_FIXED32 = 7
TYPE_BOOL = 8
TYPE_STRING = 9
TYPE_GROUP = 10
Tag-delimited aggregate. Group type is deprecated and not supported in proto3. However, Proto3 implementations should still be able to parse the group wire format and treat group fields as unknown fields.
TYPE_MESSAGE = 11
Length-delimited aggregate.
TYPE_BYTES = 12
New in version 2.
TYPE_UINT32 = 13
TYPE_ENUM = 14
TYPE_SFIXED32 = 15
TYPE_SFIXED64 = 16
TYPE_SINT32 = 17
Uses ZigZag encoding.
TYPE_SINT64 = 18
Uses ZigZag encoding.

See FixedSizeInputStreamHandler for documentation.

optional int32 trigger_queue_size = 1
The queue size at which input queues are truncated.
optional int32 target_queue_size = 2
The queue size to which input queues are truncated.
optional bool fixed_min_size = 3
If false, input queues are truncated to at most trigger_queue_size. If true, input queues are truncated to at least trigger_queue_size.

optional int32 max_in_flight = 1
The maximum number of frames released for processing at one time. The default value limits to 1 frame processing at a time.
optional int32 max_in_queue = 2
The maximum number of frames queued waiting for processing. The default value limits to 1 frame awaiting processing.
optional int64 in_flight_timeout = 3
The maximum time in microseconds to wait for a frame to finish processing. The default value stops waiting after 1 sec. The value 0 specifies no timeout.

optional FlowPackagerOptions flow_packager_options = 1
optional int32 caching_chunk_size_msec = 2
Chunk size for caching files that are written to the externally specified caching directory. Specified in msec. Note that each chunk always contains at its end the first frame of the next chunk (to enable forward tracking across chunk boundaries).
optional string cache_file_format = 3

Options controlling compression and encoding.

Used in: FlowPackagerCalculatorOptions

optional int32 domain_width = 1
Tracking data is resolution independent specified w.r.t. specified domain. Only values <= 256 are supported if binary tracking data is requested to be supported (see below).
optional int32 domain_height = 2
optional bool binary_tracking_data_support = 6
Needs to be set for calls to FlowPackager::EncodeTrackingData. If encoding is not required, can be set to false in which case a higher domain_width can be used.
optional bool use_high_profile = 3
optional bool high_fidelity_16bit_encode = 4
If set uses 16 bit encode for vector data, in BinaryTrackingData, otherwise only 8 bits are used.
optional float high_profile_reuse_threshold = 5
In high profile encode, re-use previously encoded vector when absolute difference to current vector is below threshold.

High profile encoding flags.

ADVANCE_FLAG = 128
DOUBLE_INDEX_ENCODE = 64
INDEX_MASK = 63

Specifies the maximum and minimum value to truncate when normalizing optical flow fields.

optional float min_value = 1
optional float max_value = 2

Next index: 7

Used in: FrameSelectionOptions

optional int32 sampling_rate = 1
Interval at which frames should be sampled; set to zero if sampling should not be enforced (i.e. selection is performed w.r.t. other criteria).
optional float bandwidth_frames = 2
Bandwidth used during dynamic programming. The larger the bandwidth the more accurate the result w.r.t. the specified sampling rate. Smaller bandwidths bias the solution suboptimally to center around the mean frame numbers of the sampling rate. If in (0, 1), assumed to specify fraction of total number of input frames, otherwise must be an integer.
optional int32 search_radius_frames = 3
Search radius for dynamic programming (how many frames you are allowed to search around the previous frame).
optional FrameSelectionSolutionEvaluatorType solution_evaluator = 4
Allows one to specify custom solution selection criteria (i.e. different way to choose the best row of the computed cost matrix).
optional int32 max_output_frames = 5
Outputs a fixed number of frames and automatically sets the appropriate sampling rate. Set to 0 by default (i.e. not enabled).

Options for computing frame selection. TODO: Support multiple criteria if required. Currently uses only the first one.

repeated FrameSelectionCriterion criterion = 1
optional int32 chunk_size = 2
FrameSelection buffers incoming CameraMotions for specified chunk size and creates cost matrices upon reaching the limit. TODO: Implement if necessary (currently nothing is cleared upon reaching the limit).

Stores the result of the frame selection, with composited features. Next index: 6

optional int64 timestamp = 1
Timestamp of the selected frame.
optional int32 frame_idx = 2
Frame index of the selected frame in the initial video stream. If this timestamp was manufactured, this will be the index of the initial frame.
optional CameraMotion camera_motion = 3
CameraMotion from selected item to previous selected item.
optional RegionFlowFeatureList features = 4
Features from selected item to previous selected item.
optional int64 processed_from_timestamp = 5
If this FrameSelectionResult was the result of processing a previous one, the timestamp of the original frame.

Used in: FrameSelectionSolutionEvaluatorType

(message has no fields)

Used in: FrameSelectionCriterion

optional string class_name = 1
Class of type FrameSelectionSolution that computes the best row.
optional FrameSelectionSolutionEvaluatorOptions options = 2

Stores selected timestamps and corresponding frame index.

optional int64 timestamp = 1
Timestamp of the selected frame.
optional int32 frame_idx = 2
Frame index of the selected frame in the initial video stream. If this timestamp was manufactured, this will be the index of the initial frame.
optional int64 processed_from_timestamp = 3
If this timestamp was manufactured, the timestamp of the original frame.

Transforms a 3D color vector x = (c1, c2, c3) according to [gain_c1 0 0 bias_c1; 0 gain_c2 0 bias_c2; 0 0 gain_c3 bias_c3] * [c1; c2; c3; 1]

Used in: MixtureGainBiasModel, ToneChange

optional float gain_c1 = 1
optional float bias_c1 = 2
optional float gain_c2 = 3
optional float bias_c2 = 4
optional float gain_c3 = 5
optional float bias_c3 = 6

optional bool empty_packets_as_allow = 1
By default an empty packet in the ALLOW or DISALLOW input stream indicates disallowing the corresponding packets in the data input streams. Setting this option to true inverts that, allowing the data packets to go through.

message GlContextOptions

gl_context_options.proto:21

optional string gl_context_name = 1

Next id: 8.

optional int32 output_width = 1
Output dimensions.
optional int32 output_height = 2
optional float output_scale = 7
A scale factor for output size, while keeping aspect ratio. It has lower priority than the above two fields. That is, it is effective only when the above two fields are unset.
optional int32 rotation = 3
Counterclockwise rotation in degrees. Must be a multiple of 90.
optional bool flip_vertical = 4
Flip the output texture vertically. This is applied after rotation.
optional bool flip_horizontal = 5
Flip the output texture horizontally. This is applied after rotation.
optional ScaleMode.Mode scale_mode = 6

optional ScaleMode.Mode frame_scale_mode = 1
Output frame scale mode. Default is FILL_AND_CROP.

(message has no fields)

Used in: ImageToTensorCalculatorOptions, TensorsToSegmentationCalculatorOptions

DEFAULT = 0
CONVENTIONAL = 1
OpenGL: bottom-left origin; Metal: top-left origin
TOP_LEFT = 2
OpenGL: top-left origin; Metal: top-left origin

Latency events and summaries for recent mediapipe packets.

repeated GraphTrace graph_trace = 1
Recent packet timing information about each calculator node and stream.
repeated CalculatorProfile calculator_profiles = 2
Aggregated latency information about each calculator node.
optional CalculatorGraphConfig config = 3
The canonicalized calculator graph that is traced.

Latency timing for recent mediapipe packets.

Used in: GraphProfile

optional int64 base_time = 1
The time represented as 0 in the trace.
optional int64 base_timestamp = 2
The timestamp represented as 0 in the trace.
repeated string calculator_name = 3
The list of calculator node names indexed by node id.
repeated string stream_name = 4
The list of stream names indexed by stream id.
repeated GraphTrace.CalculatorTrace calculator_trace = 5
Recent packet timing information about each calculator node and stream.

The timing for one packet set being processed at one calculator node.

Used in: GraphTrace

optional int32 node_id = 1
The index of the calculator node in the calculator_name list.
optional int64 input_timestamp = 2
The input timestamp during Open, Process, or Close.
optional EventType event_type = 3
The kind of event, 1=Open, 2=Process, 3=Close, etc.
optional int64 start_time = 4
The time at which the packets entered the calculator node.
optional int64 finish_time = 5
The time at which the packets exited the calculator node.
repeated StreamTrace input_trace = 6
The timing data for each input packet.
repeated StreamTrace output_trace = 7
The identifying timestamp and stream_id for each output packet.
optional int32 thread_id = 8
An identifier for the current process thread.

The kind of event recorded.

Used in: CalculatorTrace

UNKNOWN = 0
OPEN = 1
PROCESS = 2
CLOSE = 3
NOT_READY = 4
READY_FOR_PROCESS = 5
READY_FOR_CLOSE = 6
THROTTLED = 7
UNTHROTTLED = 8
CPU_TASK_USER = 9
CPU_TASK_SYSTEM = 10
GPU_TASK = 11
DSP_TASK = 12
TPU_TASK = 13
GPU_CALIBRATION = 14
PACKET_QUEUED = 15

The timing for one packet across one packet stream.

Used in: CalculatorTrace

optional int64 start_time = 1
The time at which the packet entered the stream.
optional int64 finish_time = 2
The time at which the packet exited the stream.
optional int64 packet_timestamp = 3
The identifying timestamp of the packet.
optional int32 stream_id = 4
The index of the stream in the stream_name list.
optional int64 packet_id = 5
The address of the packet contents.
optional int64 event_data = 6
Data describing the event, such as the packet contents.

Homography according to [h_00 h_01 h_02; h_10 h_11 h_12; h_20 h_21 1]; Note: The parametrization with h_22 = 1 does not always hold, e.g. if the origin (0, 0, 1) gets mapped to the line at infinity (0, 0, 1). However for video we expect small perspective changes between frames and this parametrization improves robustness greatly as it removes an additional DOF. Therefore, all methods in motion_stabilization should not be used for general wide-baseline matching of frames.

Used in: CameraMotion, MixtureHomography, MotionBoxState, TrackingData

optional float h_00 = 1
optional float h_01 = 2
optional float h_02 = 3
optional float h_10 = 4
optional float h_11 = 5
optional float h_12 = 6
optional float h_20 = 7
optional float h_21 = 8

Taken from java/com/google/android/libraries/microvideo/proto/microvideo.proto to satisfy leakr requirements TODO: Remove and use above proto.

repeated float motion_homography_data = 1
For each frame, there are 12 homography matrices stored. Each matrix is 3x3 (9 elements). This field will contain 12 x 3 x 3 float values. The first row of the first homography matrix will be followed by the second row of the first homography matrix, followed by third row of first homography matrix, followed by the first row of the second homography matrix, etc.
repeated uint32 histogram_count_data = 2
Vector containing histogram counts for individual patches in the frame.
optional int32 frame_width = 3
The width of the frame at the time metadata was sampled.
optional int32 frame_height = 4
The height of the frame at the time metadata was sampled.

optional bool output_on_gpu = 1
Whether the output clone should have pixel data already available on GPU.

optional int32 width = 1
Output texture buffer dimensions. The values defined in the options will be overridden by the WIDTH and HEIGHT input streams if they exist.
optional int32 height = 2
optional float rotation = 3
Rotation angle is counter-clockwise in radians.
optional float norm_width = 4
Normalized width and height of the output rect. Value is within [0, 1].
optional float norm_height = 5
optional float norm_center_x = 6
Normalized location of the center of the output rectangle in image coordinates. Value is within [0, 1]. The (0, 0) point is at the (top, left) corner.
optional float norm_center_y = 7
optional ImageCroppingCalculatorOptions.BorderMode border_mode = 8
Specifies behaviour for crops that go beyond image borders.
optional int32 output_max_width = 9
Specifies limits for the size of the output image. It will be scaled down, preserving ratio, to fit within. These do not change which area of the input is selected for cropping.
optional int32 output_max_height = 10

Used in: ImageCroppingCalculatorOptions

BORDER_UNSPECIFIED = 0
First unspecified value is required by the guideline. See details here: https://developers.google.com/protocol-buffers/docs/style#enums
BORDER_ZERO = 1
BORDER_REPLICATE = 2

A list of properties extracted from EXIF metadata from an image file.

optional uint32 image_width = 1
Image dimensions.
optional uint32 image_height = 2
optional double focal_length_mm = 3
Focal length of camera lens in millimeters.
optional double focal_length_35mm = 4
Focal length of camera lens in 35 mm equivalent.
optional double focal_length_pixels = 5
Focal length in pixels.

(message has no fields)

Used in: ScaleImageCalculatorOptions

UNKNOWN = 0
The format is unknown. It is not valid for an ImageFrame to be initialized with this value.
SRGB = 1
sRGB, interleaved: one byte for R, then one byte for G, then one byte for B for each pixel.
SRGBA = 2
sRGBA, interleaved: one byte for R, one byte for G, one byte for B, one byte for alpha or unused.
GRAY8 = 3
Grayscale, one byte per pixel.
GRAY16 = 4
Grayscale, one uint16 per pixel.
YCBCR420P = 5
YCbCr420P (1 bpp for Y, 0.25 bpp for U and V). NOTE: NOT a valid ImageFrame format, but intended for ScaleImageCalculatorOptions, VideoHeader, etc. to indicate that YUVImage is used in place of ImageFrame.
YCBCR420P10 = 6
Similar to YCbCr420P, but the data is represented as the lower 10bits of a uint16. Like YCbCr420P, this is NOT a valid ImageFrame, and the data is carried within a YUVImage.
SRGB48 = 7
sRGB, interleaved, each component is a uint16.
SRGBA64 = 8
sRGBA, interleaved, each component is a uint16.
VEC32F1 = 9
One float per pixel.
VEC32F2 = 12
Two floats per pixel.
LAB8 = 10
LAB, interleaved: one byte for L, then one byte for a, then one byte for b for each pixel.
SBGRA = 11
sBGRA, interleaved: one byte for B, one byte for G, one byte for R, one byte for alpha or unused. This is the N32 format for Skia.

optional int32 output_tensor_width = 1
optional int32 output_tensor_height = 2
optional bool keep_aspect_ratio = 3
If true, image region will be extracted and copied into tensor keeping region aspect ratio, which usually results in letterbox padding. Otherwise, if false, image region is stretched to fill output tensor fully.
oneof range
Output tensor element range/type image pixels are converted to.
- ImageToTensorCalculatorOptions.FloatRange output_tensor_float_range = 4
optional GpuOrigin.Mode gpu_origin = 5
For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs to be flipped vertically as tensors are expected to start at top. (DEFAULT or unset interpreted as CONVENTIONAL.)
optional ImageToTensorCalculatorOptions.BorderMode border_mode = 6
Pixel extrapolation method. When converting image to tensor it may happen that tensor needs to read pixels outside image boundaries. Border mode helps to specify how such pixels will be calculated. BORDER_REPLICATE is used by default.

Pixel extrapolation methods. See @border_mode.

Used in: ImageToTensorCalculatorOptions

BORDER_UNSPECIFIED = 0
BORDER_ZERO = 1
BORDER_REPLICATE = 2

Range of float values [min, max]. min must be strictly less than max.

Used in: ImageToTensorCalculatorOptions

optional float min = 1
optional float max = 2

optional int32 output_width = 1
Output dimensions. Set to 0 if they should be the same as the input.
optional int32 output_height = 2
optional RotationMode.Mode rotation_mode = 3
Counterclockwise rotation mode.
optional bool flip_vertically = 4
Vertical flipping, applied after rotation.
optional bool flip_horizontally = 5
Horizontal flipping, applied after rotation.
optional ScaleMode.Mode scale_mode = 6
Scale mode.
optional bool constant_padding = 7
Padding type. This option is only used when the scale mode is FIT. Default is to use BORDER_CONSTANT. If set to false, it will use BORDER_REPLICATE instead.

Full Example: node { calculator: "InferenceCalculator" input_stream: "TENSOR_IN:image_tensors" output_stream: "TENSOR_OUT:result_tensors" options { [mediapipe.InferenceCalculatorOptions.ext] { model_path: "model.tflite" delegate { gpu {} } } } }

optional string model_path = 1
Path to the TF Lite model (ex: /path/to/modelname.tflite). On mobile, this is generally just modelname.tflite.
optional bool use_gpu = 2
Whether the TF Lite GPU or CPU backend should be used. Effective only when input tensors are on CPU. For input tensors on GPU, GPU backend is always used. DEPRECATED: configure "delegate" instead.
optional bool use_nnapi = 3
Android only. When true, an NNAPI delegate will be used for inference. If NNAPI is not available, then the default CPU delegate will be used automatically. DEPRECATED: configure "delegate" instead.
optional int32 cpu_num_thread = 4
The number of threads available to the interpreter. Effective only when input tensors are on CPU and 'use_gpu' is false.
optional InferenceCalculatorOptions.Delegate delegate = 5
TfLite delegate to run inference. If not specified, TFLite GPU delegate is used by default (as if "gpu {}" is specified) unless GPU support is disabled in the build (i.e., with --define MEDIAPIPE_DISABLE_GPU=1), in which case regular TFLite on CPU is used (as if "tflite {}" is specified) except when building with emscripten where xnnpack is used. NOTE: use_gpu/use_nnapi are ignored if specified. (Delegate takes precedence over use_* deprecated options.)

Used in: InferenceCalculatorOptions

oneof delegate
- Delegate.TfLite tflite = 1
- Delegate.Gpu gpu = 2
- Delegate.Nnapi nnapi = 3
- Delegate.Xnnpack xnnpack = 4

Delegate to run GPU inference depending on the device. (Can use OpenGl, OpenCl, Metal depending on the device.)

Used in: Delegate

optional bool use_advanced_gpu_api = 1
Experimental, Android/Linux only. Use TFLite GPU delegate API2 for the NN inference. example: delegate: { gpu { use_advanced_gpu_api: true } }
optional Gpu.Api api = 4
optional bool allow_precision_loss = 3
This option is valid for TFLite GPU delegate API2 only. Set to true to use 16-bit float precision. If max precision is needed, set to false for 32-bit float calculations only.
optional string cached_kernel_path = 2
Load pre-compiled serialized binary cache to accelerate init process. Only available for OpenCL delegate on Android. Kernel caching will only be enabled if this path is set.

This option is valid for TFLite GPU delegate API2 only. Choose any of the available APIs to force running inference using it.

Used in: Gpu

ANY = 0
OPENGL = 1
OPENCL = 2

Android only.

Used in: Delegate

(message has no fields)

Default inference provided by tflite.

Used in: Delegate

(message has no fields)

Used in: Delegate

optional int32 num_threads = 1
Number of threads for XNNPACK delegate. (By default, calculator tries to choose optimal number of threads depending on the device.)

A collection of input data to a CalculatorGraph.

Used in: InputCollectionSet

string name = 1
The name of the input collection. Name must match [a-z_][a-z0-9_]*
repeated string side_packet_name = 2
The names of each side packet. The number of side_packet_name must match the number of packets generated by the input file.
repeated string external_input_name = 1002
DEPRECATED: old way of referring to side_packet_name.
InputCollection.InputType input_type = 3
Sets the source of the input collection data. The default value is UNKNOWN.
string file_name = 4
A file name pointing to the data. The format of the data is specified by the "input_type" field. Multiple shards may be specified using @N or glob expressions.

The input can be specified in several ways.

Used in: InputCollection

UNKNOWN = 0
An invalid default value. This value is guaranteed to be the lowest enum value (i.e. don't add negative enum values).
RECORDIO = 1
A recordio where each record is a serialized PacketManagerConfig. Each PacketManagerConfig must have the same number of packet factories in it as the number of side packet names. Furthermore, the output side packet name field in each PacketFactoryConfig must not be set. This is the most general input, and allows multiple side packet values to be set in arbitrarily complicated ways before each run.
FOREIGN_RECORDIO = 2
A recordio where each record is a serialized packet payload. For example a recordio of serialized OmniaFeature protos dumped from Omnia.
FOREIGN_CSV_TEXT = 3
A text file where each line is a comma separated list. The number of elements for each csv string must be the same as the number of side_packet_name (and the order must match). Each line must be less than 1MiB in size. Lines consisting of only whitespace or only whitespace and a pound comment will be skipped.
INVALID_UPPER_BOUND = 4
This and all higher values are invalid. Update this value to always be larger than any other enum values you add.

A convenient way to specify a number of InputCollections.

repeated InputCollection input_collection = 1

This proto should be used only as an input to a calculator, to verify that that case is covered.

optional int32 x = 1

Settings specifying an input stream handler.

Used in: CalculatorGraphConfig, CalculatorGraphConfig.Node

optional string input_stream_handler = 1
Name of the registered input stream handler class.
optional MediaPipeOptions options = 3
Options for the input stream handler.

Additional information about an input stream.

Used in: CalculatorGraphConfig.Node

string tag_index = 1
A description of the input stream. This description uses the Calculator visible specification of a stream. The format is a tag, then an index with both being optional. If the tag is missing it is assumed to be "" and if the index is missing then it is assumed to be 0. If the index is provided then a colon (':') must be used. Examples: "TAG" -> tag "TAG", index 0 "" -> tag "", index 0 ":0" -> tag "", index 0 ":3" -> tag "", index 3 "VIDEO:0" -> tag "VIDEO", index 0 "VIDEO:2" -> tag "VIDEO", index 2
bool back_edge = 2
Whether the input stream is a back edge. By default, MediaPipe requires graphs to be acyclic and treats cycles in a graph as errors. To allow MediaPipe to accept a cyclic graph, set the back_edge fields of the input streams that are back edges to true. A cyclic graph usually has an obvious forward direction, and a back edge goes in the opposite direction. For a formal definition of a back edge, please see https://en.wikipedia.org/wiki/Depth-first_search.

repeated Color color = 1
Colors for drawing the label(s).
optional double thickness = 2
Thickness for drawing the label(s).
optional int32 font_height_px = 3
The font height in absolute pixels.
optional int32 horizontal_offset_px = 7
The offset of the starting text in horizontal direction in absolute pixels.
optional int32 vertical_offset_px = 8
The offset of the starting text in vertical direction in absolute pixels.
optional int32 max_num_labels = 4
The maximum number of labels to display.
optional int32 font_face = 5
Specifies the font for the text. Font must be one of the following from OpenCV: cv::FONT_HERSHEY_SIMPLEX (0), cv::FONT_HERSHEY_PLAIN (1), cv::FONT_HERSHEY_DUPLEX (2), cv::FONT_HERSHEY_COMPLEX (3), cv::FONT_HERSHEY_TRIPLEX (4), cv::FONT_HERSHEY_COMPLEX_SMALL (5), cv::FONT_HERSHEY_SCRIPT_SIMPLEX (6), cv::FONT_HERSHEY_SCRIPT_COMPLEX (7).
optional LabelsToRenderDataCalculatorOptions.Location location = 6
optional bool use_display_name = 9
Uses Classification.display_name field instead of Classification.label.

Label location.

Used in: LabelsToRenderDataCalculatorOptions

TOP_LEFT = 0
BOTTOM_LEFT = 1

A landmark that can have 1 to 3 dimensions. Use x for 1D points, (x, y) for 2D points and (x, y, z) for 3D points. For more dimensions, consider using matrix_data.proto.

Used in: LandmarkList

optional float x = 1
optional float y = 2
optional float z = 3
optional float visibility = 4
Landmark visibility. Should stay unset if not supported. Float score of whether landmark is visible or occluded by other objects. A landmark is also considered invisible if it is not present on the screen (out of scene bounds). Depending on the model, visibility value is either a sigmoid or an argument of sigmoid.
optional float presence = 5
Landmark presence. Should stay unset if not supported. Float score of whether landmark is present on the scene (located within scene bounds). Depending on the model, presence value is either a result of sigmoid or an argument of sigmoid function to get landmark presence probability.

Group of Landmark protos.

repeated Landmark landmark = 1

optional bool ignore_rotation = 1
Ignore the rotation field of rect proto for projection.

oneof filter_options
- LandmarksSmoothingCalculatorOptions.NoFilter no_filter = 1
- LandmarksSmoothingCalculatorOptions.VelocityFilter velocity_filter = 2
- LandmarksSmoothingCalculatorOptions.OneEuroFilter one_euro_filter = 3

Default behaviour and fast way to disable smoothing.

Used in: LandmarksSmoothingCalculatorOptions

(message has no fields)

For the details of the filter implementation and the procedure of its configuration please check http://cristal.univ-lille.fr/~casiez/1euro/

Used in: LandmarksSmoothingCalculatorOptions

optional float frequency = 1
Frequency of incoming frames defined in frames per second. Used only if it can't be calculated from provided events (e.g. on the very first frame).
optional float min_cutoff = 2
Minimum cutoff frequency. Start by tuning this parameter while keeping `beta = 0` to reduce jittering to the desired level. 1Hz (the default value) is a good starting point.
optional float beta = 3
Cutoff slope. After `min_cutoff` is configured, start increasing `beta` value to reduce the lag introduced by the `min_cutoff`. Find the desired balance between jittering and lag.
optional float derivate_cutoff = 4
Cutoff frequency for derivate. It is set to 1Hz in the original algorithm, but can be tuned to further smooth the speed (i.e. derivate) on the object.
optional float min_allowed_object_scale = 5
If calculated object scale is less than given value smoothing will be disabled and landmarks will be returned as is.
optional bool disable_value_scaling = 6
Disable value scaling based on object size and use `1.0` instead. If not disabled, value scale is calculated as inverse value of object size. Object size is calculated as maximum side of rectangular bounding box of the object in XY plane.

Used in: LandmarksSmoothingCalculatorOptions

optional int32 window_size = 1
Number of value changes to keep over time. Higher value adds to lag and to stability.
optional float velocity_scale = 2
Scale to apply to the velocity calculated over the given window. With higher velocity `low pass filter` weights new values higher. Lower value adds to lag and to stability.
optional float min_allowed_object_scale = 3
If calculated object scale is less than given value smoothing will be disabled and landmarks will be returned as is.
optional bool disable_value_scaling = 4
Disable value scaling based on object size and use `1.0` instead. If not disabled, value scale is calculated as inverse value of object size. Object size is calculated as maximum side of rectangular bounding box of the object in XY plane.

repeated int32 selected_landmark_indices = 1
A subset of indices to be included when creating the detection.

optional int32 num_dimensions = 1
Number of dimensions to convert. Must be within [1, 3].

repeated int32 landmark_connections = 1
Specifies the landmarks to be connected in the drawing. For example, the landmark_connections value of [0, 1, 1, 2] specifies two connections: one that connects landmarks with index 0 and 1, and another that connects landmarks with index 1 and 2.
optional Color landmark_color = 2
Color of the landmarks.
optional Color connection_color = 3
Color of the connections.
optional double thickness = 4
Thickness of the drawing of landmarks and connections.
optional bool visualize_landmark_depth = 5
Change color and size of rendered landmarks based on its z value.
optional bool utilize_visibility = 6
Use landmarks visibility while rendering landmarks and connections. If landmark is not visible, neither it nor adjacent connections will be rendered.
optional double visibility_threshold = 7
Threshold to determine visibility of the landmark. A landmark with visibility greater than or equal to the threshold is considered visible.
optional bool utilize_presence = 8
Use landmarks presence while rendering landmarks and connections. If landmark is not present, neither it nor adjacent connections will be rendered.
optional double presence_threshold = 9
Threshold to determine presence of the landmark. A landmark with presence greater than or equal to the threshold is considered present.
optional double min_depth_circle_thickness = 10
Min thickness of the drawing for landmark circle.
optional double max_depth_circle_thickness = 11
Max thickness of the drawing for landmark circle.
optional Color min_depth_line_color = 12
Gradient color for the lines connecting landmarks at the minimum depth.
optional Color max_depth_line_color = 13
Gradient color for the lines connecting landmarks at the maximum depth.

Linear similarity model: [a -b; b a] * x + [dx; dy]

Used in: CameraMotion, MixtureLinearSimilarity

optional float dx = 1
optional float dy = 2
optional float a = 3
optional float b = 4

message LocalFileContentsCalculatorOptions

local_file_contents_calculator.proto:21

optional bool text_mode = 1
By default, set the file open mode to 'rb'. Otherwise, set the mode to 'r'.

Used in: Detection

optional LocationData.Format format = 1
optional LocationData.BoundingBox bounding_box = 2
optional LocationData.RelativeBoundingBox relative_bounding_box = 3
optional LocationData.BinaryMask mask = 4
repeated LocationData.RelativeKeypoint relative_keypoints = 5

A mask of size equivalent to the image size. It encodes a region, which can be thought of as a foreground object mask.

Used in: LocationData

optional int32 width = 1
Dimensions of the mask.
optional int32 height = 2
optional Rasterization rasterization = 3
A rasterization-like format for storing the mask.

A bounding box in pixel units. The box is defined by its upper left corner (xmin, ymin) and its width and height.

Used in: LocationData

optional int32 xmin = 1
optional int32 ymin = 2
optional int32 width = 3
optional int32 height = 4

The supported formats for representing location data. A single location must store its data in exactly one way.

Used in: LocationData

GLOBAL = 0
The full image. This is a handy format when one needs to refer to the full image, e.g. one uses global image labels. No other fields need to be populated.
BOUNDING_BOX = 1
A rectangle aka bounding box of an object. The field bounding_box must be used to store the location data.
RELATIVE_BOUNDING_BOX = 2
A rectangle aka bounding box of an object, defined in coordinates normalized by the image dimensions. The field relative_bounding_box must be used to store the location data.
MASK = 3
A foreground mask. The field mask must be used to store the location data.

A bounding box. The box is defined by its upper left corner (xmin, ymin) and its width and height, all in coordinates normalized by the image dimensions.

Used in: LocationData

optional float xmin = 1
optional float ymin = 2
optional float width = 3
optional float height = 4

A keypoint. The keypoint is defined by the coordinates (x, y), normalized by the image dimensions.

Used in: LocationData

optional float x = 1
optional float y = 2
optional string keypoint_label = 3
optional float score = 4

A way to identify a part of an image. A locus does not need to correspond to a subset of pixels -- e.g. for a local descriptor we might define a locus in terms of its location and scale, even if the support of the descriptor is the entire image (with location-dependent weighting).

optional Locus.LocusType locus_type = 1
optional fixed64 locus_id = 2
A unique identifier for the locus. It is meaningless to compare the locus_ids in different images. The client should also not assume that applying the same processing to the same image multiple times will produce the same locus_id.
optional fixed64 locus_id_seed = 6
optional bool concatenatable = 5
"Concatenatable" loci have the property that they appear in the same number and order for all images, so their corresponding features can be concatenated. Examples of concatenatable loci include global loci, those corresponding to fixed bounding boxes, or a single most salient region. Loci produced by segmentation with a variable number of segments, on the other hand, are not concatenatable. This flag is true by default.
optional BoundingBox bounding_box = 3
Required if locus_type = BOUNDING_BOX, Specifies a bounding box for the label
optional int32 timestamp = 7
Specifies a timestamp if this locus appears in a video. timestamp is specified in mSec from start of the video and refers to the beginning of the locus.
optional Rasterization region = 4
Required if locus_type = REGION, Specifies a region using a scanline encoding
repeated Locus component_locus = 8
Required if locus_type = VIDEO_TUBE. Specifies the component loci of the tube.

Types of image loci on the granularity of the annotation.

Used in: Locus

GLOBAL = 1
The whole image, without localization.
BOUNDING_BOX = 2
The locus refers to a specified bounding box. Requires bounding_box below.
REGION = 3
The locus refers to specified regions in the image. Requires region below.
VIDEO_TUBE = 4
This locus refers to groups of loci. Requires component_locus below.

optional LogicCalculatorOptions.Operation op = 1
optional bool negate = 2
Whether to negate the result.
repeated bool input_value = 3
Optional bool input values.

The logical operation to apply.

Used in: LogicCalculatorOptions

AND = 0
OR = 1
XOR = 2

optional MaskOverlayCalculatorOptions.MaskChannel mask_channel = 1
Selects which channel of the MASK input to use for masking.

Used in: MaskOverlayCalculatorOptions

UNKNOWN = 0
RED = 1
ALPHA = 2

Proto for serializing Matrix data. Data are stored in column-major order by default.

optional int32 rows = 1
optional int32 cols = 2
repeated float packed_data = 3
optional MatrixData.Layout layout = 4
Order in which the data are stored. Defaults to COLUMN_MAJOR, which matches the default for mediapipe::Matrix and Eigen::Matrix*.

Used in: MatrixData

COLUMN_MAJOR = 0
ROW_MAJOR = 1

Options used by a MediaPipe object.

Used in: CalculatorGraphConfig, ExecutorConfig, InputStreamHandlerConfig, OutputStreamHandlerConfig, StatusHandlerConfig

(message has no fields)

Used in: MfccCalculatorOptions

optional int32 channel_count = 1
Total number of frequency bands to use.
optional float min_frequency_hertz = 2
Lower edge of lowest triangular Mel band.
optional float max_frequency_hertz = 3
Upper edge of highest triangular Mel band.

Stores offsets for random seek and time offsets for each frame of TrackingData. Stream offsets are specified relative to the end of the metadata blob. Offsets specify the start of the corresponding binary encoded TrackingContainer (for TrackingContainerFormat) or BinaryTrackingData proto (for TrackingContainerProto).

TrackingContainer::header = "META"

Used in: TrackingContainerProto

optional fixed32 num_frames = 2
repeated MetaData.TrackOffset track_offsets = 3

Used in: MetaData

optional fixed32 msec = 1
Time offset of the metadata in msec.
optional fixed32 stream_offset = 2
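Offset of the corresponding TrackingContainer or BinaryTrackingData proto in the stream, specified relative to the end of the metadata blob.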

optional MelSpectrumCalculatorOptions mel_spectrum_params = 1
Specification of the underlying mel filterbank.
optional uint32 mfcc_count = 2
How many MFCC coefficients to emit.

repeated AffineModel model = 1

Used in: ToneChange

repeated AffineToneModel model = 1

Used in: ToneChange

repeated GainBiasModel model = 1

Used in: CameraMotion

repeated Homography model = 1
optional MixtureHomography.VariableDOF dof = 2

Specifies which degree of freedom vary across mixture. Can be used to implement several transformation functions quicker.

Used in: MixtureHomography

ALL_DOF = 0
All dof are variable.
TRANSLATION_DOF = 1
Only translation (h_02, h_12) varies.
SKEW_ROTATION_DOF = 2
Translation (h_02, h_12), and skew-rotation (h_01, h_10) vary.
CONST_DOF = 3
Mixture is constant.

Mixture models with higher degrees of freedom, according to \sum_i model(i) * weight(i), where weights are passed during transform and are expected to sum to one.

repeated LinearSimilarityModel model = 1

Next tag: 10

optional MotionAnalysisOptions analysis_options = 1
optional MotionAnalysisCalculatorOptions.SelectionAnalysis selection_analysis = 4
optional bool hybrid_selection_camera = 5
If activated when SELECTION input is activated, will replace the computed camera motion (for any of the ANALYSIS_* case above) with the one supplied by the frame selection, in case the frame selection one is more stable. For example, if recomputed camera motion is unstable but the one from the selection result is stable, will use the stable result instead.
optional MotionAnalysisCalculatorOptions.MetaAnalysis meta_analysis = 8
optional int32 meta_models_per_frame = 6
Determines number of homography models per frame stored in the CSV file or the homography metadata in META. For values > 1, MixtureHomographies are created.
optional float meta_outlier_domain_ratio = 9
Used for META_ANALYSIS_HYBRID. Rejects features whose flow deviates by domain_ratio * image diagonal size from the ground truth metadata motion.
optional bool bypass_mode = 7
If true, the MotionAnalysisCalculator will skip all processing and emit no packets on any output. This is useful for quickly creating different versions of a MediaPipe graph without changing its structure, assuming that downstream calculators can handle missing input packets. TODO: Remove this hack. See b/36485206 for more details.

Determines how optional input META is used to compute the final camera motion.

Used in: MotionAnalysisCalculatorOptions

META_ANALYSIS_USE_META = 1
Uses metadata supplied motions as is.
META_ANALYSIS_HYBRID = 2
Seeds visual tracking from metadata motions - estimates visual residual motion and combines with metadata.

Determines how optional input SELECTION (if present) is used to compute the final camera motion.

Used in: MotionAnalysisCalculatorOptions

ANALYSIS_RECOMPUTE = 1
Recompute camera motion for selected frame neighbors.
NO_ANALYSIS_USE_SELECTION = 2
Use composited camera motion and region flow from SELECTION input. No tracking or re-computation is performed. Note that in this case only CAMERA, FLOW and VIDEO_OUT tags are supported as output.
ANALYSIS_FROM_FEATURES = 3
Recompute camera motion for selected frame neighbors using features supplied by SELECTION input. No feature tracking is performed.
ANALYSIS_WITH_SEED = 4
Recomputes camera motion for selected frame neighbors but seeds initial transform with camera motion from SELECTION input.

Settings for MotionAnalysis. This class computes sparse, locally consistent flow (referred to as region flow), camera motions, and foreground saliency (i.e. likely foreground objects moving differently from the background). Next tag: 16

Used in: MotionAnalysisCalculatorOptions

optional MotionAnalysisOptions.AnalysisPolicy analysis_policy = 14
optional RegionFlowComputationOptions flow_options = 1
Options for the actual motion stabilization (in order of object usage).
optional MotionEstimationOptions motion_options = 2
optional MotionSaliencyOptions saliency_options = 3
optional int32 estimation_clip_size = 4
Clip-size used for (parallelized) motion estimation.
optional bool subtract_camera_motion_from_features = 5
If set, camera motion is subtracted from features before output. Effectively outputs residual motion w.r.t. background.
optional int32 track_index = 6
If flow_options().tracking_options().tracking_policy() equals POLICY_MULTI_FRAME, this flag indicates which RegionFlowFeatureList to use. Specifically, for frame C, we use the motion from C to C - 1 - track_index.
optional bool compute_motion_saliency = 7
If set, compute motion saliency (regions of moving foreground).
optional bool select_saliency_inliers = 8
Selects saliency inliers (only saliency locations with sufficient spatial and temporal support are kept). Only applied when compute_motion_saliency is set.
optional bool filter_saliency = 9
Performs spatio-temporal filtering of extracted foreground saliency. If used with above selection of saliency inliers, filtering is performed *after* inlier selection. Only applied when compute_motion_saliency is set.
optional bool post_irls_smoothing = 10
If set, irls weights of motion estimation are spatio-temporally smoothed after model estimation.
optional float rejection_transform_threshold = 13
If a rejection_transform is passed to AddFrameGeneric, features that do not agree with the transform within below threshold are removed.
optional MotionAnalysisOptions.VisualizationOptions visualization_options = 11
optional MotionAnalysisOptions.ForegroundOptions foreground_options = 12

Pre-configured policies for MotionAnalysis. For general use, it is recommended to select an appropriate policy instead of customizing flow and motion options by hand. Policies are being kept up to date with appropriate settings.

Used in: MotionAnalysisOptions

ANALYSIS_POLICY_LEGACY = 0
Default legacy options. Effectively a no-op.
ANALYSIS_POLICY_VIDEO = 1
Use for video.
ANALYSIS_POLICY_VIDEO_MOBILE = 2
Use for video on mobile.
ANALYSIS_POLICY_CAMERA_MOBILE = 3
Use if applied to camera stream on mobile, e.g. low latency and high throughput. ASSUMES DOWNSAMPLED INPUT, e.g. from GPU.
ANALYSIS_POLICY_HYPERLAPSE = 4
Use for sped up video / hyperlapse when adding frames with seeds and rejection transforms. Mostly ups temporal consistency weights and relaxes stability constraints. Only recommended to be used as second pass after initial MotionAnalysis and FrameSelection.

Describes how to compute foreground from features.

Used in: MotionAnalysisOptions

optional float foreground_threshold = 1
Indicates the *inverse* registration error (i.e. the irls weight) that is deemed a complete inlier. Weights in the interval [0, foreground_threshold] (corresponding to pixel errors in the interval [1 / foreground_threshold, inf]) are mapped to 1 - [0, 1], i.e. foreground threshold is mapped to zero with weights below the threshold being assigned values > 0. Therefore, larger values will increase amount of detected foreground as well as noise.
optional float foreground_gamma = 2
By using foreground_gamma < 1.0 you can increase resolution of small foreground motion at the expense of the resolution of large foreground motions.
optional bool threshold_coverage_scaling = 3
Threshold is scaled by coverage, i.e. for frames with large registration error less foreground is visualized.

Adapts visualization for rendered_results when passed to GetResults.

Used in: MotionAnalysisOptions

optional bool visualize_region_flow_features = 1
Visualizes tracked region flow features, colored w.r.t. fitting error.
optional bool visualize_salient_points = 2
Visualizes salient points. Only applicable if compute_motion_saliency is set to true.
optional int32 line_thickness = 5
Line thickness of ellipse when rendering salient points.
optional bool foreground_jet_coloring = 3
Instead of green burn in uses jet coloring to indicate magnitude of foreground motion.
optional bool visualize_blur_analysis_region = 4
If set, only keeps masks of pixels that are used for blur analysis; the rest is set to zero.
optional bool visualize_stats = 6
optional int32 min_long_feature_track = 7
Only long feature tracks with specified minimum length are rendered. Set to zero to consider all tracks.
optional int32 max_long_feature_points = 8
Only the last N points of a long feature track are rendered. Set to zero to render all points.

Captures additional internal state info about the tracking.

Used in: MotionBoxState

repeated float pos_x = 1
Stores all motion vectors that were used for tracking as packed arrays, capturing position, object motion, camera motion, tracking id and corresponding inlier weight.
repeated float pos_y = 2
repeated float dx = 3
repeated float dy = 4
repeated float camera_dx = 5
repeated float camera_dy = 6
repeated int32 track_id = 7
repeated float inlier_score = 8
Within [0, 1]. 0 = outlier; 1 = inlier.

Next tag: 38

optional float pos_x = 1
Position (top-left corner) and fixed size of the current MotionBox, specified w.r.t. normalized domain (in [0, 1] along both dimensions).
optional float pos_y = 2
optional float width = 3
optional float height = 4
optional float scale = 5
Optional degrees of freedom; scale and rotation w.r.t. center of the box, i.e. [pos_x, pos_y] + 0.5 * [width, height]. To activate see TrackStepOptions::TrackingDegrees.
optional float rotation = 30
in radians.
optional MotionBoxState.Quad quad = 34
This field is only used when we try to track under TRACKING_DEGREE_OBJECT_PERSPECTIVE.
optional float aspect_ratio = 35
Aspect ratio (width / height) for the tracked rectangle in physical space.
optional bool request_grouping = 37
Whether we want this box to be potentially grouped with other boxes to track together. This is useful for tracking small boxes that lie on a plane. For example, when we detect a plane, track the plane, then all boxes within the plane can share the same homography transform.
optional Homography pnp_homography = 36
For quad tracking using the pnp solver: whether we use perspective-n-points to track the quad between frames. That mode requires: 1. The quad which is being tracked is a rectangle in the physical world. 2. The `aspect_ratio` field has to be set in MotionBoxState.
optional float dx = 7
Object velocity in x and y, specified as normalized spatial unit per standard frame period (here calibrated w.r.t. kTrackingDefaultFps = 30 FPS), that is 33.3 ms. Object velocity refers to velocity after subtracting camera motion. If current frame period is 66.67 ms (i.e. 15 fps), actual velocity is obtained by multiplying with a factor of 2. Similarly, for 60 fps the factor is 0.5f. Standard frame period is chosen for legacy reasons to keep TrackStepOptions defaults.
optional float dy = 8
optional float kinetic_energy = 17
Weighted average of object velocity magnitude of inlier points (expressed in normalized spatial units per standard frame period).
optional float prior_weight = 9
Specifies how valid the prior was in the last step.
optional MotionBoxState.TrackStatus track_status = 10
optional int32 spatial_prior_grid_size = 11
Spatial prior (presence of inliers, i.e. where is the object located within the box that is currently being tracked) as a pair of a) prior (in [0, 1]) and b) confidence (number of features converted to score within [0, 1]). Prior is defined over a grid of size spatial_prior_grid_size x spatial_prior_grid_size.
repeated float spatial_prior = 12
repeated float spatial_confidence = 13
optional float prior_diff = 14
Difference score between previous prior and current prior (in [0, 1]). Currently not used.
optional float motion_disparity = 15
Score determining how much predicted motion disagrees with measured motion. If measured motion deviates strongly from predicted motion, disparity is +/-1, if motion agrees with predicted motion, disparity is 0. Sign indicates measured motion is accelerating (> 0) or de-accelerating (< 0) w.r.t. predicted motion.
optional float background_discrimination = 16
Score determining how discriminative estimated motion model is. In [0, 1] where 0 no discrimination w.r.t. background and 1 high discrimination.
optional float inlier_center_x = 18
Center of mass for inliers after tracking (center of features that were used for motion estimation).
optional float inlier_center_y = 19
optional float inlier_sum = 24
Approximate number of inliers (each feature scores a zero [outlier] or one [inlier]).
optional float inlier_ratio = 25
Ratio of above inlier_sum to average inlier_sum across last states.
optional float inlier_width = 22
Extent (width and height of inliers).
optional float inlier_height = 23
repeated uint32 inlier_ids = 26
Set of current inlier tracking ids.
repeated uint32 inlier_id_match_pos = 31
Corresponding x,y coordinates for each inlier.
repeated uint32 inlier_length = 27
Corresponding inlier score (currently: length of inlier observed).
repeated uint32 outlier_ids = 28
Set of outlier ids.
repeated uint32 outlier_id_match_pos = 32
Corresponding x,y coordinates for each outlier.
optional float tracking_confidence = 33
Confidence of box tracked in the range [0, 1], with 0 being least confident, and 1 being most confident. A reasonable threshold is 0.5 to filter out unconfident boxes.
optional MotionBoxInternalState internal = 29
Additional internal state.

Used in: MotionBoxState, TimedBoxProto

repeated float vertices = 1
Vertex 0 is according to x_0 = vertices(0), y_0 = vertices(1); Vertex 1 is according to x_1 = vertices(2), y_1 = vertices(3); Vertex 2 is according to x_2 = vertices(4), y_2 = vertices(5); Vertex 3 is according to x_3 = vertices(6), y_3 = vertices(7). Order of vertices should be aligned in counter-clockwise manner: vertex 0 at the top-left corner, vertex 1 at the bottom-left, vertex 2 at the bottom-right and vertex 3 at the top-right (top edge 0---3, bottom edge 1---2).

Tracking status indicating result of tracking: UNTRACKED: Box can not be tracked (either out of bound or too many tracking failures). EMPTY: Box has size of <= 0 along at least one of its dimensions (collapsed). NO_FEATURES: No features found within the box, tracking is not possible. TRACKED: Successful tracking. DUPLICATED: Successfully tracked, but duplicated from previous result as frame was duplicated. BOX_TRACKED_OUT_OF_BOUND: Successfully tracked, out of bound from screen area. Will advance by camera motion. Only used for static objects.

Used in: MotionBoxState

BOX_UNTRACKED = 0
BOX_EMPTY = 1
BOX_NO_FEATURES = 2
BOX_TRACKED = 3
BOX_DUPLICATED = 4
BOX_TRACKED_OUT_OF_BOUND = 5

Note: In general for Estimation modes, the prefixes are used as follows: L2: minimize squared norm of error; IRLS: iteratively reweighted least squares, L2 minimization using multiple iterations, downweighting outliers. Next tag: 69

Used in: MotionAnalysisOptions

optional bool estimate_translation_irls = 1
Specifies which camera models should be estimated, translation is always estimated.
optional MotionEstimationOptions.LinearSimilarityEstimation linear_similarity_estimation = 3
optional MotionEstimationOptions.AffineEstimation affine_estimation = 30
optional MotionEstimationOptions.HomographyEstimation homography_estimation = 5
optional bool homography_exact_denominator_scaling = 53
By default, homography estimation minimizes an objective that is not strictly the L2 distance between matched points. If the flag is set, each row of the linear system is scaled with the exact denominator which results in an objective that minimizes the L2 distance.
optional bool use_exact_homography_estimation = 54
Per default, we use exact solver for over-determined system using well-conditioned QR decomposition. For better speed, set value to false to use estimation via normal equations.
optional bool use_highest_accuracy_for_normal_equations = 55
If set uses double instead of float when computing normal equations.
optional float homography_perspective_regularizer = 61
Regularizer for perspective part of the homography. If zero, no regularization is performed. Should be >= 0.
optional MotionEstimationOptions.MixtureHomographyEstimation mix_homography_estimation = 12
optional int32 num_mixtures = 13
If row-wise mixture models are estimated, determines number of them. Note, changing number of mixtures, interpolation sigma and regularizer is very likely to impact the stability analysis for mixtures and rolling shutter scoring. At least MixtureHomographyBounds would need to be adjusted to the new values.
optional float mixture_row_sigma = 14
If row-wise mixture models are estimated, determines how much each point is influenced by its neighboring mixtures. Specified as relative sigma (standard deviation) w.r.t. frame_height.
optional float mixture_regularizer = 15
Mixture estimation uses L2 regularizer to assure that adjacent mixture models are similar.
optional float mixture_regularizer_levels = 42
Mixtures are estimated across a spectrum of exponentially increasing regularizers. In particular the regularizer at level L is given as mixture_regularizer * mixture_regularizer_base^L. A maximum of 10 levels are supported (checked!). Note: When changing the number of levels you probably want to adapt the MotionStabilizationOptions::rolling_shutter_increment value as well, as the number of levels directly controls the highest threshold for the rolling shutter index analysis.
optional float mixture_regularizer_base = 43
optional int32 mixture_rs_analysis_level = 44
optional int32 irls_rounds = 17
IRLS rounds to down-weight outliers (default across all models). Note: IRLS in combination with full mixture models (as opposed to the default reduced ones) is somewhat expensive.
optional float irls_prior_scale = 50
If set to > 0 (always needs to be less than 1.0), influence of supplied prior irls weights is linearly decreased from the specified prior scale (weight 1.0) to prior_scale. Effectively, biases the solution to the supplied prior features. Note: Without irls_weights_preinitialized set to true, this option is effectively a no op. TODO: Retire this option.
optional float irls_motion_magnitude_fraction = 31
Determine how to normalize irls weights w.r.t. average motion magnitude. In general a residual of 1 pixel is assigned an IRLS weight of 1. However as larger motions in general are affected by a larger error, we normalize irls weights, such that a residual of distance of irls_motion_magnitude_fraction times <average translation magnitude> equals an IRLS weight of 1. Must be larger than zero.
optional float irls_mixture_fraction_scale = 68
Scale that is applied for mixture (where error is expected to be bigger).
optional bool irls_weights_preinitialized = 39
By default, irls weight of all features are set uniformly to one before estimating EACH model, refining them in subsequent irls iterations. If flag below is set, input irls weights are used instead for each motion model.
optional bool filter_initialized_irls_weights = 67
If weights are pre-initialized optionally min filter weights along track ids when long tracks are used. This can be used to consistently label outliers in time before estimation.
optional MotionEstimationOptions.IrlsOutlierInitialization irls_initialization = 56
optional bool feature_density_normalization = 62
Normalizes feature's irls weights prior to estimation such that feature in high density areas are downweighted. Multiplicative in case irls_weights_preinitialized is set to true.
optional int32 feature_mask_size = 63
A regular grid of size feature_mask_size x feature_mask_size is used to normalize features w.r.t. their density.
optional MotionEstimationOptions.LongFeatureInitialization long_feature_initialization = 66
optional MotionEstimationOptions.IrlsMaskOptions irls_mask_options = 57
optional MotionEstimationOptions.JointTrackEstimationOptions joint_track_estimation = 59
optional MotionEstimationOptions.LongFeatureBiasOptions long_feature_bias_options = 64
optional MotionEstimationOptions.EstimationPolicy estimation_policy = 58
optional int32 coverage_grid_size = 51
optional MotionEstimationOptions.MixtureModelMode mixture_model_mode = 23
optional bool use_only_lin_sim_inliers_for_homography = 6
If specified, only features that agree with the estimated linear similarity will be used to estimate the homography. If set, linear_similarity_estimation can not be ESTIMATION_NONE! (checked)
optional float lin_sim_inlier_threshold = 20
Max. deviation to be considered an inlier w.r.t. estimated similarity for above flag. This value is set w.r.t. normalized frame diameter. TODO: Should take GetIRLSResidualScale into account.
optional MotionEstimationOptions.TranslationBounds stable_translation_bounds = 32
optional MotionEstimationOptions.SimilarityBounds stable_similarity_bounds = 33
optional MotionEstimationOptions.HomographyBounds stable_homography_bounds = 11
optional MotionEstimationOptions.MixtureHomographyBounds stable_mixture_homography_bounds = 34
optional float strict_coverage_scale = 41
Scale for stricter coverage evaluation. Used for rolling shutter guess computation, by only using high quality inliers. Larger values reflect stricter coverage. Specifically, when computing coverage via GridCoverage call, frac_inlier_threshold is reduced (divided) by specified scale below.
optional bool label_empty_frames_as_valid = 22
By default frames with zero trackable features (e.g. at the beginning, empty frame or shot boundary) are set identity model but still labeled as valid. If set to false, these frames are flagged as invalid, which can be useful to locate shot boundaries, etc.
optional float feature_grid_size = 24
Setting for temporal smoothing of irls weights in optional post-processing step. In normalized coordinates w.r.t. frame domain.
optional float spatial_sigma = 25
optional int32 temporal_irls_diameter = 26
Frame diameter across which smoothing is performed.
optional float temporal_sigma = 27
in frames.
optional float feature_sigma = 28
Bilateral weight (for un-normalized color domain [0, .. 255]).
optional bool filter_5_taps = 29
If set to false 3 taps are used.
optional bool frame_confidence_weighting = 48
If set, during temporal smoothing, each frame is weighted by its confidence, defined as the square coverage (or square mean mixture coverage). Therefore, low confidence fits do not erroneously propagate over time. In addition, if the confidence is below the specified confidence_threshold (relative to the maximum coverage observed in the test interval), irls weights are reset to 1, i.e. biased to agree with the (unknown) background motion.
optional float reset_confidence_threshold = 49
optional MotionEstimationOptions.IRLSWeightFilter irls_weight_filter = 35
Calls TextureFilteredRegionFlowFeatureIRLSWeights on computed irls weights before smoothing them.
optional bool overlay_detection = 36
Attempts to detect overlays, i.e. static elements burned-into the video that potentially corrupt motion estimation.
optional int32 overlay_analysis_chunk_size = 37
Overlay detection is performed over specified number of frames.
optional MotionEstimationOptions.OverlayDetectionOptions overlay_detection_options = 38
optional MotionEstimationOptions.ShotBoundaryOptions shot_boundary_options = 60
optional bool output_refined_irls_weights = 40
By default, irls weights of each feature are overwritten with refined irls weights of the last iteration for the highest degree of freedom model that was estimated stable. If set to false, original irls weights are retained. Note: If overlay detection is activated, features to be deemed overlays have their irls weight set to zero, regardless of this setting. Similarly, an IRLSWeightFilter is applied if requested, regardless of this setting.
optional MotionEstimationOptions.HomographyIrlsWeightInitialization homography_irls_weight_initialization = 45
IRLS weights for homography estimation are initialized based on the specified options. If option irls_weights_preinitialized is set, weights are multiplied instead of reset.
optional bool irls_use_l0_norm = 46
If set to false use L1 norm irls weights instead of L0 norm irls weights.
optional bool domain_limited_irls_scaling = 65
IRLS weights are determined in a limited domain (in particular helpful for stabilization analysis on HD videos). TODO: Make this the default.
optional bool deactivate_stable_motion_estimation = 47
For comparison and debugging purposes. Simply estimates requested models without checking their stability via the stable_*_bounds parameters. However, invertibility is still checked to avoid invalid data being passed to later stages of the stabilizer.
optional bool project_valid_motions_down = 52
Projects higher order motions if estimated correctly down to lower order motions, therefore replacing the previously estimated motions.
optional bool estimate_similarity = 2
DEPRECATED functionality. Use static functions as indicated instead. Non-linear similarity, use MotionEstimation::EstimateSimilarityModelL2.

Used in: MotionEstimationOptions

ESTIMATION_AFFINE_NONE = 0
ESTIMATION_AFFINE_L2 = 1
ESTIMATION_AFFINE_IRLS = 2

Controls how multiple models via EstimateMotionsParallel are estimated.

Used in: MotionEstimationOptions

INDEPENDENT_PARALLEL = 1
Models are estimated independently across frames in parallel.
TEMPORAL_IRLS_MASK = 2
Previous frame's estimation biases current one, controlled via above IrlsMaskOptions.
TEMPORAL_LONG_FEATURE_BIAS = 4
Frame's estimation is biased along long features, controlled via above LongFeatureBiasOptions.
JOINTLY_FROM_TRACKS = 3
Estimation is performed jointly over many frames along long feature tracks (see JointTrackEstimationOptions).

If any parameter of the estimated homography exceeds these bounds, we deem it UNSTABLE_SIM and use estimated similarity instead.

Used in: MotionEstimationOptions

optional float lower_scale = 1
optional float upper_scale = 2
1 / 0.8.
optional float limit_rotation = 3
15 degrees.
optional float limit_perspective = 4
optional float registration_threshold = 5
Inlier coverage is only tested for if average homography error exceeds registration_thresholds. Max of the following two thresholds is used. Absolute in pixels.
optional float frac_registration_threshold = 8
Scaled by frame diameter.
optional float min_inlier_coverage = 6
Minimum fraction of inlier features w.r.t. frame area.
optional float frac_inlier_threshold = 7
Grid coverage inlier threshold. Pixel errors below this threshold are considered inliers. Defined w.r.t. frame diameter, approx. 1.5 for 16:9 SD video (480p), i.e. threshold is multiplied by frame diameter.

Used in: MotionEstimationOptions

ESTIMATION_HOMOG_NONE = 0
ESTIMATION_HOMOG_L2 = 1
ESTIMATION_HOMOG_IRLS = 2

Weight initialization for homography estimation. This is to bias homography estimation either to foreground or background.

Used in: MotionEstimationOptions

IRLS_WEIGHT_CONSTANT_ONE = 1
Constant, treat all features equally.
IRLS_WEIGHT_CENTER_GAUSSIAN = 2
Weight features in the center higher. Tends to lock onto foreground.
IRLS_WEIGHT_PERIMETER_GAUSSIAN = 3
Weight features around the perimeter higher.

Filters irls weights before smoothing them according to specified operation.

Used in: MotionEstimationOptions

IRLS_FILTER_NONE = 0
IRLS_FILTER_TEXTURE = 1
IRLS_FILTER_CORNER_RESPONSE = 2

Irls initialization can be performed in a temporally dependent manner (if estimation_policy() == TEMPORALLY_DEPENDENT), where the previous frame's motion estimation biases the IrlsInitialization of the currently processed frame. In particular the location and magnitude of inliers is used during the RANSAC selection stage, to favor those features that agree with the prior, represented as confidence mask of inliers (using same dimension as above feature_mask_size). After estimation, the prior is updated.

Used in: MotionEstimationOptions

optional float decay = 2
Amount prior is decayed after each iteration.
optional float inlier_score = 3
Score that each inlier adds to the current prior. Specified w.r.t. total number of features, i.e. each feature increases a bins score by inlier_score.
optional float base_score = 4
Each inlier scores at least this value regardless of the inlier mask (additive).
optional float min_translation_norm = 5
Motions are scored relative to previous motion. Threshold denotes absolute minimum of denominator.
optional float translation_blend_alpha = 6
Translation is updated in every step by blending it with the previous estimated translation. (alpha is within 0 to 1, where 0 indicates to use only measured translation, i.e. no blending).
optional float translation_prior_increase = 7
Every time translation is updated, prior (in [0, 1]) is increased by the specified amount.

If activated, irls weight of outlier features are reset. Outliers are defined as those features, for which the best model fit after #rounds iterations of RANSAC did NOT yield an error lower than cutoff. Only applies to translation and similarity estimation.

Used in: MotionEstimationOptions

optional bool activated = 1
optional int32 rounds = 2
optional float cutoff = 3

Describes how long feature tracks are leveraged for joint estimation across many frames.

Used in: MotionEstimationOptions

optional int32 num_motion_models = 1
For each frame-pair motion model, describing the motion between frame I and I - 1, estimate in addition several additional motion models along long feature tracks describing the motion between frame I and I - k * motion_stride (additional models are not output, but help to filter irls weights). Specifies total number of estimated motion models per frame-pair. Must be greater than zero.
optional int32 motion_stride = 2
Spacing in frames for additional motion models.
optional bool temporal_smoothing = 3
If set, performs temporal smoothing across frames of the obtained irls weights.

Used in: MotionEstimationOptions

ESTIMATION_LS_NONE = 0
ESTIMATION_LS_L2 = 1
L2 estimation
ESTIMATION_LS_IRLS = 4
good performance, robust to outliers.
ESTIMATION_LS_L2_RANSAC = 2
DEPRECATED modes.
DEPRECATED, use IRLS instead.
ESTIMATION_LS_L1 = 3
DEPRECATED, use IRLS instead, or static functions.

Options being used to bias IRLS features if estimation mode TEMPORAL_LONG_FEATURE_BIAS is being used. Next Tag: 15

Used in: MotionEstimationOptions

optional int32 total_rounds = 13
Estimation is performed multiple times, alternating between model estimation and smooth temporal feature biasing for the specified number of rounds.
optional float inlier_bias = 1
Controls how fast the bias for a track gets updated, in case feature is an inlier. Use higher values for less decay of background motion over time.
optional float outlier_bias = 2
Same as above for outliers (or features with low prior), i.e those that got recently seeded.
optional int32 num_irls_observations = 3
Number of elements after which we deem estimation to be stable. Used to control weight of bias if fewer than the specified number have been observed. Also used as maximum ring buffer size (only most recent number of observations are kept). Must be > 0.
optional float max_irls_change_ratio = 4
Change in irls weight magnitude (from outlier to inlier) above which we reset the current bias.
optional float inlier_irls_weight = 5
Irls weight above which we consider it to be an inlier for bias update purposes (see above inlier and outlier bias). By default, outliers are allowed to update their bias faster than inliers. Must be > 0.
optional float bias_stdev = 12
Standard deviation used during feature initialization. Current bias of a track is used to pre-weight features via gaussian weighting with specified standard deviation.
optional bool use_spatial_bias = 6
When seeding new tracks (on the first frame), we bilaterally pool neighboring feature biases as seed. Details are controlled by options below. If false, the feature's estimation error is used instead (faster, but less spatially smooth). If activated it is advised to use a patch descriptor radius of at least 20 pixels.
optional float grid_size = 7
Newly observed tracks' biases are seeded by similar looking features in close spatial proximity. For efficiency a grid is used to determine proximity. Grid size in normalized coordinates w.r.t. frame domain.
optional float spatial_sigma = 8
Sigma's for combining feature biases.
optional float color_sigma = 9
optional int32 long_track_threshold = 10
Defines what we consider to be a long track. Features spawned around locations of similar looking long tracks are considered to have high prior, e.g. their initialization is given more weight.
optional float long_track_confidence_fraction = 11
Determines which fraction of long tracks is considered to be sufficient for highly confident bias seed.
optional bool seed_priors_from_bias = 14
If activated, uses the irls weights from the estimation of the lower degree of freedom model to seed the bias of the higher degree of freedom model. This improves rigidity of the computed motion.

In addition to above outlier and density initialization, long features that are present for a specified ratio of the analysis interval can be upweighted. This greatly improves temporal consistency.

Used in: MotionEstimationOptions

optional bool activated = 1
optional float min_length_percentile = 2
Tracks with a length greater or equal to the specified percentile are upweighted by the specified upweight_multiplier.
optional float upweight_multiplier = 3
Features passing above test have their irls weight increased by the specified multiplier prior to estimation.

If any parameter of the estimated homography mixture exceeds these bounds, we deem it UNSTABLE_HOMOG and use the estimated homography instead.

Used in: MotionEstimationOptions

optional float min_inlier_coverage = 1
Minimum fraction of inlier features w.r.t. block area.
optional int32 max_adjacent_outlier_blocks = 2
Each block is tested to be stable, regarding the outliers. A frame is labeled unstable, if more or equal than the specified adjacent blocks are labeled outliers.
optional int32 max_adjacent_empty_blocks = 3
Maximum number of adjacent empty blocks (no inliers).
optional float frac_inlier_threshold = 7
Grid coverage inlier threshold. See identical parameter in HomographyBounds.

Note: Mixture models have high DOF and are much more affected by outliers than models above. It is recommended that if IRLS estimation is NOT used, that mixture_regularizer is increased by a factor >=3.

Used in: MotionEstimationOptions

ESTIMATION_HOMOG_MIX_NONE = 0
ESTIMATION_HOMOG_MIX_L2 = 1
ESTIMATION_HOMOG_MIX_IRLS = 2
robust to outliers.

Degree of freedom of estimated homography mixtures. If desired, specific parts of the homography can be held constant across the mixture. For fast draft TRANSLATION_MIXTURE is recommended, for high quality SKEW_ROTATION_MIXTURE.

Used in: MotionEstimationOptions

FULL_MIXTURE = 0
8 dof * num_mixtures
TRANSLATION_MIXTURE = 1
6 dof + 2 dof * num_mixtures
SKEW_ROTATION_MIXTURE = 2
4 dof + 4 dof * num_mixtures

Used in: MotionEstimationOptions

optional int32 analysis_mask_size = 1
Potential overlay features are aggregated over a mask with cells mask_size x mask_size as specified below.
optional float strict_near_zero_motion = 2
A feature is a strict overlay feature if its motion is less than near_zero_motion AND less than max_translation_ratio times the estimated translation magnitude at that frame AND its texturedness is sufficiently high.
optional float strict_max_translation_ratio = 3
optional float strict_min_texturedness = 5
Minimum texturedness of a feature to be considered an overlay. Motivation: Overlays are mostly text or graphics, i.e. have visually distinguished features.
optional float loose_near_zero_motion = 4
A feature is a loose overlay feature if its motion is less than loose_near_zero_motion.
optional float overlay_min_ratio = 6
Minimum fraction of strict overlay features within a cell to be considered an overlay cell.
optional float overlay_min_features = 7
Absolute minimum number of strict overlay features within a cell to be considered an overlay cell.

Shot boundaries are introduced in 3 different scenarios: a) Frame has zero tracked features w.r.t. previous frame b) Estimated motion is deemed invalid (CameraMotion::INVALID). c) Visual consistency is above threshold of two adjacent frames.

Used in: MotionEstimationOptions

optional float motion_consistency_threshold = 1
After cases a & b are determined from features/camera motion, they are verified by ensuring visual consistency is above specified threshold, if visual consistency has been computed. Only if this is the case will the frame be labeled as shot boundary. Motivation is, that there should always be some (even small) measurable increase in the frame difference at a shot boundary. Verification is only performed if visual_consistency has been evaluated (value >= 0).
optional float appearance_consistency_threshold = 2
Threshold for case c). Sometimes, motion estimation will miss shot boundaries. We define shot boundaries for which the visual consistency is higher than the specified threshold for at least two adjacent frames.

If any test/bound is violated, the motion is deemed UNSTABLE.

Used in: MotionEstimationOptions

optional bool only_stable_input = 1
Input frame has to be labeled stable, i.e. enough features and coverage present.
optional float min_inlier_fraction = 2
Minimum number of inlier features (absolute and as fraction of total number of features). TODO: Dataset run setting this to 0.15
optional float min_inliers = 3
optional float lower_scale = 4
Bounds on valid similarities. We use larger values compared to homographies. Note: Bounds are necessary, to guarantee invertibility of the resulting similarity.
optional float upper_scale = 5
1 / 0.8.
optional float limit_rotation = 6
15 degrees.
optional float inlier_threshold = 7
Thresholds for a feature to be considered inlier w.r.t similarity transform, expressed in terms of pixel residual error. Max of absolute and fractional thresholds is used. Ratio of inliers that pass regular and strict thresholds are stored in CameraMotion. TODO: Just use lin_sim_inlier_threshold directly, however that recomputes the error, and requires regression testing. Using an extra fractional inlier threshold for now. Absolute in pixels.
optional float frac_inlier_threshold = 8
Scaled by frame diameter.
optional float strict_inlier_threshold = 9
TODO: Revisit after frame selection change. Absolute in pixels.

If any parameter of the input flow or estimated translation exceeds these thresholds we deem the motion INVALID.

Used in: MotionEstimationOptions

optional int32 min_features = 1
Absolute minimum of features present.
optional float frac_max_motion_magnitude = 2
Max magnitude of the translation expressed w.r.t. frame diameter
optional float max_motion_stdev_threshold = 4
Motion magnitude is only tested for if standard deviation of estimated translation exceeds threshold.
optional float max_motion_stdev = 3
Max standard deviation of the estimated translation (normalized to frame diameter).
optional float max_acceleration = 5
Maximum acceleration between frames. Specified relative to minimum velocity across two adjacent frames (absolute minimum of 0.001 is enforced, ~1 pix for 480p). If exceeded for one frame, the whole batch passed to EstimateMotionsParallel is labeled unstable.

Next tag: 17

Used in: MotionAnalysisOptions

optional float bound_left = 1
Standard normalized bounds and weights used to initialize salient points. See region_flow.proto for details.
optional float bound_bottom = 2
optional float bound_right = 15
optional float bound_top = 16
optional float saliency_weight = 3
optional bool scale_weight_by_flow_magnitude = 8
If set, scales saliency_weight by flow magnitude.
optional int32 min_features = 4
Minimum number of features within a region to be considered salient. Only applicable for functions accepting RegionFlowFrames.
optional bool use_only_foreground_regions = 9
If set, only considers regions flagged as foreground.
optional float min_irls_mode_weight = 10
Specifies roughly number of foreground features mapped to one mode, for mode to be considered salient.
optional int32 num_top_irls_modes = 11
Only returns the top N irls modes.
optional float mode_band_width = 12
Mode finding is performed with a fraction radius of 10% of frame diameter by default.
optional int32 selection_frame_radius = 5
We filter salient points along the temporal dimension only, keeping those that have sufficient support (in form of neighboring salient points). For every salient point in frame n, all points in frames [n - selection_frame_radius, n + selection_frame_radius] are tested, whether they support the current test point.
optional float selection_support_distance = 6
Fractional distance to be considered a supporting salient point for a test point.
optional int32 selection_minimum_support = 7
Minimum number of supporting salient points that need to be present in order for a point to be considered an inlier.
optional float filtering_sigma_space = 13
Sigma in space (normalized domain).
optional float filtering_sigma_time = 14
Sigma in time (in frames).

Header for a multi-stream time series. Each packet in the associated stream is a vector<Matrix> of size num_streams. Each Matrix in the vector is as specified by the time_series_header field.

optional TimeSeriesHeader time_series_header = 1
optional int32 num_streams = 2

A proto2 calculator options for testing.

optional double frame_rate = 1
The output frame rate measured in frames per second.
optional NightLightCalculatorOptions.OutputHeader output_header = 2
Whether and what kind of header to place on the output stream.
optional double jitter = 4
Adds jitter to resampling if set, so that Google's sampling is not externally deterministic.
repeated int64 base_timestamp = 5
If specified, output timestamps are aligned with base_timestamp.
optional bool round_limits = 8
If set, the output timestamps nearest to start_time and end_time
optional string format_string = 9
Format string used by string::Substitute to construct the output.

Used in: NightLightCalculatorOptions

NONE = 0
PASS_HEADER = 1
UPDATE_VIDEO_HEADER = 2

Options for NodeChainSubgraph.

optional string node_type = 1
The type of the node. The node must have exactly one input stream and exactly one output stream.
optional int32 chain_length = 2
How many copies of the node should be chained in series.

Options to NonMaxSuppression calculator, which performs non-maximum suppression on a set of detections.

optional int32 num_detection_streams = 1
Number of input streams. Each input stream should contain a vector of detections.
optional int32 max_num_detections = 2
Maximum number of detections to be returned. If -1, then all detections are returned.
optional float min_score_threshold = 6
Minimum score of detections to be returned.
optional float min_suppression_threshold = 3
Jaccard similarity threshold for suppression -- a detection would suppress all other detections whose scores are lower and overlap by at least the specified threshold.
optional NonMaxSuppressionCalculatorOptions.OverlapType overlap_type = 4
optional bool return_empty_detections = 5
Whether to put empty detection vector in output stream.
optional NonMaxSuppressionCalculatorOptions.NmsAlgorithm algorithm = 7

Algorithms that can be used to apply non-maximum suppression.

Used in: NonMaxSuppressionCalculatorOptions

DEFAULT = 0
WEIGHTED = 1
Only supports relative bounding box for weighted NMS.

During the overlap computation, which is used to determine whether a rectangle suppresses another rectangle, one can use the Jaccard similarity, defined as the ratio of the intersection over union of the two rectangles. Alternatively a modified version of Jaccard can be used, where the normalization is done by the area of the rectangle being checked for suppression.

Used in: NonMaxSuppressionCalculatorOptions

UNSPECIFIED_OVERLAP_TYPE = 0
JACCARD = 1
MODIFIED_JACCARD = 2
INTERSECTION_OVER_UNION = 3

A normalized version of above Landmark proto. All coordinates should be within [0, 1].

Used in: NormalizedLandmarkList

optional float x = 1
optional float y = 2
optional float z = 3
optional float visibility = 4
optional float presence = 5

Group of NormalizedLandmark protos.

repeated NormalizedLandmark landmark = 1

A rectangle with rotation in normalized coordinates. The values of box center location and size are within [0, 1].

required float x_center = 1
Location of the center of the rectangle in image coordinates. The (0.0, 0.0) point is at the (top, left) corner.
required float y_center = 2
required float height = 3
Size of the rectangle.
required float width = 4
optional float rotation = 5
Rotation angle is clockwise in radians.
optional int64 rect_id = 6
Optional unique id to help associate different NormalizedRects to each other.

optional bool apply_orientation_from_exif_data = 1
If set, we will attempt to automatically apply the orientation specified by the image's EXIF data when loading the image. Otherwise, the image data will be loaded as-is.

optional int32 quality = 1
Quality of the encoding. An integer between (0, 100].

TODO: Consider renaming it to EncodedImage.

optional bytes encoded_image = 1
Pixel data encoded as JPEG.
optional int32 height = 2
Height of the image data under #1 once decoded.
optional int32 width = 3
Width of the image data under #1 once decoded.
optional OpenCvImageEncoderCalculatorResults.ColorSpace colorspace = 4
Color space used.

Used in: OpenCvImageEncoderCalculatorResults

UNKNOWN = 0
GRAYSCALE = 1
RGB = 2

optional string codec = 1
The 4-character code of the codec to encode the video.
optional string video_format = 2
The video format of the output video file.
optional double fps = 3
The frame rate in Hz at which the video frames are output.
optional int32 width = 4
Dimensions of the video in pixels.
optional int32 height = 5

optional int32 width = 1
optional int32 height = 2
repeated float dx = 3
Stores the two channels of the flow field in raster order.
repeated float dy = 4

Settings specifying an output stream handler.

Used in: CalculatorGraphConfig, CalculatorGraphConfig.Node

optional string output_stream_handler = 1
Name of the registered output stream handler class.
repeated string input_side_packet = 2
Names of the input side packets for the handler specifically and distinct from the side packets for the calculator (but could be shared).
optional MediaPipeOptions options = 3
Options for the output stream handler.

optional bool output_only_when_all_inputs_received = 1
When true, this calculator will drop received TICK packets if any input stream hasn't received a packet yet.

A PacketFactory creates a side packet.

Used in: CalculatorGraphConfig, PacketManagerConfig

optional string packet_factory = 1
The name of the registered packet factory class.
optional string output_side_packet = 2
The name of the output side packet that this packet factory creates.
optional string external_output = 1002
DEPRECATED: The old name for output_side_packet.
optional PacketFactoryOptions options = 3
The options for the packet factory.

Options used by a PacketFactory to create the Packet.

Used in: PacketFactoryConfig

(message has no fields)

Contains the packet frequency information.

optional double packet_frequency_hz = 1
Packet frequency (packets per second).
optional string label = 2
A label that identifies what this packet frequency is for. Eg. "Gaze", "Gesture", etc.

Options for PacketFrequencyCalculator.

optional double time_window_sec = 1
Time window (in seconds) over which the packet frequency is computed. Must be greater than 0 and less than 100 seconds (in order to limit memory usage).
repeated string label = 2
Text identifiers for the input streams.

The settings specifying a packet generator and how it is connected.

Used in: CalculatorGraphConfig

optional string packet_generator = 1
The name of the registered packet generator class.
repeated string input_side_packet = 2
The names of the input side packets. The PacketGenerator can choose to access its input side packets either by index or by tag.
repeated string external_input = 1002
DEPRECATED(mgeorg) The old name for input_side_packet.
repeated string output_side_packet = 3
The names of the output side packets that this generator produces. The PacketGenerator can choose to access its output side packets either by index or by tag.
repeated string external_output = 1003
DEPRECATED(mgeorg) The old name for output_side_packet.
optional PacketGeneratorOptions options = 4
The options for the packet generator.

Options used by a PacketGenerator.

Used in: PacketGeneratorConfig

(message has no fields)

Contains the latency information for a packet stream in mediapipe. The following are provided 1. current latency 2. running average 3. histogram of latencies observed 4. cumulative sum of latencies observed NextId: 13

optional int64 current_latency_usec = 8
Current latency (delay in microseconds wrt a reference packet).
repeated int64 counts = 9
The latency histogram which stores the count recorded for each specified interval.
optional int64 num_intervals = 10
Number of intervals for the latency histogram output.
optional int64 interval_size_usec = 11
Size of the histogram intervals (in microseconds). The first interval is [0, interval_size_usec). The last interval extends to +inf.
optional int64 avg_latency_usec = 2
Running average of latencies observed so far.
optional string label = 7
An identifier label for the packet.
optional int64 sum_latency_usec = 12
Cumulative sum of individual packet latencies of all the packets output so far.

optional int64 num_intervals = 1
Number of intervals for the latency histogram output.
optional int64 interval_size_usec = 2
Interval size (in microseconds) for the histogram.
optional int64 reset_duration_usec = 3
Reset time (in microseconds) for histogram and average. The histogram and running average are initialized to zero periodically based on the specified duration. Negative value implies never resetting the statistics.
repeated string packet_labels = 4
Identifier labels for each input packet stream. The order of labels must correspond 1:1 with the input streams order. The labels are copied to the latency information output by the calculator.

The configuration for a PacketManager.

repeated PacketFactoryConfig packet = 1

optional double frame_rate = 1
The output frame rate measured in frames per second. The closest packet in time in each period will be chosen. If there is no packet in the period then the most recent packet will be chosen (not the closest in time).
optional PacketResamplerCalculatorOptions.OutputHeader output_header = 2
Whether and what kind of header to place on the output stream. Note, this is about the actual header, not the VIDEO_HEADER stream. If this option is set to UPDATE_VIDEO_HEADER then the header will also be parsed (updated) and passed along to the VIDEO_HEADER stream.
optional bool flush_last_packet = 3
Flush last packet even if its timestamp is greater than the final stream timestamp.
optional double jitter = 4
Adds jitter to resampling if set, so that Google's sampling is not externally deterministic. When set, the randomizer will be initialized with a seed. Then, the first sample is chosen randomly (uniform distribution) among frames that correspond to timestamps [0, 1/frame_rate). Let the chosen frame correspond to timestamp t. The next frame is chosen randomly (uniform distribution) among frames that correspond to [t+(1-jitter)/frame_rate, t+(1+jitter)/frame_rate]. t is updated and the process is repeated. Valid values are in the range of [0.0, 1.0] with the default being 0.0 (no jitter). A typical value would be a value in the range of 0.1-0.25. Note that this does NOT guarantee the desired frame rate, but if the pseudo-random number generator does its job and the number of frames is sufficiently large, the average frame rate will be close to this value.
optional bool jitter_with_reflection = 9
Enables reflection when applying jitter. This option is ignored when reproducible_sampling is true, in which case reflection will be used. New use cases should use reproducible_sampling = true, as jitter_with_reflection is deprecated and will be removed at some point.
optional bool reproducible_sampling = 10
If set, enables reproducible sampling, allowing frames to be sampled without regard to where the stream starts. See packet_resampler_calculator.h for details. This enables reflection (ignoring jitter_with_reflection setting).
optional int64 base_timestamp = 5
If specified, output timestamps are aligned with base_timestamp. Otherwise, they are aligned with the first input timestamp. In order to ensure that the output timestamps are reproducible, with round_limits = false, the bounds for input timestamps must include: [start_time - period / 2, end_time + period / 2], with round_limits = true, the bounds for input timestamps must include: [start_time - period, end_time + period], where period = 1 / frame_rate. For example, in PacketResamplerCalculatorOptions specify "start_time: 3000000", and in MediaDecoderOptions specify "start_time: 2999950".
optional int64 start_time = 6
If specified, only outputs at/after start_time are included.
optional int64 end_time = 7
If specified, only outputs before end_time are included.
optional bool round_limits = 8
If set, the output timestamps nearest to start_time and end_time are included in the output, even if the nearest timestamp is not between start_time and end_time.

Used in: PacketResamplerCalculatorOptions

NONE = 0
Do not output a header, even if the input contained one.
PASS_HEADER = 1
Pass the header, if the input contained one.
UPDATE_VIDEO_HEADER = 2
Update the frame rate in the header, which must be of type VideoHeader.

repeated int32 x = 1777
Tests that the tags used to encode the timestamp do not interfere with proto tags.
repeated int32 y = 268437233
This field's tag equals 1777 | (1 << 28).

optional PacketThinnerCalculatorOptions.ThinnerType thinner_type = 1
optional int64 period = 2
The period (in microseconds) specifies the temporal interval during which only a single packet is emitted in the output stream. Has subtly different semantics depending on the thinner type, as follows. Async thinner: this option is a refractory period -- once a packet is emitted, we guarantee that no packets will be emitted for period ticks. Sync thinner: the period specifies a temporal interval during which only one packet is emitted. The emitted packet is guaranteed to be the one closest to the center of the temporal interval (no guarantee on how ties are broken). More specifically, intervals are centered at start_time + i * period (for non-negative integers i). Thus, each interval extends period/2 ticks before and after its center. Additionally, in the sync thinner any packets earlier than start_time are discarded and the thinner calls Close() once timestamp equals or exceeds end_time.
optional int64 start_time = 3
Packets before start_time and at/after end_time are discarded. Additionally, for a sync thinner, start time specifies the center of time intervals as described above and therefore should be set explicitly. If not specified, set to 0 for SYNC type, and set to Timestamp::Min() for ASYNC type.
optional int64 end_time = 4
Set to Timestamp::Max() if not specified.
optional bool sync_output_timestamps = 5
Whether the timestamps of packets emitted by sync thinner should correspond to the center of their corresponding temporal interval. If false, packets are emitted using their original timestamp (as in async thinner).
optional bool update_frame_rate = 6
If true, update the frame rate in the header, if it's available, to an estimated frame rate due to the sampling.

Used in: PacketThinnerCalculatorOptions

ASYNC = 1
Asynchronous thinner, described below [default].
SYNC = 2
Synchronous thinner, also described below.

Captures additional information about a RegionFlowFeature's surrounding patch. Using MotionEstimation::RetrieveRegionFlowFeatureList or ComputeRegionFlowFeatureDescriptors the patch descriptor has the following layout: 9-dimensional, consisting of 3 mean intensities followed by the 3x3 covariance matrix, of which only the upper half (6 elements) is stored in column-major order. I.e. indices for data in patch descriptor refer to: mean: 0 1 2, covariance: 3 4 5 6 7 8

Used in: RegionFlowFeature

repeated float data = 1
The actual feature descriptor.

repeated ToneMatch tone_match = 1
Several intensity matches computed from equal percentiles of matching patch pairs. No number or particular ordering is assumed.
optional float irls_weight = 2

Configs for the profiler for a calculator. Not applicable to subgraphs.

Used in: CalculatorGraphConfig, CalculatorGraphConfig.Node

int64 histogram_interval_size_usec = 1
Size of the runtimes histogram intervals (in microseconds) to generate the histogram of the Process() time. The last interval extends to +inf. If not specified, the interval is 1000000 usec = 1 sec.
int64 num_histogram_intervals = 2
Number of intervals to generate the histogram of the Process() runtime. If not specified, one interval is used.
bool enable_input_output_latency = 3
TODO: clean up after migration to MediaPipeProfiler. DEPRECATED: If true, the profiler also profiles the input output latency. Should be true only if the packet timestamps correspond to the microseconds wall time from epoch.
bool enable_profiler = 4
If true, the profiler starts profiling when graph is initialized.
bool enable_stream_latency = 5
If true, the profiler also profiles the stream latency and input-output latency. No-op if enable_profiler is false.
bool use_packet_timestamp_for_added_packet = 6
If true, the profiler uses packet timestamp (as production time and source production time) for packets added by calling CalculatorGraph::AddPacketToInputStream(). If false, uses profiler's clock.
int64 trace_log_capacity = 7
The maximum number of trace events buffered in memory. The default value buffers up to 20000 events.
repeated int32 trace_event_types_disabled = 8
Trace event types that are not logged.
string trace_log_path = 9
The output directory and base-name prefix for trace log files. Log files are written to: StrCat(trace_log_path, index, ".binarypb")
int32 trace_log_count = 10
The number of trace log files retained. The trace log files are named "trace_0.log" through "trace_k.log". The default value specifies 2 output files retained.
int64 trace_log_interval_usec = 11
The interval in microseconds between trace log output. The default value specifies trace log output once every 0.5 sec.
int64 trace_log_margin_usec = 12
The interval in microseconds between TimeNow and the highest times included in trace log output. This margin allows time for events to be appended to the TraceBuffer.
bool trace_log_duration_events = 13
Deprecated, replaced by trace_log_instant_events.
int32 trace_log_interval_count = 14
The number of trace log intervals per file. The total log duration is: trace_log_interval_usec * trace_log_count * trace_log_interval_count. The default value specifies 10 intervals per file.
bool trace_log_disabled = 15
An option to turn ON/OFF writing trace files to disk. Saving trace files to disk is enabled by default.
bool trace_enabled = 16
If true, tracer timing events are recorded and reported.
bool trace_log_instant_events = 17
False specifies an event for each calculator invocation. True specifies a separate event for each start and finish time.

optional float bilateral_sigma = 1
Sigma for color difference.
optional float pull_propagation_scale = 3
Determines how fast confident values can propagate. Filters are normalized, such that confidence dissipates quickly instead of propagating. To ensure confidence propagates the importance weight is scaled by the scalars specified below. Larger values yield quicker propagation.
optional float push_propagation_scale = 4
optional float pull_bilateral_scale = 5
Above bilateral sigma is scaled at each level by the specified scale (for push and pull phase). This is because iterative downsampling of the guidance image introduces errors, making bilateral weighting increasingly erroneous.
optional float push_bilateral_scale = 6

optional float max_quantized_value = 1
optional float min_quantized_value = 2

Message storing min value and max value for normalization in all channels.

repeated float min_value = 1
For all channels.
repeated float max_value = 2

optional int32 rows = 1
optional int32 cols = 2
optional int64 start_timestamp = 3
optional int64 limit_timestamp = 4
optional int64 timestamp_step = 5

A Region can be represented in each frame as a set of scanlines (compressed RLE, similar to rasterization of polygons). For each scanline with y-coordinate y, we save (possibly multiple) intervals of occupied pixels represented as a pair [left_x, right_x].

Used in: LocationData.BinaryMask, Locus

repeated Rasterization.Interval interval = 1
Intervals are always sorted by y-coordinate. Therefore, a region occupies a set of scanlines ranging from interval(0).y() to interval(interval_size() - 1).y(). Note: In video, at some scanlines no interval might be present.

Used in: Rasterization

required int32 y = 1
required int32 left_x = 2
required int32 right_x = 3

NOTE: This calculator uses QResampler, despite the name, which supersedes RationalFactorResampler.

optional double target_sample_rate = 1
target_sample_rate is the sample rate, in Hertz, of the output stream. Required. Must be greater than 0.
optional RationalFactorResampleCalculatorOptions.ResamplerRationalFactorOptions resampler_rational_factor_options = 2
optional bool check_inconsistent_timestamps = 3
Set to false to disable checks for jitter in timestamp values. Useful with live audio input.

Parameters for initializing QResampler. See QResampler for more details.

Used in: RationalFactorResampleCalculatorOptions

optional double radius = 1
Kernel radius in units of input samples.
optional double cutoff = 2
Anti-aliasing cutoff frequency in Hertz. A reasonable setting is 0.45 * min(input_sample_rate, output_sample_rate).
optional double kaiser_beta = 3
The Kaiser beta parameter for the kernel window.

optional RecolorCalculatorOptions.MaskChannel mask_channel = 1
Selects which channel of the MASK input to use for masking.
optional Color color = 2
Color to blend into input image where mask is > 0. The blending is based on the input image luminosity.
optional bool invert_mask = 3
Swap the meaning of mask values for foreground/background.
optional bool adjust_with_luminance = 4
Whether to use the luminance of the input image to further adjust the blending weight, to help preserve image textures.

Used in: RecolorCalculatorOptions

UNKNOWN = 0
RED = 1
ALPHA = 2

A rectangle with rotation in image coordinates.

required int32 x_center = 1
Location of the center of the rectangle in image coordinates. The (0, 0) point is at the (top, left) corner.
required int32 y_center = 2
required int32 height = 3
Size of the rectangle.
required int32 width = 4
optional float rotation = 5
Rotation angle is clockwise in radians.
optional int64 rect_id = 6
Optional unique id to help associate different Rects to each other.

optional bool filled = 1
Whether the rendered rectangle should be filled.
optional Color color = 2
Line color or filled color of the rectangle.
optional double thickness = 3
Thickness of the line (applicable when the rectangle is not filled).
optional bool oval = 4
Whether the rendered rectangle should be an oval.

optional float multiplier = 1
Multiplier to apply to the rect size. If one defined `thickness` for RenderData primitives for object (e.g. pose, hand or face) of size `A` then multiplier should be `1/A`. It means that when actual object size on the image will be `B`, then all RenderData primitives will be scaled with factor `B/A`.

optional float scale_x = 1
Scaling factor along the side of a rotated rect that was aligned with the X and Y axis before rotation respectively.
optional float scale_y = 2
optional float rotation = 3
Additional rotation (counter-clockwise) around the rect center either in radians or in degrees.
optional int32 rotation_degrees = 4
optional float shift_x = 5
Shift along the side of a rotated rect that was aligned with the X and Y axis before rotation respectively. The shift is relative to the length of corresponding side. For example, for a rect with size (0.4, 0.6), with shift_x = 0.5 and shift_y = -0.5 the rect is shifted along the two sides by 0.2 and -0.3 respectively.
optional float shift_y = 6
optional bool square_long = 7
Change the final transformed rect into a square that shares the same center and rotation with the rect, and with the side of the square equal to either the long or short side of the rect respectively.
optional bool square_short = 8

optional int32 kernel_size = 1
optional float min_confidence_to_refine = 2
optional bool refine_presence = 3
optional bool refine_visibility = 4

Next tag: 67

Used in: MotionAnalysisOptions

optional TrackingOptions tracking_options = 1
optional int32 min_feature_inliers = 2
Features are binned into grids of different resolutions (see fast_estimation_block_size below) and retained if they survive a localized translation based RANSAC algorithm and the survivors are at least of size min_feature_inliers. Must be at least 3!
optional float relative_min_feature_inliers = 46
Relative number of inlier features w.r.t. average number of features per grid bin. Maximum of both thresholds is used as actual threshold.
optional float pre_blur_sigma = 33
Pre-blur before computing features to reduce noise. Set to zero for no blurring.
optional int32 ransac_rounds_per_region = 3
Number of ransac rounds to estimate per region flow vector. This could be adaptive, but the required number of rounds is so low, that estimating the bound is more costly than just running it for a fixed number of times.
optional float absolute_inlier_error_threshold = 4
Error thresholds for a feature to be considered as an inlier in pixel-distance. The max of all three thresholds below is used as the actual threshold. Absolute in pixels.
optional float frac_inlier_error_threshold = 52
Scaled w.r.t. frame diameter.
optional float relative_inlier_error_threshold = 44
Scaled w.r.t model estimated during each RANSAC round.
optional int32 top_inlier_sets = 45
Returns for each grid only the top N inlier sets.
optional bool no_estimation_mode = 40
For debugging purposes, uses all tracked features regardless of the above setting.
optional float fast_estimation_block_size = 6
Block size in pixels. If fractional block_size is used (0 < size < 1), it is interpreted as fraction of the image dimensions. We use 4 blocks in each dimension by standard.
optional int32 fast_estimation_min_block_size = 25
Minimum block size in pixels (larger dimension) to perform fast estimation on. Pyramid levels are allocated such that block_size * 0.5^(level - 1) = min_block_size. At least two levels are used.
optional int32 fast_estimation_overlap_grids = 22
We use overlapping versions of the grid; this parameter specifies how many in each dimension (the total is therefore the value squared!).
optional float max_magnitude_threshold_ratio = 23
Flow features with motion above this threshold (w.r.t. frame diameter) are rejected.
optional float median_magnitude_bounds = 51
Flow features that have a motion that is larger than median_magnitude_bounds times the median magnitude are discarded. If set to zero, test is not enforced.
optional RegionFlowComputationOptions.IrlsInitialization irls_initialization = 49
If this option is activated, feature's irls weight is initialized to the inverse of its computed flow.
optional RegionFlowComputationOptions.DownsampleMode downsample_mode = 11
optional int32 downsampling_size = 12
Specify the size of either dimension here, the frame will be downsampled to fit downsampling_size.
optional float downsample_factor = 18
optional bool round_downsample_factor = 62
If set, we will force the computed downsampling factor to be the nearest integer, resulting in faster downsampling. This will have no effect for DOWNSAMPLE_TO_INPUT_SIZE, DOWNSAMPLE_BY_FACTOR, and DOWNSAMPLE_BY_SCHEDULE, which should have exact values defined.
optional RegionFlowComputationOptions.DownSampleSchedule downsample_schedule = 19
Used if downsample_mode is DOWNSAMPLE_BY_SCHEDULE.
optional int32 min_feature_requirement = 13
Minimum number of good features that we require to be present. Without good features, the estimated motion models will do more harm than good, so it is better to use simply the identity transform for this frame, and set the flag unstable_models to true in RegionFlow.
optional float min_feature_cover = 14
We also require features to cover a minimum percentage area of the frame. We use downsampling and plot each feature by a 1 in a grid, this is equivalent to plotting each feature by a rectangle in the original frame.
optional int32 min_feature_cover_grid = 20
Grid size for above min feature cover.
optional bool compute_blur_score = 17
Computes blur score for each frame. Score is proportional to amount of blur present in a frame, i.e. higher scores reflect more blurred frames. Note that the score is dependent on the gradient distribution of the image content, i.e. the score itself is rather meaningless but needs to be compared to scores of neighboring frames.
optional RegionFlowComputationOptions.BlurScoreOptions blur_score_options = 31
optional RegionFlowComputationOptions.VisualConsistencyOptions visual_consistency_options = 55
optional int32 patch_descriptor_radius = 21
Radius of patch descriptor computed during RetrieveRegionFlowFeatureList call.
optional int32 distance_from_border = 50
Minimum distance from image border. Must be greater or equal to patch_descriptor_radius.
optional float corner_response_scale = 26
Corner response is scaled by scalar below and normalized to lie within [0, 1], where 0 is low corner score and 1 high corner score.
optional bool verify_features = 27
Verifies reliability of features, by back-tracking operation from matched location. If returned location is within verification_distance feature is accepted otherwise discarded.
optional float verification_distance = 28
optional bool verify_long_features = 53
If set, consistency of long features is verified (in case tracking_policy is set to POLICY_LONG_FEATURES) by extracting a patch around the feature during the very first observation and comparing the matching patches along the long feature trajectory via SSD. If the difference is above the long_feature_verification_threshold the feature is removed.
optional float long_feature_verification_threshold = 54
Maximum average per pixel error (in L1 norm) in the normalized intensity domain for matching patches to be considered to be consistent.
optional float max_long_feature_acceleration = 56
Long features are expected to have limited acceleration over time. If acceleration exceeds specified value based on the setting in verify_long_feature_acceleration either: a) verify_long_feature_acceleration = false A new track is started instead of continuing the old one. The track itself is not removed in this case. b) verify_long_feature_acceleration = true The track is flagged for verification, by back-tracking operation from matched location. If track fails verification test it is discarded. This only triggers if at least verify_long_feature_trigger_ratio of features have been flagged, otherwise option a is used.
optional bool verify_long_feature_acceleration = 63
optional float verify_long_feature_trigger_ratio = 64
optional bool histogram_equalization = 57
If true, histogram equalization is performed to the input image sequence before registration.
optional bool use_synthetic_zero_motion_tracks_all_frames = 34
If true, synthetic region flows with zero motion are used for all (or just the first) frame.
optional bool use_synthetic_zero_motion_tracks_first_frame = 35
optional bool gain_correction = 36
Optional gain correction before tracking features. Improves robustness when lighting is changing.
optional bool fast_gain_correction = 61
If set performs gain correction by simply equalizing mean intensity between frames, instead of using ToneEstimation.
optional bool gain_correction_multiple_hypotheses = 47
If the multiple hypothesis flag is set, features are tracked using both with and without gain correction, and the hypothesis with more inliers is selected.
optional float gain_correction_inlier_improvement_frac = 48
This flag, when used together with the multiple hypotheses flag, specifies that gain correction should increase the number of inliers by at least this fraction for it to be used instead of default tracking.
optional bool gain_correction_bright_reference = 59
If set, always uses the brighter frame as reference. This is the preferred direction of correction, to avoid overexposed regions from being corrected which leads to spurious matches.
optional float gain_correction_triggering_ratio = 60
Only performs gain correction if number of tracked features falls under specified ratio (w.r.t. previous frame). Set to zero, to always perform gain correction if requested.
optional float frac_gain_feature_size = 37
Gain correction is based on a grid of zero motion features, independent of the underlying motion. Fractional parameter specifies resolution of the grid w.r.t. frame size.
optional float frac_gain_step = 38
optional RegionFlowComputationOptions.GainCorrectMode gain_correct_mode = 41
optional ToneEstimationOptions.GainBiasBounds gain_bias_bounds = 39
Bounds for the estimated model. If not set externally, will be set based on GainCorrectMode.
optional RegionFlowComputationOptions.ImageFormat image_format = 58
Image format of the input.
optional RegionFlowComputationOptions.DescriptorExtractorType descriptor_extractor_type = 65
The descriptor extractor type used.
optional bool compute_derivative_in_pyramid = 66
Whether to compute derivatives when building the pyramid. When set to true, it's building a Laplacian pyramid. When set to false, it's building a Gaussian pyramid.

Used in: RegionFlowComputationOptions

optional int32 box_filter_diam = 1
Blur score is only computed over image regions of high cornerness (as blur in any direction will always alter these regions). First, the corner image (smallest eigenvalue of 2nd moment matrix) is box filtered, and then thresholded.
optional float relative_cornerness_threshold = 2
Specifies relative (w.r.t. maximum) and absolute cornerness threshold for threshold operation.
optional float absolute_cornerness_threshold = 3
optional float median_percentile = 5
Blur score is defined as 1.0 / <median cornerness>, where <median cornerness> is the n-th percentile of the cornerness evaluated over the image regions of high cornerness as specified above.

Used in: RegionFlowComputationOptions

ORB = 0
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.370.4395&rep=rep1&type=pdf

Downsampling schedule. Frame sizes up to which a particular downsampling factor is applied. Factor chosen by comparing actual frame area against standard area (standard_width * standard_height), where standard_width = 16/9 X standard_height.

Used in: RegionFlowComputationOptions

optional float downsample_factor_360p = 1
For <= 360p.
optional float downsample_factor_480p = 2
For <= 480p.
optional float downsample_factor_720p = 3
For <= 720p.
optional float downsample_factor_1080p = 4
For > 720p.

We support down-sampling of an incoming frame before running the resolution dependent part of the region flow computation (feature extraction and tracking if desired). Note that in all downsampling modes except for DOWNSAMPLE_TO_INPUT_SIZE, for uneven dimensions after downsampling, we always round up to the nearest even dimension, i.e. 350p with a downsample_factor of 2.0 would expect an input of size 176p.

Used in: RegionFlowComputationOptions

DOWNSAMPLE_NONE = 1
No downsampling.
DOWNSAMPLE_TO_MAX_SIZE = 2
Downsizes the input frame such that frame_size == downsampling_size, where frame_size := max(width, height).
DOWNSAMPLE_BY_FACTOR = 3
Downsizes frame by pre-defined factor, downsample_factor below.
DOWNSAMPLE_BY_SCHEDULE = 4
Downsampling based on downsampling schedule, see DownSampleSchedule below for details.
DOWNSAMPLE_TO_MIN_SIZE = 5
Downsizes the input frame such that frame_size == downsampling_size, where frame_size := min(width, height).
DOWNSAMPLE_TO_INPUT_SIZE = 6
Input frame is assumed to be already downsampled by the factor specified by downsample_factor below. For example if the original frame is 720p, and downsample_factor is set to 2.0, then we expect as input 360p.

Used in: RegionFlowComputationOptions

GAIN_CORRECT_DEFAULT_USER = 1
Uses default or user supplied bounds, i.e. gain_bias_bounds is left untouched.
GAIN_CORRECT_VIDEO = 2
Uses defaults for video (most strict).
GAIN_CORRECT_HDR = 3
Uses most relaxed settings to track across HDR frames, taken at different exposures.
GAIN_CORRECT_PHOTO_BURST = 4
More relaxed than video but stricter than HDR.

Supported image formats. All images are converted to grayscale before processing. These image formats only concern AddImage. IMPORTANT: All the Retrieve* methods expect RGB when the descriptors are computed.

Used in: RegionFlowComputationOptions

FORMAT_GRAYSCALE = 1
FORMAT_RGB = 2
FORMAT_RGBA = 3
FORMAT_BGR = 4
FORMAT_BGRA = 5

Determines how irls weights for computed features are initialized. In general, more stable features are given higher weight.

Used in: RegionFlowComputationOptions

INIT_UNIFORM = 1
All weights equal 1
INIT_CONSISTENCY = 2
Feature's irls weight is initialized to a value in [0, 2] indicating how consistent the feature's motion is w.r.t. neighboring features (high values = very consistent). Determined by counting how often a feature is part of the inlier set for a particular bin.

Determines how/if visual consistency is computed. If activated, computes the absolute *change* in visual difference between two adjacent frame pairs, i.e. the modulus of the 2nd derivative of the frame appearance. Stores result in RegionFlowFeatureList::visual_consistency.

Used in: RegionFlowComputationOptions

optional bool compute_consistency = 1
Computation of visual consistency is only performed if activated.
optional int32 tiny_image_dimension = 2
Incoming color or gray scale image is scaled to a tiny square image of the specified dimension. Used to compare adjacent images via SSD.

Tracked feature at location (x,y) with flow (dx, dy) and patch based error (sum of absolute value of intensity difference). Next tag: 19

Used in: RegionFlowFeatureList, RegionFlowFrame.RegionFlow

optional float x = 1
optional float y = 2
optional float dx = 3
optional float dy = 4
optional int32 track_id = 13
Features that belong to the same feature track are assigned a unique id and are identified via it. Note, this id is only unique within the lifetime of a RegionFlowComputation object. That is, if distribution or parallelization using multiple instances was used, the ids are only unique within that instance context.
A negative value indicates no id (valid track ids are >= 0).
optional float tracking_error = 5
Tracking error as patch intensity residual (SSD).
optional float irls_weight = 6
Inverse of registration error (in pixels), after parametric motion model fitting. Values are in [0, 1e6]. Low values correspond to outliers, high values to inliers. Set by MotionEstimation::EstimateMotions*
optional float corner_response = 11
Corner response (computed as minimum eigenvalue of block filtered 2nd moment matrix).
optional PatchDescriptor feature_descriptor = 7
Patch feature descriptors. *For internal use only*. External clients should not rely on their contents.
optional PatchDescriptor feature_match_descriptor = 8
optional TemporalIRLSSmoothing internal_irls = 10
Internal datastructure used temporally during temporal IRLS smoothing.
optional string label = 14
Optional label for debugging purposes.
optional int32 flags = 15
optional int32 feature_id = 16
Unique feature id per RegionFlowComputation object.
optional int32 octave = 17
octave (pyramid layer) from which the keypoint has been extracted
optional BinaryFeatureDescriptor binary_feature_descriptor = 18
Feature descriptor for the current feature.

Flags indicating specific statuses.

FLAG_BROKEN_TRACK = 1
Used for long feature tracks if track id was reset.

Encapsulates a list of features with associated flow. Can be extracted from RegionFlow via GetRegionFlowFeatureList declared in region_flow.h. This is the essential (additional) information required by Cropper using wobble_suppression with displacements. Next tag: 14

Used in: FrameSelectionResult

repeated RegionFlowFeature feature = 1
optional int32 frame_width = 2
optional int32 frame_height = 3
optional bool unstable = 4
Set from corresponding RegionFlowFrame field.
optional int32 distance_from_border = 5
Records the minimum distance from the image border for each feature and matching feature (if enforced > 0).
optional float blur_score = 6
Set from corresponding RegionFlowFrame field.
optional bool long_tracks = 7
If set, indicates, that features represent long tracks, i.e. each feature has a valid track_id() >= 0.
optional float frac_long_features_rejected = 8
If long_tracks, stores number of long feature tracks that got rejected in this frame, as their patches were deemed inconsistent with the track's very first extracted patch.
optional float visual_consistency = 9
Measures visual consistency between adjacent frames. In particular, stores the absolute *change* in visual difference between two adjacent frame pairs, i.e. the modulus of the 2nd derivative of the frame appearance. Normalized w.r.t. number of channels and total pixels of the underlying frame. In particular for sudden changes (e.g. shot boundaries) this value will be significantly non-zero (> 0.05). Negative value per default indicates no consistency has been computed.
optional int64 timestamp_usec = 10
Timestamp in microseconds of the underlying frame, that is the frame for which the source features (not matching features) were computed.
optional int32 match_frame = 11
Denotes the frame that flow was computed w.r.t., locally to the current frame. For example, if current frame is N, N + match_frame is the matching frame that flow was computed to. Values < 0 indicate backward tracking, while values > 0 indicate forward tracking. By default, for empty feature lists, matching frame is the same as current frame, i.e. match_frame = 0.
optional bool is_duplicated = 12
Set, if frame is estimated to be an exact duplicate of the previous frame.
repeated int32 actively_discarded_tracked_ids = 13
Stores all the tracked ids that have been discarded actively in this frame. This information will be populated via RegionFlowFeatureList, so that the downstream modules can receive it and use it to avoid misjudgement on tracking continuity. Discard reason: (1) A tracked feature has too long track, which might create drift. (2) A tracked feature in a highly dense area, which provides little value.

RegionFlowFrame is an optical flow representation where each region has a consistent optical flow (adheres to local translational model). Regions are arranged in a regular grid according to BlockDescriptor. Next tag: 11.

repeated RegionFlowFrame.RegionFlow region_flow = 1
Sorted by id for quick lookup.
optional int32 num_total_features = 2
Total number of features in all RegionFlow's.
optional bool unstable_frame = 4
If set, indicates that the frame's region flow is unstable (not enough features or coverage too low).
optional float blur_score = 7
Blur score of the current frame is defined as the n-th percentile of the cornerness of the input frame evaluated over regions of high cornerness. For details see BlurScoreOptions in region_flow_computation.proto. The actual value is pretty meaningless, but relative to the blur score of other frames one can detect blurry frames, e.g. by a 'significant' local maximum in a sequence of blur_scores.
optional int32 frame_width = 8
optional int32 frame_height = 9
optional RegionFlowFrame.BlockDescriptor block_descriptor = 10

Region flow is estimated using a grid of equal sized bins as regions. BlockDescriptor specifies size of bins/blocks.

Used in: RegionFlowFrame

optional int32 block_width = 1
optional int32 block_height = 2
optional int32 num_blocks_x = 3
optional int32 num_blocks_y = 4

Next tag: 8

Used in: RegionFlowFrame

required int32 region_id = 1
optional float centroid_x = 2
Mean anchor point (centroid) of flow vector and mean flow.
optional float centroid_y = 3
optional float flow_x = 4
optional float flow_y = 5
repeated RegionFlowFeature feature = 7

Used in: RenderData

oneof data
The RenderAnnotation can be one of the below formats.
- RenderAnnotation.Rectangle rectangle = 1
- RenderAnnotation.FilledRectangle filled_rectangle = 2
- RenderAnnotation.Oval oval = 3
- RenderAnnotation.FilledOval filled_oval = 4
- RenderAnnotation.Point point = 5
- RenderAnnotation.Line line = 6
- RenderAnnotation.Arrow arrow = 7
- RenderAnnotation.Text text = 8
- RenderAnnotation.RoundedRectangle rounded_rectangle = 9
- RenderAnnotation.FilledRoundedRectangle filled_rounded_rectangle = 10
- RenderAnnotation.GradientLine gradient_line = 14
optional double thickness = 11
Thickness for drawing the annotation.
optional Color color = 12
Color for drawing the annotation. For FilledRectangle and FilledOval, this color is used only for drawing the boundary.
optional string scene_tag = 13
A hint regarding what this annotation is for. Should be unique across all annotation types.

Used in: RenderAnnotation

optional double x_start = 1
The arrow head will be drawn at (x_end, y_end).
optional double y_start = 2
optional double x_end = 3
optional double y_end = 4
optional bool normalized = 5

Used in: RenderAnnotation

optional Oval oval = 1
optional Color fill_color = 2
Color to fill in the oval.

Used in: RenderAnnotation

optional Rectangle rectangle = 1
optional Color fill_color = 2
Color to fill in the rectangle.

Used in: RenderAnnotation

optional RoundedRectangle rounded_rectangle = 1
optional Color fill_color = 2
Color to fill in the rounded rectangle.

Used in: RenderAnnotation

optional double x_start = 1
optional double y_start = 2
optional double x_end = 3
optional double y_end = 4
optional bool normalized = 5
optional Color color1 = 6
Linearly interpolate between color1 and color2 along the line.
optional Color color2 = 7

Used in: RenderAnnotation

optional double x_start = 1
optional double y_start = 2
optional double x_end = 3
optional double y_end = 4
optional bool normalized = 5
optional Line.LineType line_type = 6

Used in: Line

UNKNOWN = 0
SOLID = 1
DASHED = 2

Used in: RenderAnnotation, FilledOval

optional Rectangle rectangle = 1
An oval is specified by the rectangle that encloses the oval. For example, a circle with center at (x,y) and radius r can be specified as a Rectangle with left = x - r, top = y - r, right = x + r, and bottom = y + r (i.e. width = height = 2 * r).

Used in: RenderAnnotation

optional double x = 1
optional double y = 2
optional bool normalized = 3

Used in: RenderAnnotation, FilledRectangle, Oval, RoundedRectangle

optional double left = 1
Left and top refer to the x and y coordinates of the top-left corner of rectangle, whereas right and bottom refer to the x and y coordinates of the bottom-right corner of rectangle.
optional double top = 2
optional double right = 3
optional double bottom = 4
optional bool normalized = 5
optional double rotation = 6
Rotation in radians.

Used in: RenderAnnotation, FilledRoundedRectangle

optional Rectangle rectangle = 1
A rounded rectangle is specified by a rectangle and a corner radius to round each corner by. A corner radius of 0 implies a standard non-rounded rectangle (i.e. sharp edges) but as the radius increases proportionally to the width and height of the overall rectangle size, the corners increasingly round.
optional int32 corner_radius = 2
The radius of the round corners.
optional int32 line_type = 3
Use one of the following: -1: a filled line (FILLED) 4: a 4-connected line (LINE_4) 8: an 8-connected line (LINE_8) 16: an antialiased line (LINE_AA).

message RenderAnnotation.Text

render_data.proto:138

Used in: DetectionsToRenderDataCalculatorOptions, RenderAnnotation

optional string display_text = 1
optional double left = 2
The location to render the text. Left and baseline refer to the x and y coordinates of the start location of text respectively.
optional double baseline = 3
optional double font_height = 4
The height of the text from top to baseline. When normalized=true, font height is specified wrt the image height.
optional bool normalized = 5
optional int32 font_face = 6
Specifies the font for the text. Font must be one of the following from OpenCV: cv::FONT_HERSHEY_SIMPLEX (0) cv::FONT_HERSHEY_PLAIN (1) cv::FONT_HERSHEY_DUPLEX (2) cv::FONT_HERSHEY_COMPLEX (3) cv::FONT_HERSHEY_TRIPLEX (4) cv::FONT_HERSHEY_COMPLEX_SMALL (5) cv::FONT_HERSHEY_SCRIPT_SIMPLEX (6) cv::FONT_HERSHEY_SCRIPT_COMPLEX (7)
optional bool center_horizontally = 7
Options to center text around the anchor point (left, baseline) by taking into account font shape, size and text length (e.g., [left, baseline] represent [center_x, center_y]).
optional bool center_vertically = 8

A RenderData is a collection of multiple RenderAnnotations. For example, a face can be rendered using a group of annotations: a bounding box around the face (rectangle) and annotations for various face parts such as eyes, nose etc (2D points).

repeated RenderAnnotation render_annotations = 1
optional string scene_class = 2
An optional string that uniquely identifies this class of annotations.
optional RenderViewport scene_viewport = 3
An optional viewport to which this set of annotations are intended to be rendered. If left unset, the annotations are meant to render overtop of the existing camera feed in the "main" viewport. If set, the annotations are to be rendered in a separate viewport.

Represents a destination viewport to render annotations into, when specified in RenderData.

Used in: RenderData

optional string id = 1
A unique identifier for this viewport.
optional int32 width_px = 2
The width and height of this viewport in absolute pixels. Normalized coordinates on annotations destined for this viewport are normalized relative to these absolute pixel dimensions. Camera feeds destined for this viewport will be rescaled to match these dimensions. Note: It is not expected that mid-stream resizing should be possible -- the visualizer is expected to use the first dimensions it sees for a given viewport and ignore any subsequent changes.
optional int32 height_px = 3
optional bool compose_on_video = 4
Set to true if this viewport should render its annotations overtop of a (rescaled to width/height) copy of the camera feed.

Options to generate anchors for Retina object detection models.

required int32 input_size_width = 1
Size of input images.
required int32 input_size_height = 2
required float min_scale = 3
Min and max scales for generating anchor boxes on feature maps.
required float max_scale = 4
required float anchor_offset_x = 5
The offset for the center of anchors. The value is in the scale of stride. E.g. 0.5 meaning 0.5 * |current_stride| in pixels.
required float anchor_offset_y = 6
required int32 num_layers = 7
Number of output feature maps to generate the anchors on.
repeated int32 feature_map_width = 8
Sizes of output feature maps to create anchors. Either feature_map size or stride should be provided.
repeated int32 feature_map_height = 9
repeated int32 strides = 10
Strides of each output feature map.
repeated float aspect_ratios = 11
List of different aspect ratios to generate anchors.
optional bool reduce_boxes_in_lowest_layer = 12
A boolean to indicate whether the fixed 3 boxes per location is used in the lowest layer.
optional float interpolated_scale_aspect_ratio = 13
An additional anchor is added with this aspect ratio and a scale interpolated between the scale for a layer and the scale for the next layer (1.0 for the last layer). This anchor is not included if this value is 0.
optional bool fixed_anchor_size = 14
Whether to use fixed width and height (e.g. both 1.0f) for each anchor. This option can be used when the predicted anchor width and height are in pixels.

Counterclockwise rotation.

(message has no fields)

Used in: ImageTransformationCalculatorOptions

UNKNOWN = 0
ROTATION_0 = 1
ROTATION_90 = 2
ROTATION_180 = 3
ROTATION_270 = 4

For TYPE_INCLUDE: During retargeting and stabilization salient points introduce constraints that will try to keep the normalized location in the rectangle frame_size - normalized bounds. For this soft constraints are used, therefore the weight specifies how "important" the salient point is (higher is better). In particular for each point p the retargeter introduces two pairs of constraints of the form: x - slack < width - right and x + slack > 0 + left, with slack > 0 where the weight specifies the importance of the slack. For TYPE_EXCLUDE_*: Similar to above, but constraints are introduced to keep the point to the left of the left bound OR the right of the right bound. In particular: x - slack < left OR x + slack >= right Similar to above, the weight specifies the importance of the slack. Note: Choosing a too high weight can lead to jerkiness as the stabilization essentially starts tracking the salient point.

Used in: SalientPointFrame

optional float norm_point_x = 1
Normalized location of the point (within domain [0, 1] x [0, 1]).
optional float norm_point_y = 2
optional SalientPoint.SalientPointType type = 11
Salient point type. By default we try to frame the salient point within the bounding box specified by left, bottom, right, top. Alternatively, one can choose to exclude the point. For details, see discussion above.
optional float left = 3
Bounds are specified in normalized coordinates [0, 1], FROM the specified border. Opposing bounds (e.g. left and right) may not add to values larger than 1. Default bounds center salient point within centering third of the frame.
optional float bottom = 4
optional float right = 9
optional float top = 10
optional float weight = 5
optional float norm_major = 6
In addition, a salient point can represent a region of interest, defined as an ellipse of size norm_major x norm_minor (normalized to [0, 1] domain) whose orientation is given by angle (in radians in [0, pi]). Due to aspect ratio change of the normalized domain, it is recommended that transformations to other domains are done via the ScaleSalientPoint function.
optional float norm_minor = 7
optional float angle = 8
Angle of major axis with x-axis (counter-clockwise, in radians).

Used in: SalientPoint

TYPE_INCLUDE = 1
TYPE_EXCLUDE_LEFT = 2
TYPE_EXCLUDE_RIGHT = 3

Aggregates SalientPoint's for a frame.

repeated SalientPoint point = 1

Order of operations. 1) Crop the image to fit within min_aspect_ratio and max_aspect_ratio. 2) Scale and convert the image to fit inside target_width x target_height using the specified scaling algorithm. (maintaining the aspect ratio if preserve_aspect_ratio is true). The output width and height will be divisible by 2, by default. It is possible to output width and height that are odd numbers when the output format is SRGB and the aspect ratio is left unpreserved. See scale_to_multiple_of for details.

optional int32 target_width = 1
Target output width and height. The final output's size may vary depending on the other options below. If unset, use the same width or height as the input. If only one is set then determine the other from the aspect ratio (after cropping). The output width and height will be divisible by 2, by default.
optional int32 target_height = 2
optional bool preserve_aspect_ratio = 3
If true, the image is scaled up or down proportionally so that it fits inside the box represented by target_width and target_height. Otherwise it is scaled to fit target_width and target_height completely. In any case, the aspect ratio that is preserved is that after cropping to the minimum/maximum aspect ratio. Additionally, if true, the output width and height will be divisible by 2.
optional string min_aspect_ratio = 4
If ratio is positive, crop the image to this minimum and maximum aspect ratio (preserving the center of the frame). This is done before scaling. The string must contain "/", so to disable cropping, set both to "0/1". For example, for a min_aspect_ratio of "9/16" and max of "16/9" the following cropping will occur: 1920x1080 (which is 16:9) is not cropped 640x1024 (which is 10:16) is not cropped 640x320 (which is 2:1) cropped to 568x320 (just under 16/9) 96x480 (which is 1:5), cropped to 96x170 (just over 9/16) The resultant frame will always be between (or at) the min_aspect_ratio and max_aspect_ratio.
optional string max_aspect_ratio = 5
optional ImageFormat.Format output_format = 6
If unset, use the same format as the input. NOTE: in the current implementation, the output format (either specified in the output_format option or inherited from the input format) must be SRGB. It can be YCBCR420P if the input_format is also the same.
optional ScaleImageCalculatorOptions.ScaleAlgorithm algorithm = 7
The upscaling algorithm to use. The default is to use CUBIC. Note that downscaling unconditionally uses DDA; see image_processing::AffineGammaResizer for documentation.
optional int32 alignment_boundary = 8
The output image will have this alignment. If set to zero, then any alignment could be used. If set to one, the output image will be stored contiguously.
optional bool set_alignment_padding = 9
Set the alignment padding area to deterministic values (as opposed to possibly leaving it as uninitialized memory). The padding is the space between the pixel values in a row and the end of the row (which may be different due to alignment requirements on the length of a row).
optional bool OBSOLETE_skip_linear_rgb_conversion = 10
optional float post_sharpening_coefficient = 11
Applies sharpening for downscaled images as post-processing. See image_processing::AffineGammaResizer for documentation.
optional ImageFormat.Format input_format = 12
If input_format is YCBCR420P, input packets contain a YUVImage. If input_format is a format other than YCBCR420P or is unset, input packets contain an ImageFrame. NOTE: in the current implementation, the input format (either specified in the input_format option or inferred from the input packets) must be SRGB or YCBCR420P.
optional int32 scale_to_multiple_of = 13
If set to 2, the target width and height will be rounded-down to the nearest even number. If set to any positive value other than 2, preserve_aspect_ratio must be false and the target width and height will be rounded-down to multiples of the given value. If set to any value less than 1, it will be treated like 1. NOTE: If set to an odd number, the output format must be SRGB.
optional bool use_bt709 = 14
If true, assume the input YUV is BT.709 (this is the HDTV standard, so most content is likely using it). If false use the previous assumption of BT.601 (mid-80s standard). Ideally this information should be contained in the input YUV Frame, but as of 02/06/2019, it's not. Once this info is baked in, this flag becomes useless.

Used in: ScaleImageCalculatorOptions

DEFAULT = 0
LINEAR = 1
CUBIC = 2
AREA = 3
LANCZOS = 4
DEFAULT_WITHOUT_UPSCALE = 5
Option to disallow upscaling.

We wrap the enum in a message to avoid namespace collisions.

(message has no fields)

This enum mirrors the ScaleModes supported by Quad Renderer.

Used in: GlScalerCalculatorOptions, GlSurfaceSinkCalculatorOptions, ImageTransformationCalculatorOptions

DEFAULT = 0
STRETCH = 1
Stretch the frame to the exact provided output dimensions.
FIT = 2
Scale the frame up to fit the drawing area, preserving aspect ratio; may letterbox.
FILL_AND_CROP = 3
Scale the frame up to fill the drawing area, preserving aspect ratio; may crop.

optional int32 packet_offset = 1

A proto that acts as the proxy of SerializationProxyTestClass for serialization.

optional bool bool_value = 1
repeated float float_value = 2
repeated string string_value = 3

optional sint32 alpha_value = 1
The value to set the alpha channel to (0-255). This option is ignored when set to -1 (use image mask instead).

optional int32 num_inputs = 1
Number of side packets which are fed to graph internal streams.
optional SidePacketsToStreamsCalculatorOptions.SetTimestampMode set_timestamp = 2
Selects whether and how a timestamp is set for each packet (see SetTimestampMode).
optional bool vectors_of_packets = 3
If true, then side packets are vectors of packets; otherwise, they are single packets.

We need to accommodate various timestamp modes depending on what we're connecting to.

Used in: SidePacketsToStreamsCalculatorOptions

VECTOR_INDEX = 0
For vectors of packets, the timestamp is the index of the packet within the vector. For single packets, the timestamp is zero.
PRE_STREAM = 1
Timestamps are always set to PreStream.
WHOLE_STREAM = 2
Timestamps are always set to PostStream. TODO Rename to POST_STREAM.
NONE = 3
Do not set timestamp. Can only be used if vectors_of_packets is true. Will cause Timestamp::Unset() run-time errors if the inner packets in the vectors do not already have Timestamps.

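Non-linear similarity model (w.r.t. its parametrization). c_r := cos(rotation); s_r := sin(rotation); Transformation applied to x: $\begin{bmatrix} \text{scale} & 0 \\ 0 & \text{scale} \end{bmatrix} \begin{bmatrix} c_r & -s_r \\ s_r & c_r \end{bmatrix} x + \begin{bmatrix} dx \\ dy \end{bmatrix}$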

Used in: CameraMotion

optional float dx = 1
optional float dy = 2
optional float scale = 3
optional float rotation = 4
angle in [-pi, pi].

A proto like InputCollection::Inputs which has embedded strings within it.

repeated bytes value = 1

A proto3 calculator options for testing.

string sky_color = 1
repeated int32 sky_grid = 2

optional double frame_duration_seconds = 1
Analysis window duration in seconds. Required. Must be greater than 0. (Note: the spectrogram DFT length will be the smallest power-of-2 sample count that can hold this duration.)
optional double frame_overlap_seconds = 2
Duration of overlap between adjacent windows. Hence, frame_rate = 1/(frame_duration_seconds - frame_overlap_seconds). Required that 0 <= frame_overlap_seconds < frame_duration_seconds.
optional bool pad_final_packet = 3
Whether to pad the final packet with zeros. If true, guarantees that all input samples will be output. If set to false, any partial packet at the end of the stream will be dropped.
optional SpectrogramCalculatorOptions.OutputType output_type = 4
optional bool allow_multichannel_input = 5
If set to true then the output will be a vector of spectrograms, one for each channel and the stream will have a MultiStreamTimeSeriesHeader.
optional SpectrogramCalculatorOptions.WindowType window_type = 6
optional double output_scale = 7
Support a fixed multiplicative scaling of the output. This is applied uniformly regardless of output type (i.e., even dBs are multiplied, not offset).
optional bool use_local_timestamp = 8
If use_local_timestamp is true, the output packet's timestamp is based on the last sample of the packet and it's inferred from the latest input packet's timestamp. If false, the output packet's timestamp is based on the cumulative timestamping, which is inferred from the initial input timestamp and the cumulative number of samples.

Output value type can be squared-magnitude, linear-magnitude, deciBels (dB, = 20*log10(linear_magnitude)), or std::complex.

Used in: SpectrogramCalculatorOptions

SQUARED_MAGNITUDE = 0
LINEAR_MAGNITUDE = 1
DECIBELS = 2
COMPLEX = 3

Which window to use when computing the FFT.

Used in: SpectrogramCalculatorOptions

HANN = 0
HAMMING = 1
COSINE = 2

optional float stabilizer = 1
The calculator computes log(x + stabilizer). stabilizer must be >= 0, with 0 indicating a lack of stabilization.
optional bool check_nonnegativity = 2
If true, CHECK that all input values are >= 0. If false, the code will take the log of the potentially negative input values plus the stabilizer.
optional double output_scale = 3
Support a fixed multiplicative scaling of the output.

The settings specifying a status handler and its required external inputs.

Used in: CalculatorGraphConfig

optional string status_handler = 1
The name of the registered status handler class.
required
repeated string input_side_packet = 2
The name of the input side packets. The StatusHandler can access its input side packets by index or by tag. A StatusHandler will only be called if all of its requested input side packets are available (and won't be called if a PacketFactory or PacketGenerator which produces one fails).
repeated string external_input = 1002
DEPRECATED(mgeorg) The old name for input_side_packet.
optional MediaPipeOptions options = 3
The options for the status handler.

Stores the profiling information of a stream.

Used in: CalculatorProfile

optional string name = 1
Stream name.
optional bool back_edge = 2
If true, then this is a back edge input stream and won't be profiled.
optional TimeHistogram latency = 3
Total and histogram of the time that this stream took.

Options for a switch-container directing traffic to one of several contained subgraph or calculator nodes.

repeated CalculatorGraphConfig.Node contained_node = 2
The contained registered subgraphs or calculators.
optional int32 select = 3
Activates the specified channel to receive input packets.
optional bool enable = 4
Activates channel 1 for enable = true, channel 0 otherwise.

repeated SyncSetInputStreamHandlerOptions.SyncSet sync_set = 1
Each synchronization set describes a collection of inputs which must be provided together to the calculator. Any streams which are not in any sync_set will be grouped into a (default) sync set.

Used in: SyncSetInputStreamHandlerOptions

repeated string tag_index = 1
A description of the streams which will be synchronized together. This description uses the Calculator visible specification of a stream. The format is a tag, then an index with both being optional. If the tag is missing it is assumed to be "" and if the index is missing then it is assumed to be 0. If the index is provided then a colon (':') must be used. Examples: "TAG" -> tag "TAG", index 0 "" -> tag "", index 0 ":0" -> tag "", index 0 ":3" -> tag "", index 3 "VIDEO:0" -> tag "VIDEO", index 0 "VIDEO:2" -> tag "VIDEO", index 2

The value for a template parameter. The value can be either a simple value, a dictionary, or a list.

Used in: TemplateDict.Parameter

oneof param_value
- string str = 1
  A string value for the parameter.
- double num = 2
  A numeric value for the parameter.
- TemplateDict dict = 3
  A dictionary of values for the parameter.
repeated TemplateArgument element = 4
An ordered list of values for the parameter.

A dictionary of parameter values.

Used in: TemplateArgument, TemplateSubgraphOptions

repeated TemplateDict.Parameter arg = 1
A map from parameter name to parameter value.

Used in: TemplateDict

optional string key = 1
optional TemplateArgument value = 2

A template rule or a template rule argument expression.

Used in: CalculatorGraphTemplate

optional string param = 1
A template parameter name or a literal value.
optional string op = 2
A template rule operation or a template expression operation.
repeated TemplateExpression arg = 3
Nested template expressions, which define the operation args. TODO: Rename this field to avoid collision with TemplateDict::arg.
optional string path = 4
The path within the protobuf to the modified field values.
optional FieldDescriptorProto.Type field_type = 5
The FieldDescriptor::Type of the modified field.
optional string field_value = 7
Alternative value for the modified field, in protobuf binary format.

Options for a mediapipe template subgraph consisting of mediapipe template arguments.

optional TemplateDict dict = 1
The template arguments used to expand the template for the subgraph.

Internal datastructure used during temporal IRLS smoothing.

Used in: RegionFlowFeature

optional float weight_sum = 1
optional float value_sum = 2

Full Example: node { calculator: "TfLiteConverterCalculator" input_stream: "IMAGE_IN:input_image" output_stream: "TENSOR_OUT:image_tensor" options { [mediapipe.TengineConverterCalculatorOptions.ext] { zero_center: true } } }

optional bool zero_center = 1
Choose normalization mode for output (not applied for Matrix inputs). true = [-1,1] false = [0,1] Ignored if using quantization.
optional bool use_custom_normalization = 6
Custom settings to override the internal scaling factors `div` and `sub`. Both values must be set to non-negative values. Will only take effect on CPU AND when |use_custom_normalization| is set to true. When these custom values take effect, the |zero_center| setting above will be overridden, and the normalized_value will be calculated as: normalized_value = input / custom_div - custom_sub.
optional float custom_div = 7
optional float custom_sub = 8
optional bool flip_vertically = 2
Whether the input image should be flipped vertically (along the y-direction). This is useful, for example, when the input image is defined with a coordinate system where the origin is at the bottom-left corner (e.g., in OpenGL) whereas the ML model expects an image with a top-left origin.
optional int32 max_num_channels = 3
Controls how many channels of the input image get passed through to the tensor. Valid values are 1,3,4 only. Ignored for iOS GPU.
optional bool row_major_matrix = 4
The calculator expects Matrix inputs to be in column-major order. Set row_major_matrix to true if the inputs are in row-major order.
optional bool use_quantized_tensors = 5
Quantization option (CPU only). When true, output kTfLiteUInt8 tensor instead of kTfLiteFloat32.
optional TengineConverterCalculatorOptions.TensorFloat tensor_mean = 9
Normalization option.
optional TengineConverterCalculatorOptions.TensorFloat tensor_scale = 10

Used in: TengineConverterCalculatorOptions

optional float val1 = 1
optional float val2 = 2
optional float val3 = 3

Full Example: node { calculator: "TfLiteInferenceCalculator" input_stream: "TENSOR_IN:image_tensors" output_stream: "TENSOR_OUT:result_tensors" options { [mediapipe.TengineInferenceCalculatorOptions.ext] { model_path: "model.tflite" delegate { gpu {} } } } }

optional string model_path = 1
Path to the TF Lite model (ex: /path/to/modelname.tflite). On mobile, this is generally just modelname.tflite.
optional bool use_gpu = 2
Whether the TF Lite GPU or CPU backend should be used. Effective only when input tensors are on CPU. For input tensors on GPU, GPU backend is always used. DEPRECATED: configure "delegate" instead.
optional bool use_nnapi = 3
Android only. When true, an NNAPI delegate will be used for inference. If NNAPI is not available, then the default CPU delegate will be used automatically. DEPRECATED: configure "delegate" instead.
optional int32 cpu_num_thread = 4
The number of threads available to the interpreter. Effective only when input tensors are on CPU and 'use_gpu' is false.
optional TengineInferenceCalculatorOptions.Delegate delegate = 5
TfLite delegate to run inference. If not specified, when any of the input and output is on GPU (i.e., using the TENSORS_GPU tag) TFLite GPU delegate is used (as if "gpu {}" is specified), or otherwise regular TFLite on CPU is used (as if "tflite {}" is specified) except when building with emscripten where xnnpack is used. NOTE: use_gpu/use_nnapi are ignored if specified. (Delegate takes precedence over use_* deprecated options.)
required string data_type = 6
required int32 output_num = 7
required int32 max_dim = 8
optional bool yolov5_focus = 9
required string tengine_backend = 10

Used in: TengineInferenceCalculatorOptions

oneof delegate
- Delegate.TfLite tflite = 1
- Delegate.Gpu gpu = 2
- Delegate.Nnapi nnapi = 3
- Delegate.Xnnpack xnnpack = 4

Delegate to run GPU inference depending on the device. (Can use OpenGl, OpenCl, Metal depending on the device.)

Used in: Delegate

optional bool use_advanced_gpu_api = 1
Experimental, Android/Linux only. Use TFLite GPU delegate API2 for the NN inference. example: delegate: { gpu { use_advanced_gpu_api: true } }
optional Gpu.Api api = 4
optional bool allow_precision_loss = 3
This option is valid for TFLite GPU delegate API2 only. Set to true to use 16-bit float precision. If max precision is needed, set to false for 32-bit float calculations only.
optional string cached_kernel_path = 2
Load pre-compiled serialized binary cache to accelerate init process. Only available for OpenCL delegate on Android. Kernel caching will only be enabled if this path is set.

This option is valid for TFLite GPU delegate API2 only. Choose any of the available APIs to force running inference using it.

Used in: Gpu

ANY = 0
OPENGL = 1
OPENCL = 2

Android only.

Used in: Delegate

(message has no fields)

Default inference provided by tflite.

Used in: Delegate

(message has no fields)

Used in: Delegate

optional int32 num_threads = 1
Number of threads for XNNPACK delegate. (By default, calculator tries to choose optimal number of threads depending on the device.)

required string data_type = 1

required int32 num_classes = 1
The number of output classes predicted by the detection model.
required int32 num_boxes = 2
The number of output boxes predicted by the detection model.
required int32 num_coords = 3
The number of output values per box predicted by the detection model. The values contain bounding boxes, keypoints, etc.
optional int32 keypoint_coord_offset = 9
The offset of keypoint coordinates in the location tensor.
optional int32 num_keypoints = 10
The number of predicted keypoints.
optional int32 num_values_per_keypoint = 11
The dimension of each keypoint, e.g. number of values predicted for each keypoint.
optional int32 box_coord_offset = 12
The offset of box coordinates in the location tensor.
optional float x_scale = 4
Parameters for decoding SSD detection model.
optional float y_scale = 5
optional float w_scale = 6
optional float h_scale = 7
optional bool apply_exponential_on_box_size = 13
optional bool reverse_output_order = 14
Whether to reverse the order of predicted x, y from output. If false, the order is [y_center, x_center, h, w], if true the order is [x_center, y_center, w, h].
repeated int32 ignore_classes = 8
The ids of classes that should be ignored during decoding the score for each predicted box.
optional bool sigmoid_score = 15
optional float score_clipping_thresh = 16
optional bool flip_vertically = 17
Whether the detection coordinates from the input tensors should be flipped vertically (along the y-direction). This is useful, for example, when the input tensors represent detections defined with a coordinate system where the origin is at the top-left corner, whereas the desired detection representation has a bottom-left origin (e.g., in OpenGL).
optional float min_score_thresh = 18
Score threshold for preserving decoded detections.
required string data_type = 19

required int32 num_classes = 1
The number of output classes predicted by the detection model.
required int32 num_boxes = 2
The number of output boxes predicted by the detection model.
required int32 num_coords = 3
The number of output values per box predicted by the detection model. The values contain bounding boxes, keypoints, etc.
optional float x_scale = 4
Parameters for decoding SSD detection model.
optional float y_scale = 5
optional float w_scale = 6
optional float h_scale = 7
optional bool reverse_output_order = 8
Whether to reverse the order of predicted x, y from output. If false, the order is [y_center, x_center, h, w], if true the order is [x_center, y_center, w, h].
repeated int32 ignore_classes = 9
The ids of classes that should be ignored during decoding the score for each predicted box.
optional bool sigmoid_score = 10
optional float score_clipping_thresh = 11
optional bool flip_vertically = 12
Whether the detection coordinates from the input tensors should be flipped vertically (along the y-direction). This is useful, for example, when the input tensors represent detections defined with a coordinate system where the origin is at the top-left corner, whereas the desired detection representation has a bottom-left origin (e.g., in OpenGL).
required float min_score_thresh = 13
Score threshold for preserving decoded detections.
required int32 img_width = 14
required int32 img_height = 15
required string data_type = 16

Full Example: node { calculator: "TensorConverterCalculator" input_stream: "IMAGE_IN:input_image" output_stream: "TENSOR_OUT:image_tensor" options { [mediapipe.TensorConverterCalculatorOptions.ext] { zero_center: true } } }

optional bool zero_center = 1
Choose normalization mode for output (not applied for Matrix inputs). true = [-1,1] false = [0,1] Ignored if using quantization.
optional bool use_custom_normalization = 6
Custom settings to override the internal scaling factors `div` and `sub`. Both values must be set to non-negative values. Will only take effect on CPU AND when |use_custom_normalization| is set to true. When these custom values take effect, the |zero_center| setting above will be overridden, and the normalized_value will be calculated as: normalized_value = input / custom_div - custom_sub.
optional float custom_div = 7
optional float custom_sub = 8
optional bool flip_vertically = 2
Whether the input image should be flipped vertically (along the y-direction). This is useful, for example, when the input image is defined with a coordinate system where the origin is at the bottom-left corner (e.g., in OpenGL) whereas the ML model expects an image with a top-left origin.
optional int32 max_num_channels = 3
Controls how many channels of the input image get passed through to the tensor. Valid values are 1,3,4 only. Ignored for iOS GPU.
optional bool row_major_matrix = 4
The calculator expects Matrix inputs to be in column-major order. Set row_major_matrix to true if the inputs are in row-major order.
optional bool use_quantized_tensors = 5
Quantization option (CPU only). When true, output kUint8 tensor instead of kFloat32.
optional TensorConverterCalculatorOptions.TensorFloatRange output_tensor_float_range = 9
Normalization option. Setting normalization_range results in the values normalized to the range [output_tensor_float_range.min, output_tensor_float_range.max].

Used in: TensorConverterCalculatorOptions

optional float min = 1
optional float max = 2

Full Example: node { calculator: "TfLiteConverterCalculator" input_stream: "IMAGE_IN:input_image" output_stream: "TENSOR_OUT:image_tensor" options { [mediapipe.TensorrtConverterCalculatorOptions.ext] { zero_center: true } } }

optional bool zero_center = 1
Choose normalization mode for output (not applied for Matrix inputs). true = [-1,1] false = [0,1] Ignored if using quantization.
optional bool use_custom_normalization = 6
Custom settings to override the internal scaling factors `div` and `sub`. Both values must be set to non-negative values. Will only take effect on CPU AND when |use_custom_normalization| is set to true. When these custom values take effect, the |zero_center| setting above will be overridden, and the normalized_value will be calculated as: normalized_value = input / custom_div - custom_sub.
optional float custom_div = 7
optional float custom_sub = 8
optional bool flip_vertically = 2
Whether the input image should be flipped vertically (along the y-direction). This is useful, for example, when the input image is defined with a coordinate system where the origin is at the bottom-left corner (e.g., in OpenGL) whereas the ML model expects an image with a top-left origin.
optional int32 max_num_channels = 3
Controls how many channels of the input image get passed through to the tensor. Valid values are 1,3,4 only. Ignored for iOS GPU.
optional bool row_major_matrix = 4
The calculator expects Matrix inputs to be in column-major order. Set row_major_matrix to true if the inputs are in row-major order.
optional bool use_quantized_tensors = 5
Quantization option (CPU only). When true, output kTfLiteUInt8 tensor instead of kTfLiteFloat32.
optional TensorrtConverterCalculatorOptions.TensorFloat tensor_mean = 9
Normalization option.
optional TensorrtConverterCalculatorOptions.TensorFloat tensor_scale = 10

Used in: TensorrtConverterCalculatorOptions

optional float val1 = 1
optional float val2 = 2
optional float val3 = 3

Full Example: node { calculator: "TfLiteInferenceCalculator" input_stream: "TENSOR_IN:image_tensors" output_stream: "TENSOR_OUT:result_tensors" options { [mediapipe.TensorrtInferenceCalculatorOptions.ext] { model_path: "model.tflite" delegate { gpu {} } } } }

required string onnx_path = 1
Path to the TF Lite model (ex: /path/to/modelname.tflite). On mobile, this is generally just modelname.tflite.
optional bool use_gpu = 2
Whether the TF Lite GPU or CPU backend should be used. Effective only when input tensors are on CPU. For input tensors on GPU, GPU backend is always used. DEPRECATED: configure "delegate" instead.
optional bool use_nnapi = 3
Android only. When true, an NNAPI delegate will be used for inference. If NNAPI is not available, then the default CPU delegate will be used automatically. DEPRECATED: configure "delegate" instead.
optional int32 cpu_num_thread = 4
The number of threads available to the interpreter. Effective only when input tensors are on CPU and 'use_gpu' is false.
optional TensorrtInferenceCalculatorOptions.Delegate delegate = 5
TfLite delegate to run inference. If not specified, when any of the input and output is on GPU (i.e, using the TENSORS_GPU tag) TFLite GPU delegate is used (as if "gpu {}" is specified), or otherwise regular TFLite on CPU is used (as if "tflite {}" is specified) except when building with emscripten where xnnpack is used. NOTE: use_gpu/use_nnapi are ignored if specified. (Delegate takes precedence over use_* deprecated options.)
required string engine_path = 6
optional bool use_fp16 = 7
optional int32 batch_size = 8
optional float detect_threshold = 9
optional float nms_threshold = 10
required int32 output_num = 11

Used in: TensorrtInferenceCalculatorOptions

oneof delegate
- Delegate.TfLite tflite = 1
- Delegate.Gpu gpu = 2
- Delegate.Nnapi nnapi = 3
- Delegate.Xnnpack xnnpack = 4

Delegate to run GPU inference depending on the device. (Can use OpenGl, OpenCl, Metal depending on the device.)

Used in: Delegate

optional bool use_advanced_gpu_api = 1
Experimental, Android/Linux only. Use TFLite GPU delegate API2 for the NN inference. example: delegate: { gpu { use_advanced_gpu_api: true } }
optional Gpu.Api api = 4
optional bool allow_precision_loss = 3
This option is valid for TFLite GPU delegate API2 only, Set to true to use 16-bit float precision. If max precision is needed, set to false for 32-bit float calculations only.
optional string cached_kernel_path = 2
Load pre-compiled serialized binary cache to accelerate init process. Only available for OpenCL delegate on Android. Kernel caching will only be enabled if this path is set.

This option is valid for TFLite GPU delegate API2 only, Choose any of available APIs to force running inference using it.

Used in: Gpu

ANY = 0
OPENGL = 1
OPENCL = 2

Android only.

Used in: Delegate

(message has no fields)

Default inference provided by tflite.

Used in: Delegate

(message has no fields)

Used in: Delegate

optional int32 num_threads = 1
Number of threads for XNNPACK delegate. (By default, calculator tries to choose optimal number of threads depending on the device.)

optional string data_type = 1

required int32 num_classes = 1
The number of output classes predicted by the detection model.
required int32 num_boxes = 2
The number of output boxes predicted by the detection model.
required int32 num_coords = 3
The number of output values per box predicted by the detection model. The values contain bounding boxes, keypoints, etc.
optional int32 keypoint_coord_offset = 9
The offset of keypoint coordinates in the location tensor.
optional int32 num_keypoints = 10
The number of predicted keypoints.
optional int32 num_values_per_keypoint = 11
The dimension of each keypoint, e.g. number of values predicted for each keypoint.
optional int32 box_coord_offset = 12
The offset of box coordinates in the location tensor.
optional float x_scale = 4
Parameters for decoding SSD detection model.
optional float y_scale = 5
optional float w_scale = 6
optional float h_scale = 7
optional bool apply_exponential_on_box_size = 13
optional bool reverse_output_order = 14
Whether to reverse the order of predicted x, y from output. If false, the order is [y_center, x_center, h, w], if true the order is [x_center, y_center, w, h].
repeated int32 ignore_classes = 8
The ids of classes that should be ignored during decoding the score for each predicted box.
optional bool sigmoid_score = 15
optional float score_clipping_thresh = 16
optional bool flip_vertically = 18
Whether the detection coordinates from the input tensors should be flipped vertically (along the y-direction). This is useful, for example, when the input tensors represent detections defined with a coordinate system where the origin is at the top-left corner, whereas the desired detection representation has a bottom-left origin (e.g., in OpenGL).
optional float min_score_thresh = 19
Score threshold for preserving decoded detections.

required int32 num_classes = 1
The number of output classes predicted by the detection model.
optional int32 num_boxes = 2
The number of output boxes predicted by the detection model.
optional int32 num_coords = 3
The number of output values per box predicted by the detection model. The values contain bounding boxes, keypoints, etc.
optional float x_scale = 4
Parameters for decoding SSD detection model.
optional float y_scale = 5
optional float w_scale = 6
optional float h_scale = 7
optional bool reverse_output_order = 8
Whether to reverse the order of predicted x, y from output. If false, the order is [y_center, x_center, h, w], if true the order is [x_center, y_center, w, h].
repeated int32 ignore_classes = 9
The ids of classes that should be ignored during decoding the score for each predicted box.
optional bool sigmoid_score = 10
optional float score_clipping_thresh = 11
optional bool flip_vertically = 12
Whether the detection coordinates from the input tensors should be flipped vertically (along the y-direction). This is useful, for example, when the input tensors represent detections defined with a coordinate system where the origin is at the top-left corner, whereas the desired detection representation has a bottom-left origin (e.g., in OpenGL).
required float min_score_thresh = 13
Score threshold for preserving decoded detections.
required int32 img_width = 14
required int32 img_height = 15

optional float min_score_threshold = 1
Score threshold for preserving the class.
optional int32 top_k = 2
Number of highest scoring labels to output. If top_k is not positive then all labels are used.
optional string label_map_path = 3
Path to a label map file for getting the actual name of class ids.
optional TensorsToClassificationCalculatorOptions.LabelMap label_map = 5
Label map. (Can be used instead of label_map_path.) NOTE: "label_map_path", if specified, takes precedence over "label_map".
optional bool binary_classification = 4
Whether the input is a single float for binary classification. When true, only a single float is expected in the input tensor and the label map, if provided, is expected to have exactly two labels. The single score (float) represents the probability of the first label, and 1 - score is the probability of the second label.

Used in: TensorsToClassificationCalculatorOptions

repeated LabelMap.Entry entries = 1

message TensorsToClassificationCalculatorOptions.LabelMap.Entry

tensors_to_classification_calculator.proto:29

Used in: LabelMap

optional int32 id = 1
optional string label = 2

optional int32 num_classes = 1
[Required] The number of output classes predicted by the detection model.
optional int32 num_boxes = 2
[Required] The number of output boxes predicted by the detection model.
optional int32 num_coords = 3
[Required] The number of output values per box predicted by the detection model. The values contain bounding boxes, keypoints, etc.
optional int32 keypoint_coord_offset = 9
The offset of keypoint coordinates in the location tensor.
optional int32 num_keypoints = 10
The number of predicted keypoints.
optional int32 num_values_per_keypoint = 11
The dimension of each keypoint, e.g. number of values predicted for each keypoint.
optional int32 box_coord_offset = 12
The offset of box coordinates in the location tensor.
optional float x_scale = 4
Parameters for decoding SSD detection model.
optional float y_scale = 5
optional float w_scale = 6
optional float h_scale = 7
optional bool apply_exponential_on_box_size = 13
optional bool reverse_output_order = 14
Whether to reverse the order of predicted x, y from output. If false, the order is [y_center, x_center, h, w], if true the order is [x_center, y_center, w, h].
repeated int32 ignore_classes = 8
The ids of classes that should be ignored during decoding the score for each predicted box. Can be overridden with IGNORE_CLASSES side packet.
optional bool sigmoid_score = 15
optional float score_clipping_thresh = 16
optional bool flip_vertically = 18
Whether the detection coordinates from the input tensors should be flipped vertically (along the y-direction). This is useful, for example, when the input tensors represent detections defined with a coordinate system where the origin is at the top-left corner, whereas the desired detection representation has a bottom-left origin (e.g., in OpenGL).
optional float min_score_thresh = 19
Score threshold for preserving decoded detections.

optional TensorsToFloatsCalculatorOptions.Activation activation = 1
Apply activation function to the floats.

Used in: TensorsToFloatsCalculatorOptions

NONE = 0
SIGMOID = 1

optional int32 num_landmarks = 1
[Required] Number of landmarks from the output of the model.
optional int32 input_image_width = 2
Size of the input image for the model. These options are used only when normalized landmarks are needed. Z coordinate is scaled as X assuming a weak perspective projection camera model.
optional int32 input_image_height = 3
optional bool flip_vertically = 4
Whether the detection coordinates from the input tensors should be flipped vertically (along the y-direction). This is useful, for example, when the input tensors represent detections defined with a coordinate system where the origin is at the top-left corner, whereas the desired detection representation has a bottom-left origin (e.g., in OpenGL).
optional bool flip_horizontally = 6
Whether the detection coordinates from the input tensors should be flipped horizontally (along the x-direction). This is useful, for example, when the input image is horizontally flipped in ImageTransformationCalculator beforehand.
optional float normalize_z = 5
A value that Z coordinates should be divided by. This option is used only when normalized landmarks are needed. It is applied in addition to Z coordinate being re-scaled as X.
optional TensorsToLandmarksCalculatorOptions.Activation visibility_activation = 7
Apply activation function to the tensor representing landmark visibility.
optional TensorsToLandmarksCalculatorOptions.Activation presence_activation = 8
Apply activation function to the tensor representing landmark presence.

Used in: TensorsToLandmarksCalculatorOptions

NONE = 0
SIGMOID = 1

optional GpuOrigin.Mode gpu_origin = 1
For CONVENTIONAL mode in OpenGL, textures start at bottom and need to be flipped vertically as tensors are expected to start at top. (DEFAULT or unset is interpreted as CONVENTIONAL.)
optional TensorsToSegmentationCalculatorOptions.Activation activation = 2
Activation function to apply to input tensor. Softmax requires a 2-channel tensor, see output_layer_index below.
optional int32 output_layer_index = 3
Channel to use for processing tensor. Only applies when using activation=SOFTMAX. Works on two channel input tensor only.

Supported activation functions for filtering.

Used in: TensorsToSegmentationCalculatorOptions

NONE = 0
Assumes 1-channel input tensor.
SIGMOID = 1
Assumes 1-channel input tensor.
SOFTMAX = 2
Assumes 2-channel input tensor.

message ThreadPoolExecutorOptions

thread_pool_executor.proto:25

optional int32 num_threads = 1
Number of threads for running calculators in multithreaded mode. When ThreadPoolExecutorOptions is used in the ExecutorOptions for the default executor with the executor type unspecified, the num_threads field is allowed to be -1 or 0. If not specified or -1, the scheduler will pick an appropriate number of threads depending on the number of available processors.
optional int32 stack_size = 2
Make all worker threads have the specified stack size (in bytes). NOTE: The stack_size option may not be implemented on some platforms.
optional int32 nice_priority_level = 3
The nice priority level of the worker threads. The nice priority level is 0 by default, and lower value means higher priority. The valid thread nice priority level value range varies by OS. Refer to system documentation for more details.
optional ThreadPoolExecutorOptions.ProcessorPerformance require_processor_performance = 4
The performance hint of the processor(s) that the threads will be bound to. Framework will make the best effort to run the threads on the specific processors based on the performance hint. The attempt may fail for various reasons. Success isn't guaranteed.
optional string thread_name_prefix = 5
Name prefix for worker threads, which can be useful for debugging multithreaded applications.

enum ThreadPoolExecutorOptions.ProcessorPerformance

thread_pool_executor.proto:46

Processor performance enum.

Used in: ThreadPoolExecutorOptions

NORMAL = 0
LOW = 1
HIGH = 2

optional double threshold = 1

Stores the profiling information. It is the responsibility of the user of this message to make sure the 'total' field and the interval information (num, size and count) are in a valid state and all get updated together. Each interval of the histogram is closed on the lower range and open on the higher end. An example histogram with interval_size=1000 and num_interval=3 will have the following intervals: - First interval = [0, 1000) - Second interval = [1000, 2000) - Third interval = [2000, +inf) IMPORTANT: If You add any new field, update CalculatorProfiler::Reset() accordingly.

Used in: CalculatorProfile, StreamProfile

optional int64 total = 1
Total time (in microseconds).
optional int64 interval_size_usec = 2
Size of the runtimes histogram intervals (in microseconds) to generate the histogram of the Process() time. The last interval extends to +inf.
optional int64 num_intervals = 3
Number of intervals to generate the histogram of the Process() runtime.
repeated int64 count = 4
Number of calls in each interval.

optional double frame_duration_seconds = 1
Frame duration in seconds. Required. Must be greater than 0. This is rounded to the nearest integer number of samples.
optional double frame_overlap_seconds = 2
Frame overlap in seconds. If emulate_fractional_frame_overlap is false (the default), then the frame overlap is rounded to the nearest integer number of samples, and the step from one frame to the next will be the difference between the number of samples in a frame and the number of samples in the overlap. If emulate_fractional_frame_overlap is true, then frame overlap will be a variable number of samples, such that the long-time average time step from one frame to the next will be the difference between the (nominal, not rounded) frame_duration_seconds and frame_overlap_seconds. This is useful where the desired time step is not an integral number of input samples. A negative frame_overlap_seconds corresponds to skipping some input samples between each frame of emitted samples. Required that frame_overlap_seconds < frame_duration_seconds.
optional bool emulate_fractional_frame_overlap = 5
See frame_overlap_seconds for semantics.
optional bool pad_final_packet = 3
Whether to pad the final packet with zeros. If true, guarantees that all input samples (other than those that fall in gaps implied by negative frame_overlap_seconds) will be emitted. If set to false, any partial packet at the end of the stream will be dropped.
optional TimeSeriesFramerCalculatorOptions.WindowFunction window_function = 4
optional bool use_local_timestamp = 6
If use_local_timestamp is true, the output packet's timestamp is based on the last sample of the packet and it's inferred from the latest input packet's timestamp. If false, the output packet's timestamp is based on the cumulative timestamping, which is inferred from the initial input timestamp and the cumulative number of samples.

Optional windowing function. The default is NONE (no windowing function).

Used in: TimeSeriesFramerCalculatorOptions

NONE = 0
HAMMING = 1
HANN = 2

Header for a uniformly sampled time series stream. Each Packet in the stream is a Matrix, and each column is a (vector-valued) sample of the series, i.e. each column corresponds to a distinct sample in time.

Used in: MultiStreamTimeSeriesHeader

optional double sample_rate = 1
Number of samples per second (hertz). The sample_rate is the reciprocal of the period between consecutive samples within a packet. Required, and must be greater than zero.
optional int32 num_channels = 2
The number of channels in each sample. This is the number of rows in the matrix. Required, and must be greater than zero.
optional int32 num_samples = 3
For streams that output a fixed number of samples per packet. This field should not be set if the number of samples varies from packet to packet. This is the number of columns in the matrix.
optional double packet_rate = 4
For streams that output Packets at a fixed rate, in Packets per second. In other words, the reciprocal of the difference between consecutive Packet timestamps.
optional double audio_sample_rate = 5
Spectral representations (e.g. from SpectrogramCalculator) will have their sample_rate field indicating the frame rate (e.g. 100 Hz), but downstream consumers need to know the sample_rate of the source time-domain waveform in order to correctly interpret the spectral bins. Units are hertz.

optional string label_map_path = 1
Path to a label map file for getting the actual name of detected classes.

optional Color box_color = 1
Color of boxes.
optional double thickness = 2
Thickness of the drawing of boxes.

Next tag: 14 Proto equivalent of struct TimedBox.

Used in: BoxDetectorIndex.BoxEntry.FrameEntry, TimedBoxProtoList

optional float top = 1
Normalized coords - in [0, 1]
optional float left = 2
optional float bottom = 3
optional float right = 4
optional float rotation = 7
Rotation of box w.r.t. center in radians.
optional MotionBoxState.Quad quad = 9
optional int64 time_msec = 5
optional int32 id = 6
Unique per object id to disambiguate boxes.
optional string label = 13
Box label name.
optional float confidence = 8
Confidence of box tracked in the range [0, 1], with 0 being least confident, and 1 being most confident. A reasonable threshold is 0.5 to filter out unconfident boxes.
optional float aspect_ratio = 10
Aspect ratio (width / height) for the tracked rectangle in physical space. If this field is provided, quad tracking will be performed using 6 degrees of freedom perspective transform between physical rectangle and frame quad. Otherwise, 8 degrees of freedom homography tracking between adjacent frames will be used.
optional bool reacquisition = 11
Whether or not to enable reacquisition functionality for this specific box.
optional bool request_grouping = 12
Whether we want this box to be potentially grouped with other boxes to track together. This is useful for tracking small boxes that lie on a plane. For example, when we detect a plane, track the plane, then all boxes within the plane can share the same homography transform.

Used in: BoxTrackerCalculatorOptions

repeated TimedBoxProto box = 1

optional string timestamp_base_tag_index = 1
The TAG:index of the input stream used as the timestamp base. TimestampAlignInputStreamHandler aligns the packet timestamps of all other input streams with the packet timestamps of this input stream.

Capture tone change between two frames and per-frame tone statistics. The estimated tone change describes the transformation of color intensities from the current to the previous frame. Next tag: 16

optional GainBiasModel gain_bias = 1
optional AffineToneModel affine = 2
optional MixtureGainBiasModel mixture_gain_bias = 3
optional MixtureAffineToneModel mixture_affine = 4
optional float mixture_domain_sigma = 5
TODO: Implement.
optional float frac_clipped = 6
Fraction of clipped pixels in [0, 1]. A pixel is considered clipped if more than ToneEstimationOptions::max_clipped_channels are over- or under exposed.
optional float low_percentile = 8
[low|mid|high]_percentile's.
optional float low_mid_percentile = 9
optional float mid_percentile = 10
optional float high_mid_percentile = 11
optional float high_percentile = 12
optional bool log_domain = 13
If set, all models are estimated in log domain, specifically intensity I is transformed via log(1.0 + I) := I'. Consequently, after applying the models, intensity needs to be transformed back to visible range via exp(I') - 1.0.
optional ToneChange.Type type = 14
optional ToneChange.StabilityStats stability_stats = 15

Stats based on stability analysis.

Used in: ToneChange

optional int32 num_inliers = 1
Number of tone matches that were inliers (used for tone estimation).
optional float inlier_fraction = 2
Fraction of tone matches that were inliers.
optional double inlier_weight = 3
Total IRLS weight summed over all inliers.

ToneChange type indicates whether highest degree of freedom (DOF) model estimation was deemed stable, in which case ToneChange::Type is set to VALID. If a model was deemed not stable (according to *StabilityBounds in ToneEstimationOptions), it is set to the lower dof type which was deemed stable.

Used in: ToneChange

VALID = 0
INVALID = 10
Identity model, gain bias unreliable.

Next tag: 13

optional ToneMatchOptions tone_match_options = 1
optional ClipMaskOptions clip_mask_options = 2
optional float stats_low_percentile = 3
Percentiles for tone statistics.
optional float stats_low_mid_percentile = 4
optional float stats_mid_percentile = 5
optional float stats_high_mid_percentile = 6
optional float stats_high_percentile = 7
optional int32 irls_iterations = 8
optional ToneEstimationOptions.GainBiasBounds stable_gain_bias_bounds = 9
optional ToneEstimationOptions.DownsampleMode downsample_mode = 10
optional int32 downsampling_size = 11
Specify the size of either dimension here, the frame will be downsampled to fit downsampling_size.
optional float downsample_factor = 12

We support down-sampling of an incoming frame before running the resolution dependent part of the tone estimation.

Used in: ToneEstimationOptions

DOWNSAMPLE_NONE = 1
no downsampling.
DOWNSAMPLE_TO_MAX_SIZE = 2
downsizes frame such that frame_size == downsampling_size. frame_size := max(width, height).
DOWNSAMPLE_BY_FACTOR = 3
downsizes frame by pre-defined factor.
DOWNSAMPLE_TO_MIN_SIZE = 4
downsizes frame such that frame_size == downsampling_size. frame_size := min(width, height).

Used in: RegionFlowComputationOptions, ToneEstimationOptions

optional float min_inlier_fraction = 1
optional float min_inlier_weight = 2
Accept 2% intensity difference as valid inlier.
optional float lower_gain = 3
optional float upper_gain = 4
optional float lower_bias = 5
optional float upper_bias = 6

Used in: PatchToneMatch

optional float curr_val = 1
Intensity in current frame.
optional float prev_val = 2
Matching intensity in previous frame.

Used in: ToneEstimationOptions

optional float min_match_percentile = 1
ToneChange's are fit to ToneMatches extracted from matching patches, using order statistics of their corresponding intensities. Matches are defined by having the same percentile of ordered intensities. If any member of the ToneMatch is below under or above over-exposed the match is discarded (based on parameters min and max_exposure above). Matches are extracted from min_match_percentile to max_match_percentile in #match_percentile_steps equidistant steps.
optional float max_match_percentile = 2
optional int32 match_percentile_steps = 3
optional int32 patch_radius = 4
Patch radius from which order statistics are collected.
optional float max_frac_clipped = 5
Only matches with not too many pixels over- or underexposed are used.
optional bool log_domain = 8
If set matches will be collected in the log domain.

optional int32 top_k = 1
How many highest scoring packets to output.
optional float threshold = 2
If set, only keep the scores that are greater than the threshold.
optional string label_map_path = 3
Path to a label map file for getting the actual name of classes.

Next tag: 42

Used in: BoxTrackerOptions

optional TrackStepOptions.TrackingDegrees tracking_degrees = 28
optional bool track_object_and_camera = 32
If set and one of the TRACKING_DEGREE_OBJECT degrees are set also applies camera motion in addition to the object motion.
optional int32 irls_iterations = 1
Number of iterations to iteratively estimate model and re-estimate influence of each vector.
optional float spatial_sigma = 2
Gaussian spatial prior sigma relative to box size. For motivation, see this plot: http://goo.gl/BCfcy.
optional float min_motion_sigma = 3
Gaussian velocity prior sigma. It is computed as the maximum of the absolute minimum sigma (in normalized domain) and the relative sigma w.r.t. previous motion.
optional float relative_motion_sigma = 4
optional float motion_disparity_low_level = 6
Settings for motion disparity. Difference between previous and current motion magnitude is scored linearly, from motion_disparity_low_level to motion_disparity_high_level (mapped to score of 0 and 1 respectively). Motivation is to ensure acceleration between frames is within reasonable bounds. Represents a maximum acceleration of around 4 - 5 pixels per frame in 360p video to be unpenalized, with accelerations of around >= 10 pixels being considered inconsistent with prediction.
optional float motion_disparity_high_level = 7
optional float disparity_decay = 8
Motion disparity decays across frames. Disparity of previous frame decays over time; the decayed value is used if the disparity in the current frame is not higher, i.e. the larger of the current and decayed disparity is taken. Motivation is that if acceleration was unreasonably high (and we likely lost tracking) we enter a stage of trying to regain tracking by looking for vectors that agree with the previous prediction.
optional float motion_prior_weight = 9
Object motion is given as linear combination of previous and measured motion depending on the motion_disparity (a high disparity is giving high weight to the previous motion). We enforce at least a minimum of the below motion_prior_weight regardless of the motion disparity.
optional float background_discrimination_low_level = 10
Settings for motion discrimination. Current motion magnitude is scored linearly, from background_discrimination_low_level to background_discrimination_high_level (mapped to score of 0 and 1 respectively). Motivation is that high object motions are easy to discriminate from the background, whereas small object motions are virtually indistinguishable. Represents a range of 2 - 4 pixels for 360p video.
optional float background_discrimination_high_level = 11
optional float inlier_center_relative_distance = 12
Spring force settings. If difference between predicted center of the box in the next frame and the predicted center of the inliers deviates by more than inlier_center_relative_distance times the box [width|height] a spring force is applied to the box. The amount of force is spring_force times the difference.
optional float inlier_spring_force = 13
optional float kinetic_center_relative_distance = 14
Same as above, but for the center of large motion magnitudes.
optional float kinetic_spring_force = 15
optional float kinetic_spring_force_min_kinetic_energy = 21
Spring force towards large motions is only applied when kinetic energy is above the specified threshold.
optional float velocity_update_weight = 16
Bias of old velocity during update step.
optional int32 max_track_failures = 17
Maximum number of frames considered to be tracking failures -> If over threshold, box is considered untrackable.
optional float expansion_size = 18
Domain used for tracking is always larger than the current box. If current motion is not negligible, box is expanded in the direction of the motion, otherwise expanded in all directions by the amount specified below (w.r.t. normalized domain).
optional float inlier_low_weight = 19
Features are scored based on the magnitude of their irls weights, mapped to [0, 1] using the following range. The range represents roughly 3 - 1.5 pixels error for 360p video.
optional float inlier_high_weight = 20
optional float kinetic_energy_decay = 22
Kinetic energy decays over time by the specified rate.
optional float prior_weight_increase = 23
Amount by which prior is increased/decreased in case of valid/invalid measurements.
optional float low_kinetic_energy = 24
We map the amount of present kinetic energy linearly to the domain [0, 1] describing if an object is static (0) or moving (1).
~0.4 pix
optional float high_kinetic_energy = 25
~3 pix
optional bool return_internal_state = 26
Outputs internal state to MotionBoxState.
optional bool use_post_estimation_weights_for_state = 29
Specifies which weights are stored in the internal state. By default post-estimation weights are stored, otherwise pre-estimation weights are stored.
optional bool compute_spatial_prior = 27
Computes spatial grid of inliers and stores it in the MotionBoxState.
optional TrackStepOptions.IrlsInitialization irls_initialization = 30
optional float static_motion_temporal_ratio = 33
Ratio between static motion and temporal scale. This is actually the threshold on speed, under which we consider static (non-moving object).
optional TrackStepOptions.CancelTrackingWithOcclusionOptions cancel_tracking_with_occlusion_options = 34
optional int32 object_similarity_min_contd_inliers = 35
If number of continued inliers is less than this number, then the object motion model will fall back to translation model. Set this min_continued_inliers threshold to a low number to make sure they follow local object rotation and scale, but it may result in un-robust rotation and scale estimation if the threshold is too low. Recommend that you don't set a number < 4.
optional float box_similarity_max_scale = 36
Maximum acceptable scale component of object similarity transform. Minimum scale is computed as 1.0 / max_scale. Exclusive for tracking a box with similarity.
optional float box_similarity_max_rotation = 37
Maximum acceptable object similarity rotation in radians.
optional float quad_homography_max_scale = 38
Homography transform will first be projected to similarity, and the scale component of the similarity transform should be within the range of [1.0 / max_scale, max_scale].
optional float quad_homography_max_rotation = 39
The rotation component of the projected similarity should be smaller than this maximum rotation threshold.
optional TrackStepOptions.CameraIntrinsics camera_intrinsics = 40
optional bool forced_pnp_tracking = 41
Specifically for quad tracking (aka TRACKING_DEGREE_OBJECT_PERSPECTIVE mode), if aspect_ratio field is set in start pos, pnp tracking will be deployed. If aspect_ratio is unknown (not set), but forced_pnp_tracking is true, we will first estimate the aspect ratio for the 3D quadrangle, then perform pnp tracking. If aspect_ratio is unknown and pnp tracking is not forced, general homography tracking will be deployed.

Pre-calibrated camera intrinsics parameters, including focal length, center point, distortion coefficients (only 3 radial factors) and image width / height. The image formation model is described here: https://docs.opencv.org/2.4/doc/tutorials/calib3d/camera_calibration/camera_calibration.html Only used for quad tracking mode. Leave it empty if unknown.

Used in: TrackStepOptions

optional float fx = 1
optional float fy = 2
optional float cx = 3
optional float cy = 4
optional float k0 = 5
optional float k1 = 6
optional float k2 = 7
optional int32 w = 8
optional int32 h = 9

Different control parameters to terminate tracking when occlusion occurs.

Used in: TrackStepOptions

optional bool activated = 1
optional float min_motion_continuity = 2
optional float min_inlier_ratio = 3

Irls initialization by performing several rounds of RANSAC to preselect features for motion estimation scoring outliers low and inliers to be at least of median inlier weight.

Used in: TrackStepOptions

optional bool activated = 1
optional int32 rounds = 2
Rounds of RANSAC.
optional float cutoff = 3
Normalized cutoff threshold for a vector to be considered an inlier.

Degrees of freedom being used for tracking. By default tracker only uses translation. Additionally scale and rotation from the camera motion and / or object motion can be taken into account.

Used in: TrackStepOptions

TRACKING_DEGREE_TRANSLATION = 0
TRACKING_DEGREE_CAMERA_SCALE = 1
Additional tracking degrees according to camera motion.
TRACKING_DEGREE_CAMERA_ROTATION = 2
TRACKING_DEGREE_CAMERA_ROTATION_SCALE = 3
TRACKING_DEGREE_CAMERA_PERSPECTIVE = 4
TODO: Implement!
TRACKING_DEGREE_OBJECT_SCALE = 5
Tracking degrees modeling object motion. Note that additional object degrees of freedom are only applied when estimation is deemed stable, in particular sufficient inliers are present. By default, does NOT apply camera motion. If that is desired set the flag: track_object_and_camera to true.
TRACKING_DEGREE_OBJECT_ROTATION = 6
TRACKING_DEGREE_OBJECT_ROTATION_SCALE = 7
TRACKING_DEGREE_OBJECT_PERSPECTIVE = 8

optional TrackedDetectionManagerConfig tracked_detection_manager_options = 1

Used in: TrackedDetectionManagerCalculatorOptions

optional float is_same_detection_max_area_ratio = 1
When we compare two detection boxes, if the ratio of the area is larger than is_same_detection_max_area_ratio, we consider them being different detections.
optional float is_same_detection_min_overlap_ratio = 2
When we compare two detection boxes, if the overlap ratio is larger than is_same_detection_min_overlap_ratio, we consider them being same detection.

TrackingContainer is self-describing container format to store arbitrary chunks of binary data. Each container is typed via its 4 character header, versioned via an int, and followed by the size of the binary data and the actual data. Designed for clients without availability of protobuffer support. Note: This message is mainly used for documentation purposes and uses custom encoding as specified by FlowPackager::TrackingContainerFormatToBinary. Default binary size of a TrackingContainer (DO NOT CHANGE!): header: 4 byte + version: 4 byte + size: 4 byte + data #size SUM: 12 + #size.

Used in: TrackingContainerFormat

optional string header = 1
4 character header.
optional fixed32 version = 2
Version information.
optional fixed32 size = 3
Size of binary data held by container
optional bytes data = 4
Binary data encoded.

Container format for clients without proto support (written via FlowPackager::TrackingContainerFormatToBinary and read via FlowPackager::TrackingContainerFormatFromBinary). Proto here is intermediate format for documentation and internal use. Stores multiple TrackingContainers of different types. Meta data is stored first, to facilitate random seek (via stream offset positions) to arbitrary binary TrackingData. Termination container signals end of stream.

optional TrackingContainer meta_data = 1
Wraps binary meta data, via custom encode.
repeated TrackingContainer track_data = 2
Wraps BinaryTrackingData.
optional TrackingContainer term_data = 3
Add new TrackingContainers above before end of stream indicator. Zero sized termination container with TrackingContainer::header = "TERM".

Simplified proto format of above TrackingContainerFormat. Instead of using self-describing TrackingContainer's, we simply use the proto wire format for encoding and decoding (proto format is typed and versioned via ids).

optional MetaData meta_data = 1
repeated BinaryTrackingData track_data = 2

Next flag: 9

Used in: TrackingDataChunk.Item

optional int32 frame_flags = 1
optional int32 domain_width = 2
Tracking data is resolution independent specified w.r.t. specified domain.
optional int32 domain_height = 3
optional float frame_aspect = 6
Aspect ratio (w/h) of the original frame tracking data was computed from.
optional Homography background_model = 4
optional TrackingData.MotionData motion_data = 5
optional uint32 global_feature_count = 7
Total number of features in our analysis
optional float average_motion_magnitude = 8
Average of all motion vector magnitudes (without accounting for any motion model), within 10th to 90th percentile (to remove outliers).

FLAG_PROFILE_BASELINE = 0
FLAG_PROFILE_HIGH = 1
FLAG_HIGH_FIDELITY_VECTORS = 2
FLAG_BACKGROUND_UNSTABLE = 4
Background model could not be estimated.
FLAG_DUPLICATED = 8
Frame is duplicated, i.e. identical to previous one.
FLAG_CHUNK_BOUNDARY = 16
Indicates the beginning of a new chunk. In this case the track_id's are not compatible w.r.t. previous one.

Stores num_elements vectors of motion data. (x,y) position encoded via row_indices and col_starts, as compressed sparse column matrix storage format: (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29), Vector data is stored as (dx, dy) position. Optionally we store the fitting error and track id for each feature.

Used in: TrackingData

optional int32 num_elements = 1
repeated float vector_data = 2
#num_elements pairs (flow_x, flow_y) densely packed.
repeated int32 track_id = 3
Stores corresponding track index for each feature. Features belonging to the same track over time are assigned the same id. NOTE: Due to size, tracking ids are never stored as compressed binary tracking data.
repeated int32 row_indices = 4
# num_elements row indices.
repeated int32 col_starts = 5
Start index in above array for each column (#domain_width + 1 entries).
repeated BinaryFeatureDescriptor feature_descriptors = 6
Feature descriptors for num_elements feature points.
repeated int32 actively_discarded_tracked_ids = 7
Stores all the tracked ids that have been discarded actively. This information will be used by downstream consumers to avoid misjudgement on tracking continuity.

repeated TrackingDataChunk.Item item = 1
optional bool last_chunk = 2
Set as marker for last chunk.
optional bool first_chunk = 3
Set as marker for first chunk.

Used in: TrackingDataChunk

optional TrackingData tracking_data = 1
optional int32 frame_idx = 2
Global frame index.
optional int64 timestamp_usec = 3
Corresponding timestamp.
optional int64 prev_timestamp_usec = 4
Previous frame timestamp.

Next tag: 33

Used in: RegionFlowComputationOptions

optional TrackingOptions.FlowDirection internal_tracking_direction = 19
Flow direction used internally during tracking features. Forward tracking allows reusing tracked features instead of explicitly tracking them in every frame, and can therefore be faster. See the reuse_features_XXX options below. However, if not reusing features, then it is best to match the direction for both internal tracking and output flow, for performance reasons.
optional TrackingOptions.FlowDirection output_flow_direction = 20
Direction of flow vectors that are computed and output by calls to retrieve region flow, tracked features, etc. Note when this is BACKWARD, then the returned flow for frame N contains features tracked *from* frame N to a previous frame N-k. When this is FORWARD, the flow for frame N contains the flow from features in a previous frame N-k, tracked *to* frame N. Note that the output flow direction can only be set to FORWARD or BACKWARD.
optional TrackingOptions.TrackingPolicy tracking_policy = 25
optional int32 multi_frames_to_track = 1
Number of frame-pairs used for POLICY_MULTI_FRAME, ignored for other policies. Value of 1 means we are tracking features in the current frame, w.r.t. the previous one. Value of 2 denotes tracking of features in current w.r.t the previous one and the one before the previous one, etc.
optional int32 long_tracks_max_frames = 26
Maximum length of long feature tracks for POLICY_LONG_TRACKS in frames. Note: This maximum is not hard enforced, to avoid that many long tracks are dropped at the same time. Instead if a feature reaches long_tracks_max_frames * 0.8, it will get dropped with a probability of X, where X is calculated, such that 95% of all qualifying features are dropped within the interval [.8, 1.2] * long_tracks_max_frames.
optional int32 max_features = 2
Hard limit of maximum number of features. Control density of features, with min_feature_distance option. This limit is to guarantee that the run-time of RegionFlowComputation does not spiral out of control.
optional TrackingOptions.CornerExtractionMethod corner_extraction_method = 27
optional TrackingOptions.MinEigValExtractionSettings min_eig_val_settings = 28
optional TrackingOptions.HarrisExtractionSettings harris_settings = 29
optional TrackingOptions.FastExtractionSettings fast_settings = 31
optional int32 tracking_window_size = 4
optional int32 tracking_iterations = 5
optional float fractional_tracking_distance = 6
Fractional tracking distance w.r.t. frame diameter d. The number of pyramid levels l is chosen such that 2^l * tracking_window_size / 2 >= fractional_tracking_distance * d. Therefore, theoretically it is guaranteed that objects moving less than fractional_tracking_distance * d can be tracked.
optional bool adaptive_tracking_distance = 24
If set, modifies tracking distance to be 130% of maximum average tracking distances of previous frames.
optional float min_feature_distance = 7
Minimum feature distance in pixels. Close features are suppressed. If value < 1, the distance is computed as a fraction of the frame diameter.
optional bool distance_downscale_sqrt = 21
By default, when downscaling by factor x, the minimum feature distance is downscaled by a factor of sqrt(x). If set false, no scaling is performed.
optional bool adaptive_good_features_to_track = 8
Uses grid based extraction of features. Quality level is local within a grid cell and results are combined over all cells and multiple scales and grid offsets. Default option, setting it to false is deprecated and will fail.
optional float adaptive_features_block_size = 9
Size of each grid cell. Values < 1 are interpreted to be relative to frame_width_ x frame_height_.
optional int32 adaptive_features_levels = 10
Scales / levels employed for feature extraction. Grid cell size is scaled by 0.5 for each level.
optional int32 adaptive_extraction_levels = 22
If > 1, feature extraction is carried out at multiple scales by downscaling the image repeatedly, extracting features (eigenvalue images) and upscaling them.
optional int32 adaptive_extraction_levels_lowest_size = 23
Alternate way of specifying extraction levels: number of levels is automatically computed by downsampling the image until its maximum dimension (width or height) reaches this value. Overrides adaptive_extraction_levels if > 0.
optional float synthetic_zero_motion_grid_step = 13
Grid step-size in fraction of width or height used for creating synthetic zero motion tracks with feature points lying on a grid. Can be set based on desired number of total features as 1/sqrt(num_features), e.g. .04 ~= 1/sqrt(600).
optional bool wide_baseline_matching = 14
If set, uses ORB features with brute force matching and ratio test to track frames across larger perspective changes than possible with default KLT features.
optional float ratio_test_threshold = 15
Only brute force matches with best_match_distance < ratio_test_threshold * second_best_match_distance are retained.
optional bool refine_wide_baseline_matches = 16
Refines wide baseline matches by estimating affine transform to wide-baseline matches which is used to seed initial positions for KLT matches.
optional int32 reuse_features_max_frame_distance = 17
When tracking features, features tracked from frame A to frame B may be reused as the features for frame B when tracking from it (instead of extracting features). The max_frame_distance flag limits the distance between A and B for the features to be reused. Setting it to 0 => no re-use.
optional float reuse_features_min_survived_frac = 18
In conjunction with above, the features are reused in frame B only if they are at-least this fraction of the original features in frame A. Otherwise they are reset and extracted from scratch.
optional bool use_cv_tracking_algorithm = 30
If set uses newer OpenCV tracking algorithm. Recommended to be set for all new projects.
optional TrackingOptions.KltTrackerImplementation klt_tracker_implementation = 32
Implementation choice of KLT tracker.

Specifies the extraction method for features.

Used in: TrackingOptions

EXTRACTION_HARRIS = 1
Using Harris' approximation of EXTRACTION_MIN_EIG_VAL.
EXTRACTION_MIN_EIG_VAL = 2
Exact smallest eigenvalue computation.
EXTRACTION_FAST = 3
Extract using FAST feature detector.

message TrackingOptions.FastExtractionSettings

region_flow_computation.proto:137

Used in: TrackingOptions

optional int32 threshold = 1
Threshold on difference between intensity of the central pixel and pixels of a circle around this pixel. Empirically, the larger the threshold, the fewer keypoints will be detected. Default value is set to the same value as in OpenCV.

Describes direction of flow during feature tracking and for the output region flow.

Used in: TrackingOptions

FORWARD = 1
Tracks are forward, from frame N-k -> frame N (k > 0).
BACKWARD = 2
Tracks are backward, from frame N -> frame N-k (k > 0).
CONSECUTIVELY = 3
Try forward and backward tracking consecutively.

Used in: TrackingOptions

optional float feature_quality_level = 1
Same as in MinEigValExtractionSettings.

Used in: TrackingOptions

UNSPECIFIED = 0
KLT_OPENCV = 1
Use OpenCV's implementation of KLT tracker.

Settings for above corner extraction methods.

Used in: TrackingOptions

optional float feature_quality_level = 1
Quality level of features (features with min_eig_value < quality_level * max_eig_value are rejected). Here [min|max]_eig_value denote the minimum and maximum eigen value of the auto-correlation matrix of the patch centered at a feature point. The ratio of eigenvalues denotes the "cornerness", lower means more pronounced corners. (see http://en.wikipedia.org/wiki/Harris-Affine for details.)
optional float adaptive_lowest_quality_level = 2
Features below this quality level are always discarded, even if their score is above feature_quality_level() * local maximum within that grid cell. This prevents us from including very poor features.

Specifies how a feature is tracked w.r.t. previous or next frames (dependent on the FlowDirection options above). Per default, each frame is tracked w.r.t. a single neighboring frame (POLICY_SINGLE_FRAME). If associations across multiple frames are desired, POLICY_MULTI_FRAME creates multiple results for the current frame, by tracking features w.r.t. multiple neighbors. Number of neighbors is specified by multi_frames_to_track. If long feature tracks are desired (i.e. a track across a frame pair that is identified to belong to an earlier known feature), use POLICY_LONG_TRACKS. Maximum track length can be specified by long_tracks_max_frames.

Used in: TrackingOptions

POLICY_SINGLE_FRAME = 1
Tracks w.r.t. previous or next frame.
POLICY_MULTI_FRAME = 2
Tracks w.r.t. multiple frames.
POLICY_LONG_TRACKS = 3
Create long feature tracks.

Simple translational model: I * x + [dx; dy] with I being 2x2 identity transform.

Used in: CameraMotion

optional float dx = 1
optional float dy = 2

optional VideoPreStreamCalculatorOptions.Fps fps = 1

An arbitrary number of frames per second. Prefer the StandardFps enum to store industry-standard, safe FPS values.

Used in: VideoPreStreamCalculatorOptions

optional double value = 1
The possibly approximated value of the frame rate, in frames per second. Unsafe to use in accurate computations because prone to rounding errors. For example, the 23.976 FPS value has no exact representation as a double.
optional Fps.Rational32 ratio = 2
The exact value of the frame rate, as a rational number.

Used in: Fps

optional int32 numerator = 1
optional int32 denominator = 2

optional bool copy_visibility = 1
optional bool copy_presence = 2

oneof filter_options
- VisibilitySmoothingCalculatorOptions.NoFilter no_filter = 1
- VisibilitySmoothingCalculatorOptions.LowPassFilter low_pass_filter = 2

Used in: VisibilitySmoothingCalculatorOptions

optional float alpha = 1
Coefficient applied to a new value, while `1 - alpha` is applied to a stored value. Should be in [0, 1] range. The smaller the value - the smoother result and the bigger lag.

Default behaviour and fast way to disable smoothing.

Used in: VisibilitySmoothingCalculatorOptions

(message has no fields)

package mediapipe

message AffineModel

optional float dx = 1

optional float dy = 2

optional float a = 3

optional float b = 4

optional float c = 5

optional float d = 6

message AffineToneModel

optional float g_00 = 1

optional float g_01 = 2

optional float g_02 = 3

optional float g_03 = 4

optional float g_10 = 5

optional float g_11 = 6

optional float g_12 = 7

optional float g_13 = 8

optional float g_20 = 9

optional float g_21 = 10

optional float g_22 = 11

optional float g_23 = 12

message Anchor

required float x_center = 1

required float y_center = 2

required float h = 3

required float w = 4

message AnnotationOverlayCalculatorOptions

optional int32 canvas_width_px = 2

optional int32 canvas_height_px = 3

optional Color canvas_color = 4

optional bool flip_text_vertically = 5

optional bool gpu_uses_top_left_origin = 6

optional float gpu_scale_factor = 7

message AssociationCalculatorOptions

optional float min_similarity_threshold = 1

message AudioDecoderOptions

repeated AudioStreamOptions audio_stream = 1

optional double start_time = 2

optional double end_time = 3

message AudioStreamOptions

optional int64 stream_index = 1

optional bool allow_missing = 2

optional bool ignore_decode_failures = 3

optional bool output_regressing_timestamps = 4

optional bool correct_pts_for_rollover = 5

message BilateralFilterCalculatorOptions

optional float sigma_color = 1

optional float sigma_space = 2

message BinaryFeatureDescriptor

optional bytes data = 1

message BinaryTrackingData

optional bytes data = 1

message BoundingBox

optional int32 left_x = 1

optional int32 upper_y = 2

optional int32 right_x = 3

optional int32 lower_y = 4

message BoxDetectorCalculatorOptions

optional BoxDetectorOptions detector_options = 1

repeated string index_proto_filename = 2

message BoxDetectorIndex

repeated BoxDetectorIndex.BoxEntry box_entry = 1

message BoxDetectorIndex.BoxEntry

repeated BoxEntry.FrameEntry frame_entry = 1

message BoxDetectorIndex.BoxEntry.FrameEntry

optional TimedBoxProto box = 1

repeated float keypoints = 2

repeated BinaryFeatureDescriptor descriptors = 3

message BoxDetectorOptions

optional BoxDetectorOptions.IndexType index_type = 1

optional int32 detect_every_n_frame = 2

optional bool detect_out_of_fov = 4

optional BoxDetectorOptions.ImageQuerySettings image_query_settings = 3

optional int32 descriptor_dims = 5

optional int32 min_num_correspondence = 6

optional float ransac_reprojection_threshold = 7

optional float max_match_distance = 8

optional float max_perspective_factor = 9

message BoxDetectorOptions.ImageQuerySettings

optional int32 pyramid_bottom_size = 1