Contains the same information as a model::AbstractModel (without the data_spec field).
Used in:
Name of the model. Should match one of the registered models in the :model_library.
Task solved by the model e.g. classification, regression.
Index of the label column in the dataspec.
Training example weights.
List of indices (in the dataspec) of the model input features.
Index of the "grouping" attribute in the dataspec for ranking problems e.g. the query in a <query,document> ranking problem.
Pre-computed variable importances (VI). The VIs of the model are composed of the pre-computed VIs (this field) and the "model specific VIs" (i.e. variable importance computed on the fly based on the model's structure).
If true, the output of a task=CLASSIFICATION model is a probability and can be used accordingly (e.g. averaged, clamped to [0,1]). If false, the output of the task=CLASSIFICATION model might not be a probability.
Index of the "treatment" attribute in the dataspec for uplift problems.
Logs of the automated hyper-parameter tuning of the model.
Logs of the automated feature selection of the model.
Indicates if a model is pure for serving, i.e. the model was stripped of all information not required for serving.
Specification of the computing resources used to perform an action (e.g. train a model, run a cross-validation, generate predictions). The deployment configuration does not impact the results (e.g. the learned model). If not specified, most consumers will assume local computation with multiple threads.
Next ID: 9
Used in:
Path to a temporary directory available to the training algorithm. Currently, cache_path is only used (and required) by the distributed algorithms or if "try_resume_training=True" (for the snapshots). In case of distributed training, the "cache_path" should be accessible to the manager and the workers (unless specified otherwise) -- so a local machine/memory partition won't work.
Number of threads.
If true, try to resume an interrupted training using snapshots stored in the "cache_path". Not supported by all learning algorithms. Resuming training after changing the hyper-parameters might fail.
Indicative number of seconds in between snapshots when "try_resume_training=True". Might be ignored by some algorithms.
Number of threads to use for IO operations, e.g. reading a dataset from disk. Increasing this value can speed up IO operations when they are either latency- or CPU-bound.
Maximum number of snapshots to keep.
Use the GPU for algorithms that support it, if a GPU is available and if YDF is compiled with GPU support.
Computation distribution engine.
Local execution.
Distribution using the Distribute interface. Note that the selected distribution strategy implementation (selected in "distribute") needs to be linked with the binary if you are using the C++ API.
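The execution options above can be sketched as a single deployment configuration in protobuf text format. The field names (cache_path, num_threads, try_resume_training, local) follow the descriptions in this section but should be treated as illustrative:

```proto
# Sketch: local training with 16 threads and resumable snapshots.
cache_path: "/tmp/ydf_cache"  # required for snapshots and distributed training
num_threads: 16
try_resume_training: true     # resume from snapshots stored in cache_path
local {}                      # local execution engine (the default)
```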
Used in:
(message has no fields)
Used in:
Logs of a feature selection algorithm.
Used in:
Definition of the type, possible values and default values of the generic hyper parameters of a learner. Also contains some documentation (free text + links).
Individual fields / hyper-parameters. Also contains the per-fields documentation.
Documentation for the entire learner.
Conditional existence of a parameter. A parameter exists iff the parameter "control_field" satisfies "constraint".
Used in:
Name of the control parameter.
Constraint on the parent.
One of the following values.
Used in:
Documentation about the entire learner.
Used in:
Free text description of the learning algorithm.
Used in:
If set, this parameter exists conditionally on other parameter values.
If set, this parameter is mutually exclusive with other parameters.
Categorical hyper parameter, i.e. the hyper parameter takes a value from a set of possible values.
Used in:
List of categorical values.
Used in:
(message has no fields)
Links to the documentation of the hyper-parameter.
Used in:
Path to the proto relative to YDF root directory.
Name of the proto field. If not specified, "name" is used instead.
Free text description of the parameter.
When a field is deprecated.
Integer hyper parameter.
Used in:
Used in:
List of parameters this parameter is mutually exclusive with. Any parameter in this list must have this parameter in its `other_parameters` list.
True if this parameter is the default parameter of a list of mutually exclusive parameters.
Real hyper parameter.
Used in:
Generic hyper parameters of a learner. Learner hyper parameters are normally provided through the "TrainingConfig" proto extended by each learner. The "Generic hyper parameters" (the following message) are a parallel solution for specifying the hyper parameters of a learner using a list of key-values. The "Generic hyper parameters" are designed for interfacing with hyper-parameter optimization algorithms, while the "TrainingConfig" proto is designed for direct user input. For this reason, the generic hyper parameters are not guaranteed to be as expressive as the "TrainingConfig". However, the default values of the "Generic hyper parameters" are guaranteed to be equivalent to the default values of the training config.
Used in:
Unique id of the parameters. Might be missing if the parameters are generated by a user, or by an AbstractOptimizer that does not require ids.
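As a sketch, a generic hyper-parameter set is simply a list of named values, each in one of the supported types (categorical, integer, real). The field and message names below are assumed for illustration:

```proto
# Hypothetical key-value hyper-parameters in protobuf text format.
fields { name: "num_trees" value { integer: 500 } }
fields { name: "shrinkage" value { real: 0.05 } }
fields { name: "growing_strategy" value { categorical: "BEST_FIRST_GLOBAL" } }
```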
Used in:
Hyper parameter name. Should match the "name" of the hyper parameter specification.
Used in:
Hyper parameter value. Should match the type defined in the hyper parameter specification.
Used in:
Set of hyper-parameter-sets aka. hyper-parameter search space.
Used in:
Used in:
If set, "weights" has the same number of elements as "possible_values", and "weights[i]" is the weight of this specific value for the optimizer. Different optimizers can use this weight differently. Random optimizer: weight of the field during random sampling. If not specified, all the hyper-parameter combinations have the same probability of being sampled, which means that a possible value with conditional children is more likely to be sampled.
Used in:
Name of the hyper parameter. Should match one of the generic hyper parameter of the model (use "GetGenericHyperParameterSpecification" for the list of generic hyper parameters).
Definition of the candidate values.
If this field has a parent field, then it is only activated if its parent's value is one of these.
List of child fields.
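Combining candidate values, parent activation values and child fields, a search space could be sketched as follows (all field names and nesting are illustrative, not the authoritative schema):

```proto
# Hypothetical search space: "max_depth" only exists when
# "growing_strategy" takes the value "LOCAL".
fields {
  name: "growing_strategy"
  discrete_candidates {
    possible_values { categorical: "LOCAL" }
    possible_values { categorical: "BEST_FIRST_GLOBAL" }
  }
  children {
    name: "max_depth"
    parent_discrete_values { possible_values { categorical: "LOCAL" } }
    discrete_candidates {
      possible_values { integer: 4 }
      possible_values { integer: 8 }
    }
  }
}
```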
Used in:
Optimization steps ordered chronologically by evaluation_time.
Domain of search for the hyper-parameters.
Registered key for the hyperparameter optimizer.
The selected hyperparameters and its score. Note: It is possible that the best hyperparameters are not part of the "steps".
Used in:
Time, in seconds, relative to the start of the hyper-parameter tuning, at which the hyper-parameter evaluation was consumed.
Tested hyperparameters.
Score (the higher, the better) of the hyperparameters. A NaN value indicates that the hyperparameters are unfeasible.
"Capabilities" of a learner. Describes the capabilities/constraints/properties of a learner (all called "capabilities"). Capabilities are non-restrictive, i.e. enabling a capability cannot restrict the domain of use of a learner/model (i.e. use "support_tpu" instead of "require_tpu"). Using a learner with non-available capabilities raises an error.
Does the learner support the "maximum_training_duration_seconds" parameter in the TrainingConfig.
The learner can resume training of the model from the "cache_path" given in the deployment configuration.
If true, the algorithm uses a validation dataset for training (e.g. for early stopping) and supports a validation dataset being passed to the training method (with the "valid_dataset" or "typed_valid_path" argument). If the learning algorithm has the "use_validation_dataset" capability and no validation dataset is given to the training function, the learning algorithm will extract a validation dataset from the training dataset.
If true, the algorithm supports training datasets in the "partial cache dataset" format.
If true, the algorithm supports training with a maximum model size (maximum_model_size_in_memory_in_bytes).
If true, the algorithm supports monotonic constraints over numerical features.
If true, the learner requires a label. If false, the learner does not require a label.
If true, the learner supports custom losses.
Information about the model.
Used in:
Owner of the model. Defaults to the user who ran the training code if available.
Unix Timestamp of the model training. Expressed in seconds.
Unique identifier of the model.
Framework used to create the model.
Used in:
Monotonic constraints between model's output and numerical input features.
Used in:
Regular expressions over the input features.
Used in:
Ensure the model output is monotonic increasing (non-strict) with the feature.
Ensure the model output is monotonic decreasing (non-strict) with the feature.
Used in:
If set, the attribute has a monotonic constraint. Note: monotonic_constraint.feature might not be set.
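For example, a pair of monotonic constraints could be written in a training configuration as follows (a sketch; the feature patterns are regular expressions over the input features, and the direction enum names are assumed):

```proto
# Hypothetical constraints: the model output must increase with "age"
# and decrease with any feature matching "num_complaints.*".
monotonic_constraints { feature: "^age$" direction: INCREASING }
monotonic_constraints { feature: "^num_complaints.*$" direction: DECREASING }
```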
Returns a list of hyper-parameter sets that outperforms the default hyper-parameters (either generally or in specific scenarios). Like default hyper-parameters, existing pre-defined hyper-parameters cannot change.
Name of the template. Should be unique for a given learning algorithm.
Version of the template.
Free text describing how this template was created.
Effective hyper-parameters.
Generic prediction (prediction over a single example). Those are usually the output of an ML model. Optionally, it may contain the ground truth (e.g. the label value). When the ground truth is present, such a "Prediction" proto can be used for evaluation (see "metric.h").
Used in:
Identifier of the example.
Used in:
Anomaly score between 0 (normal) and 1 (anomaly).
Used in:
Predicted class as indexed in the dataspec.
Predicted distribution over the possible classes. If specified, the following relation holds: "value == argmax_i(distribution[i])".
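For example, a 3-class classification prediction satisfying the relation above could look like this sketch (the layout of the distribution message is assumed):

```proto
# Hypothetical prediction: class index 2 with probability 0.7;
# value == argmax_i(distribution[i]) holds.
classification {
  value: 2
  distribution { counts: 0.1 counts: 0.2 counts: 0.7 sum: 1.0 }
}
```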
Used in:
Predicted relevance (the higher, the more likely to be selected).
Group of the predictions. Predictions with the same group are competing.
Group of the predictions. Can be a categorical or a hash value.
Used in:
Used in:
Predicted treatment effect. treatment_effect[i] is the effect of the "i+1"-th treatment (categorical value i+2) compared to the control group (0-th treatment; categorical value = 1). The treatment out-of-vocabulary item (value = 0) is not taken into account.
Applied treatment. The control group is treatment = 1. Other treatments are >1.
Outcome (with or without treatment).
Proto used to serialize / deserialize the model to / from string. See "SerializeModel" and "DeserializeModel". This message does not contain the entire model data.
Modeling task.
Used in:
In case of ranking, the label is expected to be between 0 and 4, and to have the NDCG semantic: 0: Completely unrelated. 4: Perfect match.
Predicts the incremental impact of a treatment on a categorical outcome. See https://en.wikipedia.org/wiki/Uplift_modelling.
Predicts the incremental impact of a treatment on a numerical outcome. See https://en.wikipedia.org/wiki/Uplift_modelling.
Predicts if an instance is similar to the majority of the training data or anomalous (a.k.a. an outlier). An anomaly detection prediction is a value between 0 and 1, where 0 indicates the most likely normal instance and 1 indicates the most likely anomalous instance.
Training configuration. Contains all the configuration for the training of a model e.g. label, input features, hyper-parameters.
Next ID: 13
Used in:
Identifier of the learner, e.g. "RANDOM_FOREST". The learner should be registered, i.e. injected as a dependency of the binary. The list of available learners is available with "AllRegisteredModels()" in "model_library.h".
List of regular expressions over the dataset columns defining the input features of the model. If empty, all the columns (with the exception of the label and cv_group) will be added as input features.
Label column.
Name of the column used to split the dataset for in-training cross-validation, i.e. all the records with the same "cv_group" value are in the same cross-validation fold. If not specified, examples are randomly assigned to train and test. This field is ignored by learners that do not run in-training cross-validation.
Task / problem solved by the model.
Weighting of the training examples. If not specified, the weight is assumed uniform.
Random seed for the training of the model. Learners are expected to be deterministic with respect to the random seed.
Column identifying the groups in a ranking task. For example, in a document/query ranking problem, the "ranking_group" will be the query. The ranking column can be either a HASH or a CATEGORICAL. HASH is recommended. If CATEGORICAL, ensure the dictionary is not pruned (i.e. minimum number of observations = 0 and maximum number of items = -1 => infinity).
Maximum duration of the training expressed in seconds. If the learner does not support constraining the training time, training fails immediately. Each learning algorithm is free to use this parameter as it sees fit. Enabling a maximum training duration makes the model training non-deterministic.
Limits the trained model by memory usage. Different algorithms can enforce this limit differently. Serialized or compiled models are generally much smaller. This limit can be fuzzy: the final model can be slightly larger.
Categorical column identifying the treatment group in an uplift task. For example, whether a patient received a treatment in a study about the impact of a medication. Only binary treatments are currently supported.
Metadata of the model. Unspecified fields are automatically set. For example, if "metadata.date" is not set, it will be automatically set to the training date.
Clears the model of any information that is not required for model serving. This includes debugging, model interpretation and other meta-data. The size of the serialized model can be significantly reduced (a 50% model size reduction is common). This parameter has no impact on the quality, serving speed or RAM usage of model serving.
Set of monotonic constraints between the model's input features and output.
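Putting the fields above together, a minimal training configuration might read as follows (a text-format sketch; field names follow the descriptions in this section and should be treated as illustrative):

```proto
# Hypothetical configuration for a classifier.
learner: "RANDOM_FOREST"   # must be a registered learner
task: CLASSIFICATION
label: "income"
features: "^age$"          # regular expressions over the dataset columns
features: "^education.*$"
random_seed: 1234
maximum_training_duration_seconds: 3600
pure_serving_model: true   # strip information not required for serving
```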
Resolution of column string names into column indices. The column indices are defined in a given dataspec, e.g. if dataspec.columns[5].name = "toto", then the column index of "toto" is 5.
Used in:
Next ID: 10
Input features of the model.
Features of type NUMERICAL.
Label column.
Number of label categories (used for classification only).
Index of the column matching "cv_group" in the "TrainingConfig".
Index of the column matching "ranking_group" in the "TrainingConfig".
Index of the column matching "uplift_treatment" in the "TrainingConfig".
Data for specific dataset columns. This field is either empty, or contains exactly one value for each column in the dataset.
Description of the importance of a given attribute. The semantics of "importance" are variable.
Next ID: 3
Used in:
Next ID: 2
Used in: