Project alibaba/TorchEasyRec

optional uint32 thresholds = 1

optional float threshold = 1
probs threshold
optional uint32 top_k = 2
top k accuracy when num_class > 1

required float lr = 1
optional float rho = 2
optional float eps = 3
optional float weight_decay = 4

required float lr = 1
optional float weight_decay = 2
optional float initial_accumulator_value = 3
optional float eps = 4
optional bool fused = 5

required float lr = 1
optional float beta1 = 2
optional float beta2 = 3
optional float weight_decay = 4
optional float eps = 5
optional bool amsgrad = 6
optional bool fused = 7

required float lr = 1
optional float beta1 = 2
optional float beta2 = 3
optional float weight_decay = 4
optional float eps = 5
optional bool amsgrad = 6
optional bool fused = 7

Used in: CombineFeature, CustomFeature, ExprFeature, KvDotProduct, LookupFeature, MatchFeature, OverlapFeature, RawFeature

required uint32 num_channels = 2
number of embedding channels
optional float temperature = 3
temperature coefficient for softmax
optional float keep_prob = 4

Used in: MINDUserTower

optional uint32 max_k = 1
max number of high capsules Default: 5
required uint32 max_seq_len = 2
max behaviour sequence length
required uint32 high_dim = 3
high capsule embedding vector dimension
optional uint32 num_iters = 4
dynamic routing iterations, Default: 3
optional float routing_logits_scale = 5
routing logits scale Default: 20
optional float routing_logits_stddev = 6
routing logits initial stddev Default: 1
optional float squash_pow = 7
squash power Default: 1
optional bool const_caps_num = 8
whether to use constant capsule number, Default: false
optional string routing_init_method = 9
the initialization method for routing logits, Default: normal, available: zeros.

Used in: DBMTL

required string tower_name = 1
task name for the task tower
optional string label_name = 2
label for the task, default is label_fields by order
repeated MetricConfig metrics = 3
metrics for the task
repeated TrainMetricConfig train_metrics = 20
log train metrics for task
repeated LossConfig losses = 4
loss for the task
optional uint32 num_class = 5
num_class for multi-class classification loss
optional MLP mlp = 6
task specific mlp
optional float weight = 7
training loss weights
repeated string relation_tower_names = 8
related tower names
optional MLP relation_mlp = 9
relation mlp
optional string sample_weight_name = 10
sample weight for the task
optional string task_space_indicator_label = 11
label name for indicating the sample space for the task tower
optional float in_task_space_weight = 12
the loss weight for sample in the task space
optional float out_task_space_weight = 13
the loss weight for sample out the task space
optional float pareto_min_loss_weight = 14
use pareto front minimal loss weight, ge 0 and lt 1

Used in: LossConfig

optional float label_smoothing = 1

Used in: LossConfig

optional float gamma = 1
optional float alpha = 2

required string feature_name = 1
feature name.
repeated string expression = 2
feature input, e.g. item:item_id
optional string embedding_name = 3
embedding name, feature with same embedding name will share embedding
required uint32 embedding_dim = 4
embedding dimension
optional uint64 hash_bucket_size = 5
number of hash size
optional uint64 num_buckets = 6
number of id enumerators
repeated string vocab_list = 7
id vocabulary list
map<string, uint64> vocab_dict = 8
id vocabulary dict
optional uint32 value_dim = 9
id value dimensions, default = 0; when used in a sequence, default = 1. If value_dim = 0, it supports id with multi-value
optional string pooling = 10
embedding pooling type, available is {sum | mean}
optional string default_value = 11
fg default value, default value before bucketize
optional string separator = 12
fg multi-value separator
optional string init_fn = 14
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 15
mask value in training progress
optional ZeroCollisionHash zch = 16
zero collision hash
optional string vocab_file = 17
id vocabulary file path
optional string asset_dir = 18
vocab file relative directory
optional DynamicEmbedding dynamicemb = 19
dynamic embedding
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional uint64 default_bucketize_value = 31
out-of-vocab (OOV) id bucketize value when using vocab_list or vocab_dict. When default_bucketize_value is set, we will not add the additional bucketize values `default_value`=0 and <OOV>=1 into vocab_list or vocab_dict
optional string fg_value_type = 32
value_type after fg before bucketize, you can specify it for better performance. e.g. fg_value_type = int64 when use num_buckets
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

Used in: xDeepFM

repeated uint32 cin_layer_size = 1
every layer size

required string feature_name = 1
feature name, e.g. tag_feat
required string expression = 2
feature input, e.g. user:tag
optional string embedding_name = 3
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 4
embedding dimension
map<string, float> value_map = 6
value map for mapping input string values to float values
repeated float boundaries = 7
boundaries for bucketizing numeric combine value
optional uint64 num_buckets = 8
number of id enumerators for sparse combine value
optional string pooling = 11
embedding pooling type, available is {sum | mean}. Controls embedding bag aggregation. NOTE: distinct from 'combiner' which controls FG-level multi-value aggregation.
optional string default_value = 12
fg default value, default value before bucketize
optional string separator = 13
fg multi-value separator
optional string normalizer = 14
fg normalizer, e.g. method=log10,threshold=1e-10,default=-10 method=zscore,mean=0.0,standard_deviation=10.0 method=minmax,min=2.1,max=2.2 method=expression,expr=sign(x)
optional string init_fn = 15
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 16
mask value in training progress
optional string combiner = 22
combine value combiner type, available is {sum | mean | min | max} Controls FG-level multi-value aggregation before bucketization.
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
oneof dense_emb
- AutoDisEmbedding autodis = 40
  autodis embedding
- MLPEmbedding mlp = 41
  mlp embedding
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

required string feature_name = 1
feature name, e.g. os_and_cate
repeated string expression = 2
feature input, e.g. [user:os, item:cate]
optional string embedding_name = 3
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 4
embedding dimension
optional uint64 hash_bucket_size = 5
number of hash size
repeated string vocab_list = 7
id vocabulary list
map<string, uint64> vocab_dict = 8
id vocabulary dict
optional uint32 value_dim = 9
id value dimensions, if value_dim = 0, it supports id with multi-value
optional string pooling = 10
embedding pooling type, available is {sum | mean}
optional string default_value = 11
fg default value
optional string separator = 12
fg multi-value separator
optional string init_fn = 13
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 14
mask value in training progress
optional ZeroCollisionHash zch = 15
zero collision hash
optional string vocab_file = 16
id vocabulary file path
optional string asset_dir = 17
vocab file relative directory
optional DynamicEmbedding dynamicemb = 18
dynamic embedding
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional uint64 default_bucketize_value = 31
out-of-vocab (OOV) id bucketize value when using vocab_list or vocab_dict. When default_bucketize_value is set, we will not add the additional bucketize values `default_value`=0 and <OOV>=1 into vocab_list or vocab_dict
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

Used in: DenseOptimizer, PartOptimizer, SparseOptimizer

(message has no fields)

Used in: DenseOptimizer, PartOptimizer, SparseOptimizer

optional uint32 T_max = 1
total number of steps or epochs for cosine annealing
optional float min_learning_rate = 2
minimum learning rate
optional float warmup_learning_rate = 3
warmup start learning rate
optional uint32 warmup_size = 4
warmup steps or epochs
optional bool by_epoch = 5
schedule by epoch or by step.

Used in: DenseOptimizer, PartOptimizer, SparseOptimizer

optional uint32 T_0 = 1
number of steps or epochs for the first cosine annealing period
optional uint32 T_mult = 2
factor to grow period length after each restart (1 = fixed period)
optional float min_learning_rate = 3
minimum learning rate
optional float warmup_learning_rate = 4
warmup start learning rate
optional uint32 warmup_size = 5
warmup steps or epochs
optional bool by_epoch = 6
schedule by epoch or by step.

Used in: DCNV1

optional uint32 cross_num = 1
number of cross layers

Used in: DCNV2

optional uint32 cross_num = 1
number of cross layers
optional uint32 low_rank = 2
Matrix decomposition with minimal rank.

required string feature_name = 1
feature name.
required string operator_name = 2
custom operator name.
required string operator_lib_file = 3
custom operator lib file name.
optional google.protobuf.Struct operator_params = 4
operator custom params.
optional bool is_op_thread_safe = 5
custom operator is thread safe or not.
repeated string expression = 6
feature input, e.g. user:os
optional string embedding_name = 7
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 8
embedding dimension
repeated float boundaries = 9
boundaries for bucketizing numeric value
optional uint64 hash_bucket_size = 10
number of hash size for sparse value
optional uint64 num_buckets = 11
number of id enumerators for sparse value
repeated string vocab_list = 12
id vocabulary list for sparse value
map<string, uint64> vocab_dict = 13
id vocabulary dict
optional string pooling = 14
embedding pooling type, available is {sum | mean}
optional string default_value = 15
fg default value
optional string separator = 16
fg multi-value separator
optional string normalizer = 17
fg normalizer, e.g. method=log10,threshold=1e-10,default=-10 method=zscore,mean=0.0,standard_deviation=10.0 method=minmax,min=2.1,max=2.2 method=expression,expr=sign(x)
optional string init_fn = 18
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional uint32 value_dim = 19
value dimensions
optional bool use_mask = 20
mask value in training progress
optional ZeroCollisionHash zch = 21
zero collision hash
optional string vocab_file = 22
id vocabulary file path
optional string asset_dir = 23
vocab file relative directory
optional DynamicEmbedding dynamicemb = 24
dynamic embedding
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional uint64 default_bucketize_value = 31
out-of-vocab (OOV) id bucketize value when using vocab_list or vocab_dict. When default_bucketize_value is set, we will not add the additional bucketize values `default_value`=0 and <OOV>=1 into vocab_list or vocab_dict
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
oneof dense_emb
- AutoDisEmbedding autodis = 40
  autodis embedding
- MLPEmbedding mlp = 41
  mlp embedding
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

Used in: ModelConfig

required DATTower user_tower = 1
required DATTower item_tower = 2
required int32 output_dim = 3
user and item tower output dimension
optional Similarity similarity = 4
similarity method
optional float temperature = 5
similarity scaling factor
optional bool in_batch_negative = 6
use in batch items as negative items.
required float amm_i_weight = 7
loss weight for amm_i
required float amm_u_weight = 8
loss weight for amm_u

Used in: DAT

required string input = 1
input feature group name
required string augment_input = 2
augmented feature group name
required MLP mlp = 3
mlp config

Used in: ModelConfig

optional MaskNetModule mask_net = 6
shared bottom MaskNet module
optional MLP bottom_mlp = 1
shared bottom mlp layer
optional MLP expert_mlp = 2
mmoe expert mlp layer definition
optional MLP gate_mlp = 3
mmoe gate module definition
optional uint32 num_expert = 4
number of mmoe experts
repeated BayesTaskTower task_towers = 5
bayes task tower

Used in: ModelConfig

optional MLP bottom_mlp = 1
shared bottom mlp layer
optional MLP expert_mlp = 2
mmoe expert mlp layer definition
optional MLP gate_mlp = 3
mmoe gate module definition
optional uint32 num_expert = 4
number of mmoe experts
repeated InterventionTaskTower task_towers = 5
task tower

Used in: ModelConfig

required Cross cross = 1
required MLP deep = 2
required MLP final = 3

Used in: ModelConfig

optional MLP backbone = 1
required CrossV2 cross = 2
optional MLP deep = 3
required MLP final = 4

Used in: SeqEncoderConfig

optional string name = 1
seq encoder name
required string input = 2
sequence feature name
required MLP attn_mlp = 3
mlp config for target attention score
optional int32 max_seq_length = 6
maximum sequence length

Used in: MultiTowerDIN

required string input = 1
input feature group name
required MLP attn_mlp = 2
mlp config for target attention score

Used in: ModelConfig

optional MLP dense_mlp = 1
if there is a dense feature group, dense_mlp must be set
optional bool arch_with_sparse = 2
whether to include sparse features after interaction
required MLP final = 3

Used in: ModelConfig

required Tower user_tower = 1
required Tower item_tower = 2
required int32 output_dim = 3
user and item tower output dimension
optional Similarity similarity = 4
similarity method
optional float temperature = 5
similarity scaling factor
optional bool in_batch_negative = 6
use in batch items as negative items.

Used in: ModelConfig

required Tower user_tower = 1
required Tower item_tower = 2
required int32 output_dim = 3
user and item tower output dimension
optional Similarity similarity = 4
similarity method
optional float temperature = 5
similarity scaling factor
optional bool in_batch_negative = 6
use in batch items as negative items.

Used in: EasyRecConfig

optional uint32 batch_size = 1
mini batch size to use for training and evaluation.
required DatasetType dataset_type = 2
dataset type.
optional bool fg_encoded = 3
[deprecated] please use fg_mode. input data is feature generate encoded or not. if fg_encoded = true, you should do fg offline first, and set fg_encoded_multival_sep for split multi-val feature
optional string fg_encoded_multival_sep = 4
separator for multi-val feature in fg encoded input data
repeated string label_fields = 5
labels
optional uint32 num_workers = 6
number of workers for parallel processing raw data
optional bool pin_memory = 7
pin memory for fast cudaMemCopy
repeated Field input_fields = 8
the input fields must be the same number and in the same order as data in csv files
optional string delimiter = 9
delimiter of column features, only used for CsvDataset
optional bool with_header = 10
for csv files, with header or not.
optional uint32 eval_batch_size = 11
mini batch size to use for evaluation.
optional bool drop_remainder = 12
drop last batch less than batch_size
optional uint32 fg_threads = 13
fg threads for each worker, if fg_threads = 0, will disable fg dag handler, use python run.
optional bool is_orderby_partition = 14
when use OdpsDataset, read data orderby table partitions or not.
optional string odps_data_quota_name = 15
maxcompute storage api & tunnel quota name
optional float sample_mask_prob = 16
mask probability for samples in training progress
optional float negative_sample_mask_prob = 17
mask probability for sampled negatives in training progress
optional bool force_base_data_group = 18
force padding data into same data group with same batch_size
repeated string sample_weight_fields = 19
sample weights
optional FgMode fg_mode = 20
fg run mode.
optional bool shuffle = 22
whether to shuffle data
optional uint32 shuffle_buffer_size = 23
shuffle buffer size for better performance. Even when the shuffle buffer is set, it is suggested to do a full data shuffle before training, especially when the performance of models is not good.
optional string odps_data_compression = 24
maxcompute storage api data compression type, LZ4_FRAME | ZSTD | UNCOMPRESSED
optional string sample_cost_field = 25
sample cost field name
optional uint64 batch_cost_size = 26
batch cost limit size
optional string input_fields_str = 27
simplified input fields string format: input_name1:input_type1;input_name2:input_type2; type names follow ODPS conventions with aliases: BIGINT->INT64, INT->INT32
oneof sampler
negative sampler
- NegativeSampler negative_sampler = 101
- NegativeSamplerV2 negative_sampler_v2 = 102
- HardNegativeSampler hard_negative_sampler = 103
- HardNegativeSamplerV2 hard_negative_sampler_v2 = 104
- TDMSampler tdm_sampler = 105

Used in: DataConfig

OdpsDataset = 1
ParquetDataset = 2
CsvDataset = 3
OdpsDatasetV1 = 4
KafkaDataset = 5

Used in: ModelConfig

required MLP deep = 1
optional MLP final = 2
optional uint32 wide_embedding_dim = 3
optional string wide_init_fn = 4
wide embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"

Used in: TrainConfig

optional uint32 dump_interval_steps = 1
MC/ZCH features are not supported; use dynamicemb for delta dump. dump touched ids and their latest embedding every N training steps. Larger intervals retain a longer id window in memory; auto compaction reduces per-batch tensor buildup but unique ids still scale with the interval.
optional string output_dir = 2
output directory. default is ${model_dir}/delta_embedding_dump
optional string file_prefix = 3
parquet file prefix

Used in: TrainConfig

oneof optimizer
- SGDOptimizer sgd_optimizer = 1
- AdagradOptimizer adagrad_optimizer = 2
- AdamOptimizer adam_optimizer = 3
- AdamWOptimizer adamw_optimizer = 4
- AdadeltaOptimizer adadelta_optimizer = 5
- RMSpropOptimizer rmsprop_optimizer = 6
oneof learning_rate
- ConstantLR constant_learning_rate = 101
- ExponentialDecayLR exponential_decay_learning_rate = 102
- ManualStepLR manual_step_learning_rate = 103
- CosineAnnealingLR cosine_annealing_learning_rate = 104
- CosineAnnealingWarmRestartsLR cosine_annealing_warm_restarts_learning_rate = 105
repeated PartOptimizer part_optimizers = 201

DistanceLFU: evict_score = access_cnt / pow((current_iter - last_access_iter), decay_exponent)

Used in: ZeroCollisionHash

optional float decay_exponent = 1
decay exponent applied to the number of iterations since the last access (current_iter - last_access_iter)

Used in: ModelConfig

required HSTU hstu = 1
hstu config
required FusionMTLTower fusion_mtl_tower = 2
multi task tower config
required uint32 max_seq_len = 3
max sequence length
optional uint32 item_embedding_hidden_dim = 4
item embedding mlp hidden dimension
optional bool enable_global_average_loss = 5
enables loss averaging computation globally across all ranks (total rank) instead of locally (local rank).
optional bool sequence_timestamp_is_ascending = 6
timestamp of sequence is ascending or descending
optional bool concat_contextual_features = 7
concat all contextual features on channel dim as one token

Used in: DynamicEmbedding

required uint64 threshold = 1
minimum frequency threshold for admission
optional DynamicEmbInitializerArgs initializer_args = 2
determine how to initialize the embedding if the key is not admitted.
optional uint64 counter_capacity = 3
kv counter capacity, if not set, use embedding max_capacity
optional uint64 counter_bucket_capacity = 4
kv counter capacity for each bucket.

Used in: DynamicEmbFrequencyAdmissionStrategy, DynamicEmbedding

optional string mode = 1
the mode of initialization. NORMAL | TRUNCATED_NORMAL | UNIFORM | CONSTANT
optional float mean = 2
the mean value for (truncated) normal distributions
optional float std_dev = 3
the standard deviation for (truncated) normal distributions. default is sqrt(1 / embedding_dim)
optional float lower = 4
the lower bound for uniform/truncated_normal distribution. default is -sqrt(1 / max_capacity)
optional float upper = 5
the upper bound for uniform/truncated_normal distribution. default is sqrt(1 / max_capacity)
optional float value = 6
the constant value for constant initialization.

Used in: BoolMaskFeature, ComboFeature, CustomFeature, IdFeature, LookupFeature, MatchFeature

optional DynamicEmbInitializerArgs initializer_args = 1
arguments for initializing dynamic embedding vector values. default is uniform distribution, and absolute values of upper and lower bound are sqrt(1 / embedding_dim).
optional DynamicEmbInitializerArgs eval_initializer_args = 2
the initializer args for evaluation mode. default is constant initialization with value 0.0.
optional string score_strategy = 4
strategy to set the score for each indices in forward and backward per table. TIMESTAMP | STEP | CUSTOMIZED | LFU | NO_EVICTION
required uint64 max_capacity = 5
max number of embedding rows
optional float cache_load_factor = 6
percentage of embedding rows caching on gpu
optional uint64 init_capacity_per_rank = 7
init number of capacity
optional string init_table = 8
init table path
optional uint64 bucket_capacity = 9
hash-table bucket capacity. default 128 (matches dynamicemb DEFAULT_BUCKET_CAPACITY). larger buckets trade probe cost for higher load factor.
oneof admission_strategy
- DynamicEmbFrequencyAdmissionStrategy frequency_admission_strategy = 100

required string train_input_path = 1
required string eval_input_path = 2
required string model_dir = 3
optional TrainConfig train_config = 4
optional EvalConfig eval_config = 5
optional ExportConfig export_config = 6
optional DataConfig data_config = 7
repeated FeatureConfig feature_configs = 8
optional ModelConfig model_config = 9

Used in: EasyRecConfig

optional uint32 num_steps = 1
number of steps to evaluate.
optional uint32 log_step_count_steps = 2
how often (in steps) progress is logged during eval

Used in: DenseOptimizer, PartOptimizer, SparseOptimizer

optional uint32 decay_size = 1
decay steps or epochs
optional float decay_factor = 2
decay rate
optional bool staircase = 3
if true, decay the learning rate at discrete intervals
optional float warmup_learning_rate = 4
warmup start learning rate
optional uint32 warmup_size = 5
warmup steps or epochs
optional float min_learning_rate = 6
minimum learning rate
optional bool by_epoch = 7
schedule by epoch or by step.

Used in: EasyRecConfig

optional string exporter_type = 1
type of exporter [latest | best] when train_and_evaluation. latest: regularly exports the serving graph and checkpoints; best: exports the best model according to best_exporter_metric
optional string best_exporter_metric = 2
the metric used to determine the best checkpoint
optional bool metric_larger_is_better = 3
whether a larger metric value is better
optional string mixed_precision = 4
mixed precision mode for inference/export [BF16 | FP16 | ""]. The export-time AMP intent is taken verbatim from this field; if it disagrees with train_config.mixed_precision a warning is logged. When set, the dense sub-graph is wrapped in torch.autocast before torch.export so that AOT Inductor captures dtype-promoting casts as a wrap_with_autocast HOP.
optional bool cudnn_allow_tf32 = 5
whether to use torch.backends.cudnn.allow_tf32
optional bool cuda_matmul_allow_tf32 = 6
whether to use torch.backends.cuda.matmul.allow_tf32

required string feature_name = 1
feature name, e.g. kv_os_click_count
required string expression = 2
expression, e.g. sigmoid(pv/(1+click))
repeated string variables = 3
variables in expression, e.g. ["item:pv", "item:click"]
optional string embedding_name = 4
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 5
embedding dimension
repeated float boundaries = 6
boundaries for bucketizing numeric expr value
optional string separator = 7
fg multi-value separator
optional float fill_missing = 8
fill value when vector length mismatch, default is NaN.
optional string pooling = 10
embedding pooling type, available is {sum | mean}
optional string default_value = 11
fg default value
optional string init_fn = 12
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 13
mask value in training progress
optional uint32 value_dim = 14
if value_dim = 0, it supports multi-value
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
oneof dense_emb
- AutoDisEmbedding autodis = 40
  autodis embedding
- MLPEmbedding mlp = 41
  mlp embedding
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

Used in: PLE

required string network_name = 1
required uint32 expert_num_per_task = 2
number of experts per task
optional uint32 share_num = 3
number of experts for share
required MLP task_expert_net = 4
mlp network of experts per task
optional MLP share_expert_net = 5
mlp network of experts for share

Strictly-typed subset of faiss.Kmeans(D, K, **kwargs) knobs. Unset fields fall back to faiss's own defaults (so it is safe to leave partially set). ``gpu`` is intentionally omitted — the fit is CPU-only (SidRqkmeans refuses a visible CUDA device).

Used in: SidRqkmeans

optional uint32 niter = 1
optional uint32 nredo = 2
optional uint32 seed = 3
optional uint32 max_points_per_centroid = 4
optional uint32 min_points_per_centroid = 5
optional bool spherical = 6
optional bool verbose = 7

Used in: EasyRecConfig

oneof feature
- IdFeature id_feature = 1
- RawFeature raw_feature = 2
- ComboFeature combo_feature = 3
- LookupFeature lookup_feature = 4
- MatchFeature match_feature = 5
- SequenceFeature sequence_feature = 6
- ExprFeature expr_feature = 7
- OverlapFeature overlap_feature = 8
- TokenizeFeature tokenize_feature = 9
- CustomFeature custom_feature = 10
- KvDotProduct kv_dot_product = 11
- BoolMaskFeature bool_mask_feature = 12
- CombineFeature combine_feature = 13
- IdFeature sequence_id_feature = 101
- RawFeature sequence_raw_feature = 102
- ComboFeature sequence_combo_feature = 103
- LookupFeature sequence_lookup_feature = 104
- MatchFeature sequence_match_feature = 105
- ExprFeature sequence_expr_feature = 107
- OverlapFeature sequence_overlap_feature = 108
- TokenizeFeature sequence_tokenize_feature = 109
- CustomFeature sequence_custom_feature = 110
- KvDotProduct sequence_kv_dot_product = 111
- BoolMaskFeature sequence_bool_mask_feature = 112
- CombineFeature sequence_combine_feature = 113

Used in: ModelConfig

required string group_name = 1
repeated string feature_names = 2
required FeatureGroupType group_type = 3
repeated SeqGroupConfig sequence_groups = 4
repeated SeqEncoderConfig sequence_encoders = 5
optional string embedding_name_suffix = 6
Suffix appended to each feature's embedding_name so groups with different suffixes use independent embedding tables. Empty == disabled.

Used in: FeatureGroupConfig

DEEP = 0
WIDE = 1
SEQUENCE = 2
JAGGED_SEQUENCE = 3

Used in: DataConfig

FG_NONE = 1
input data is feature generate encoded, we do not do fg
FG_NORMAL = 2
input data is raw feature, we use python to run feature generate
FG_DAG = 3
input data is raw feature, we use fg_handler to run feature generate
FG_BUCKETIZE = 4
input data is after feature generate but before do bucketize, we do bucketize only

Used in: DataConfig

required string input_name = 1
optional FieldType input_type = 2
only need specify it when use CsvDataset and value dtype can not be inferred (all values in the column are null)

Used in: Field

INT32 = 0
INT64 = 1
STRING = 2
FLOAT = 3
DOUBLE = 4
ARRAY_INT32 = 5
ARRAY_INT64 = 6
ARRAY_STRING = 7
ARRAY_FLOAT = 8
ARRAY_DOUBLE = 9
ARRAY_ARRAY_INT32 = 10
ARRAY_ARRAY_INT64 = 11
ARRAY_ARRAY_STRING = 12
ARRAY_ARRAY_FLOAT = 13
ARRAY_ARRAY_DOUBLE = 14
MAP_STRING_INT32 = 15
MAP_STRING_INT64 = 16
MAP_STRING_STRING = 17
MAP_STRING_FLOAT = 18
MAP_STRING_DOUBLE = 19
MAP_INT64_INT32 = 20
MAP_INT64_INT64 = 21
MAP_INT64_STRING = 22
MAP_INT64_FLOAT = 23
MAP_INT64_DOUBLE = 24
MAP_INT32_INT32 = 25
MAP_INT32_INT64 = 26
MAP_INT32_STRING = 27
MAP_INT32_FLOAT = 28
MAP_INT32_DOUBLE = 29

Used in: SparseOptimizer

required float lr = 1
optional float rho = 2
optional float eps = 3
optional float weight_decay = 4
optional bool gradient_clipping = 5
optional float max_gradient = 6

Used in: SparseOptimizer

required float lr = 1
optional bool gradient_clipping = 2
optional float max_gradient = 3
optional float initial_accumulator_value = 4

Used in: SparseOptimizer

required float lr = 1
optional float beta1 = 2
optional float beta2 = 3
optional float weight_decay = 4
optional bool gradient_clipping = 5
optional float max_gradient = 6

Used in: SparseOptimizer

required float lr = 1
optional float beta1 = 2
optional float beta2 = 3
optional float weight_decay = 4
optional bool gradient_clipping = 5
optional float max_gradient = 6

Used in: SparseOptimizer

required float lr = 1
optional float momentum = 2
optional float weight_decay = 3
optional bool gradient_clipping = 4
optional float max_gradient = 5

Used in: SparseOptimizer

required float lr = 1
optional float beta1 = 2
optional float beta2 = 3
optional float weight_decay = 4
optional bool gradient_clipping = 5
optional float max_gradient = 6

Used in: SparseOptimizer

required float lr = 1
optional float beta1 = 2
optional float beta2 = 3
optional float weight_decay = 4
optional bool gradient_clipping = 5
optional float max_gradient = 6

Used in: SparseOptimizer

required float lr = 1
optional float alpha = 2
optional float eps = 3
optional float weight_decay = 4
optional bool gradient_clipping = 5
optional float max_gradient = 6

Used in: SparseOptimizer

required float lr = 1
optional float weight_decay = 2
optional WeightDecayMode weight_decay_mode = 3
optional bool gradient_clipping = 4
optional float max_gradient = 5

Used in: SparseOptimizer

required float lr = 1
optional bool gradient_clipping = 2
optional float max_gradient = 3

Used in: DlrmHSTU, UltraHSTU

optional MLP mlp = 1
task tower mlp
repeated FusionSubTaskConfig task_configs = 2
sub task configs

Used in: FusionMTLTower

required string task_name = 1
task name for the task
required string label_name = 2
label for the task
optional uint64 task_bitmask = 3
bitmask used to get the actual label for a binary classification task
repeated LossConfig losses = 4
loss for the task
optional uint32 num_class = 5
support multi-class classification loss
repeated MetricConfig metrics = 6
metrics for the task
optional float weight = 7
training loss weight for the task
repeated TrainMetricConfig train_metrics = 20
log train metrics for task

Used in: GRContextualInterleavePreprocessor, GRContextualPreprocessor, GRUIHPreprocessor

oneof action_encoder
- GRSimpleActionEncoder simple_action_encoder = 1

message GRContentEncoder

module.proto:142

Used in: GRContextualInterleavePreprocessor, GRContextualPreprocessor

oneof content_encoder
- GRSliceContentEncoder slice_content_encoder = 1
  slice candidate dim to uih dim
- GRPadContentEncoder pad_content_encoder = 2
  padding candidate dim to uih dim
- GRMLPContentEncoder mlp_content_encoder = 3
  linear transform uih and candidate to same dim

message GRContextualInterleavePreprocessor

module.proto:185

Used in: GRInputPreprocessor

optional GRActionEncoder action_encoder = 3
action encoder config
optional bool enable_interleaving = 4
enable interleave target or not
optional GRContextualizedMLP action_mlp = 5
action embedding mlp config
required GRContentEncoder content_encoder = 6
content encoder config
required GRContextualizedMLP content_mlp = 7
content embedding mlp config

message GRContextualPreprocessor

module.proto:174

Used in: GRInputPreprocessor

optional GRActionEncoder action_encoder = 3
action encoder config
optional GRContextualizedMLP action_mlp = 4
action embedding mlp config
required GRContentEncoder content_encoder = 5
content encoder config
required GRContextualizedMLP content_mlp = 6
content embedding mlp config

message GRContextualizedMLP

module.proto:165

Used in: GRContextualInterleavePreprocessor, GRContextualPreprocessor, GRUIHPreprocessor

oneof contextualized_mlp
- GRSimpleContextualizedMLP simple_mlp = 10
  mlp for sequence embedding
- GRParameterizedContextualizedMLP parameterized_mlp = 11
  mlp for sequence and contextual embedding

Used in: HSTU

oneof input_preprocessor
- GRContextualPreprocessor contextual_preprocessor = 20
  input preprocessor with contextual features
- GRContextualInterleavePreprocessor contextual_interleave_preprocessor = 21
  input preprocessor with interleave targets
- GRUIHPreprocessor uih_preprocessor = 22
  input preprocessor for sequence-only models (no candidate concat)

message GRL2NormPostprocessor

module.proto:216

Used in: GROutputPostprocessor

(message has no fields)

message GRLayerNormPostprocessor

module.proto:219

Used in: GROutputPostprocessor

(message has no fields)

message GRMLPContentEncoder

module.proto:135

Used in: GRContentEncoder

required MLP uih_mlp = 1
mlp for uih seq embedding
required MLP target_mlp = 2
mlp for candidate seq embedding

message GROutputPostprocessor

module.proto:229

Used in: HSTU

oneof output_postprocessor
- GRL2NormPostprocessor l2norm_postprocessor = 31
  l2 norm postprocessor
- GRLayerNormPostprocessor layernorm_postprocessor = 32
  layer norm postprocessor
- GRTimestampLayerNormPostprocessor timestamp_layernorm_postprocessor = 33
  timestamp layer norm postprocessor

message GRPadContentEncoder

module.proto:132

Used in: GRContentEncoder

(message has no fields)

message GRParameterizedContextualizedMLP

module.proto:158

Used in: GRContextualizedMLP

required uint32 hidden_dim = 1
mlp hidden dimension
optional float contextual_dropout_ratio = 2
dropout ratio for contextual embedding

Used in: HSTU

required uint32 num_position_buckets = 1
buckets for position embedding
optional uint32 num_time_buckets = 2
buckets for timestamp embedding
optional bool use_time_encoding = 3
use timestamp encoding or not.
optional string time_bucket_fn = 4
transform function for timestamp gap. sqrt | log
optional float time_bucket_increments = 5
timestamp gap will be divided by time_bucket_increments

Used in: GRActionEncoder

optional uint32 action_embedding_dim = 1
action embedding dim
repeated uint32 action_weights = 3
bitmask of each action
repeated uint32 watchtime_to_action_thresholds = 4
thresholds for watch time to actions
repeated uint32 watchtime_to_action_weights = 5
bitmask for watch time to actions
optional float embedding_init_std = 6
action embedding weights init std

message GRSimpleContextualizedMLP

module.proto:153

Used in: GRContextualizedMLP

required uint32 hidden_dim = 1
mlp hidden dimension

message GRSliceContentEncoder

module.proto:129

Used in: GRContentEncoder

(message has no fields)

message GRTimestampLayerNormPostprocessor

module.proto:222

Used in: GROutputPostprocessor

repeated uint32 time_duration_period_units = 1
time duration period units, e.g. 60 * 60 for hour of day.
repeated uint32 time_duration_units_per_period = 2
time duration units per period, e.g. 24 for hour of day.

Used in: GRInputPreprocessor

optional GRActionEncoder action_encoder = 1
action encoder config (optional - for models with action info)
optional GRContextualizedMLP action_mlp = 2
action embedding mlp config (required if action_encoder is set)

Used in: TrainConfig

optional string clipping_type = 1
Clipping type: "norm", "value", or "none"
optional float max_gradient = 2
Max gradient value/norm threshold
optional float norm_type = 3
Norm type for gradient norm clipping (2.0 for L2, inf for max)
optional bool enable_global_grad_clip = 4
Enable global gradient clipping for distributed training

Used in: TrainConfig

optional float init_scale = 1
Initial scale factor
optional float growth_factor = 2
Factor by which the scale is multiplied during update if no inf/NaN gradients occur for ``growth_interval`` consecutive iterations.
optional float backoff_factor = 3
Factor by which the scale is multiplied during update if inf/NaN gradients occur in an iteration.
optional uint32 growth_interval = 4
Number of consecutive iterations without inf/NaN gradients that must occur for the scale to be multiplied by ``growth_factor``.

Used in: MetricConfig

required string grouping_key = 1

Used in: MetricConfig

required string grouping_key = 1
optional uint64 max_pairs_per_group = 2

Used in: DlrmHSTU, HSTUUserTower, UltraHSTU

required STU stu = 1
stu config
optional float input_dropout_ratio = 2
dropout ratio after preprocessor
optional uint32 attn_num_layers = 3
num stu layers
required GRPositionalEncoder positional_encoder = 4
position encoder
required GRInputPreprocessor input_preprocessor = 5
input preprocessor
required GROutputPostprocessor output_postprocessor = 6
output postprocessor
optional uint32 attn_truncation_split_layer = 7
Attention truncation: after this many full-sequence layers, drop UIH prefix tokens to keep only the last attn_truncation_tail_len per sample. Contextual prefix and targets survive. Must be in (0, attn_num_layers); 0 disables.
optional uint32 attn_truncation_tail_len = 8
Trailing UIH cap; both fields must be > 0 to enable truncation.
optional string name = 9
MoT channel name. When non-empty, replaces the default `uih` prefix on UIH-side keys read from grouped_features (e.g. name="uih_click" -> uih_click.sequence, uih_click_action.sequence, uih_click_watchtime.sequence, uih_click_timestamp.sequence). Empty preserves the original uih.* keys (DlrmHSTU behavior). Channels with the same `embedding_name` on a feature share the underlying embedding table via EmbeddingGroup dedupe; per-channel tables multiply sparse-param + TBE + all-to-all cost by N.

Used in: ModelConfig

required HSTUUserTower user_tower = 1
required Tower item_tower = 6
optional int32 output_dim = 7
user and item tower output dimension; when 0 (default), no output Linear is applied -- the caller must size the user tower's STU output and the item tower's MLP output to match.
optional Similarity similarity = 3
similarity method
optional float temperature = 4
similarity scaling factor
optional bool in_batch_negative = 5
use in batch items as negative items.

Used in: HSTUMatch

required string input = 1
input feature group name (uih group)
required HSTU hstu = 2
HSTU config (STU, positional_encoder, input_preprocessor, output_postprocessor)
required uint32 max_seq_len = 3
max sequence length

Weighted Random Sampling ItemID not in Batch and Sampling Hard Edge

Used in: DataConfig

required string user_input_path = 1
user data path schema => userid:int64 | weight:float
required string item_input_path = 2
item data path schema => itemid:int64 | weight:float | attrs:string
required string hard_neg_edge_input_path = 3
hard negative edge path schema => userid:int64 | itemid:int64 | weight:float
required uint32 num_sample = 4
number of negative sample
required uint32 num_hard_sample = 5
max number of hard negative sample
repeated string attr_fields = 6
field names of attrs in train data or eval data
required string item_id_field = 7
field name of item_id in train data or eval data
required string user_id_field = 8
field name of user_id in train data or eval data
optional string attr_delimiter = 9
attribute delimiter of attrs string
optional uint32 num_eval_sample = 10
number of negative samples for evaluator
optional string field_delimiter = 11
field delimiter of input data; only works on local

Weighted Random Sampling ItemID not with Edge and Sampling Hard Edge

Used in: DataConfig

required string user_input_path = 1
user data path schema => userid:int64 | weight:float
required string item_input_path = 2
item data path schema => itemid:int64 | weight:float | attrs:string
required string pos_edge_input_path = 3
positive edge path schema => userid:int64 | itemid:int64 | weight:float
required string hard_neg_edge_input_path = 4
hard negative edge path schema => userid:int64 | itemid:int64 | weight:float
required uint32 num_sample = 5
number of negative sample
required uint32 num_hard_sample = 6
max number of hard negative sample
repeated string attr_fields = 7
field names of attrs in train data or eval data
required string item_id_field = 8
field name of item_id in train data or eval data
required string user_id_field = 9
field name of user_id in train data or eval data
optional string attr_delimiter = 10
attribute delimiter of attrs string
optional uint32 num_eval_sample = 11
number of negative samples for evaluator
optional string field_delimiter = 12
field delimiter of input data

required string feature_name = 1
feature name, e.g. item_id
required string expression = 2
feature input, e.g. item:item_id
optional string embedding_name = 3
embedding name, feature with same embedding name will share embedding
required uint32 embedding_dim = 4
embedding dimension
optional uint64 hash_bucket_size = 5
number of hash size
optional uint64 num_buckets = 6
number of id enumerators
repeated string vocab_list = 7
id vocabulary list
map<string, uint64> vocab_dict = 8
id vocabulary dict
optional uint32 value_dim = 9
id value dimensions, default = 0; when used in a sequence, the default is 1. If value_dim = 0, ids with multiple values are supported
optional string pooling = 10
embedding pooling type, available is {sum | mean}
optional string default_value = 11
fg default value, default value before bucketize
optional string separator = 12
fg multi-value separator
optional bool weighted = 13
whether the fg multi-value has weight
optional string init_fn = 14
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 15
mask value in training progress
optional ZeroCollisionHash zch = 16
zero collision hash
optional string vocab_file = 17
id vocabulary file path
optional string asset_dir = 18
vocab file relative directory
optional DynamicEmbedding dynamicemb = 19
dynamic embedding
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional uint64 default_bucketize_value = 31
out-of-vocab (OOV) id bucketize value when using vocab_list or vocab_dict. When default_bucketize_value is used, we will not add the additional bucketize values `default_value`=0 and <OOV>=1 into vocab_list or vocab_dict
optional string fg_value_type = 32
value_type after fg before bucketize, you can specify it for better performance. e.g. fg_value_type = int64 when use num_buckets
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

Used in: DC2VR

required string tower_name = 1
task name for the task tower
optional string label_name = 2
label for the task, default is label_fields by order
repeated MetricConfig metrics = 3
metrics for the task
repeated TrainMetricConfig train_metrics = 20
log train metrics for task
repeated LossConfig losses = 4
loss for the task
optional uint32 num_class = 5
num_class for multi-class classification loss
optional MLP mlp = 6
task specific mlp
optional float weight = 7
training loss weights
repeated string intervention_tower_names = 8
intervention tower names
required uint32 low_rank_dim = 9
low_rank_dim
optional float dropout_ratio = 10
dropout_ratio
optional string task_space_indicator_label = 11
label name for indicating the sample space for the task tower
optional float in_task_space_weight = 12
the loss weight for sample in the task space
optional float out_task_space_weight = 13
the loss weight for sample out the task space
optional float pareto_min_loss_weight = 14
use pareto front minimal loss weight, ge 0 and lt 1

Used in: LossConfig

required string session_name = 1
optional float alpha = 2

Used in: ModelConfig

TRITON = 0
PYTORCH = 1
CUTLASS = 2

required string feature_name = 1
feature name, e.g. kv_os_click_count
required string query = 2
query, e.g. ["a:0.5", "b:0.5"]
required string document = 3
document, e.g. ["d:0.5", "b:0.5"]
optional string embedding_name = 4
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 5
embedding dimension
repeated float boundaries = 6
boundaries for bucketize numeric expr value
optional string separator = 7
fg multi-value separator
optional string kv_delimiter = 8
fg kv separator, default is :.
optional string normalizer = 9
fg normalizer, e.g. method=log10,threshold=1e-10,default=-10 method=zscore,mean=0.0,standard_deviation=10.0 method=minmax,min=2.1,max=2.2 method=expression,expr=sign(x)
optional string pooling = 10
embedding pooling type, available is {sum | mean}
optional string default_value = 11
fg default value
optional string init_fn = 12
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 13
mask value in training progress
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
oneof dense_emb
- AutoDisEmbedding autodis = 40
  autodis embedding
- MLPEmbedding mlp = 41
  mlp embedding
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

Used in: LossConfig

(message has no fields)

LFU: evict_score = access_cnt

Used in: ZeroCollisionHash

(message has no fields)

LRU: evict_score = 1 / pow((current_iter - last_access_iter), decay_exponent)

Used in: ZeroCollisionHash

optional float decay_exponent = 1
decay rate is access step

required string feature_name = 1
feature name, e.g. kv_os_click_count
required string map = 2
map input, e.g. item:kv_os_click_count
required string key = 3
key input, e.g. user:os
optional string embedding_name = 4
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 5
embedding dimension
repeated float boundaries = 6
boundaries for bucketize numeric lookup value
optional uint64 hash_bucket_size = 7
number of hash size for sparse lookup value
optional uint64 num_buckets = 8
number of id enumerators for sparse lookup value
repeated string vocab_list = 9
id vocabulary list for sparse lookup value
map<string, uint64> vocab_dict = 10
id vocabulary dict
optional string pooling = 11
embedding pooling type, available is {sum | mean}
optional string combiner = 12
lookup value combiner type, available is {sum | mean | min | max | count}
optional string default_value = 13
fg default value
optional string separator = 14
fg multi-value separator
optional bool need_discrete = 15
lookup map value is sparse or numeric, when need_discrete is true, combiner will be empty string
optional bool need_key = 16
lookup value need key as prefix or not.
optional string normalizer = 17
fg normalizer, e.g. method=log10,threshold=1e-10,default=-10 method=zscore,mean=0.0,standard_deviation=10.0 method=minmax,min=2.1,max=2.2 method=expression,expr=sign(x)
optional string init_fn = 18
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional uint32 value_dim = 19
lookup value dimensions
optional string value_separator = 20
numeric lookup value separator
optional bool use_mask = 21
mask value in training progress
optional ZeroCollisionHash zch = 22
zero collision hash
optional string vocab_file = 23
id vocabulary file path
optional string asset_dir = 24
vocab file relative directory
optional DynamicEmbedding dynamicemb = 25
dynamic embedding
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional uint64 default_bucketize_value = 31
out-of-vocab (OOV) id bucketize value when using vocab_list or vocab_dict. When default_bucketize_value is used, we will not add the additional bucketize values `default_value`=0 and <OOV>=1 into vocab_list or vocab_dict
optional string fg_value_type = 32
value_type after fg before bucketize, you can specify it for better performance. e.g. fg_value_type = int64 when use num_buckets
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
oneof dense_emb
- AutoDisEmbedding autodis = 40
  autodis embedding
- MLPEmbedding mlp = 41
  mlp embedding
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

Used in: BayesTaskTower, FusionSubTaskConfig, InterventionTaskTower, ModelConfig, TaskTower

oneof loss
- BinaryCrossEntropy binary_cross_entropy = 1
- SoftmaxCrossEntropy softmax_cross_entropy = 2
- L2Loss l2_loss = 3
- JRCLoss jrc_loss = 4
- BinaryFocalLoss binary_focal_loss = 5

Used in: ModelConfig

required MINDUserTower user_tower = 1
required Tower item_tower = 2
optional float simi_pow = 3
optional Similarity similarity = 4
optional bool in_batch_negative = 5
optional float temperature = 6
required int32 output_dim = 7

Used in: MIND

required string input = 1
user feature group name
required string history_input = 2
user history group name
required MLP user_mlp = 3
optional MLP hist_seq_mlp = 4
optional MINDUserTower.UserSeqCombineMethod user_seq_combine = 5
required B2ICapsule capsule_config = 6
capsule config
required MLP concat_mlp = 7
concat mlp config for user interests vector

Used in: MINDUserTower

CONCAT = 0
SUM = 1

Used in: BayesTaskTower, DATTower, DBMTL, DC2VR, DCNV1, DCNV2, DINEncoder, DINTower, DLRM, DeepFM, ExtractionNetwork, FusionMTLTower, GRMLPContentEncoder, InterventionTaskTower, MINDUserTower, MMoE, MaskNetModule, MultiTower, MultiTowerDIN, MultiWindowDINEncoder, MultiWindowDINTower, RocketLaunching, TDM, TaskTower, Tower, WideAndDeep, WuKong, WuKongLayer, xDeepFM

repeated uint32 hidden_units = 1
hidden units for each layer
repeated float dropout_ratio = 2
ratio of dropout
optional string activation = 3
activation function
optional bool use_bn = 4
use batch normalization
optional bool bias = 5
use bias
optional bool use_ln = 6
use layer normalization

Used in: CombineFeature, CustomFeature, ExprFeature, KvDotProduct, LookupFeature, MatchFeature, OverlapFeature, RawFeature

(message has no fields)

Used in: ModelConfig

required MLP expert_mlp = 1
mmoe expert module definition
optional MLP gate_mlp = 2
mmoe gate module definition
required uint32 num_expert = 3
number of mmoe experts
repeated TaskTower task_towers = 4
task tower

Used in: DenseOptimizer, PartOptimizer, SparseOptimizer

repeated uint32 schedule_sizes = 1
a list of global steps or epochs at which to switch learning rates
repeated float learning_rates = 2
a list of learning rates corresponding to intervals
optional bool warmup = 3
Whether to linearly interpolate learning rates for steps in [0, schedule_sizes[0]].
optional bool by_epoch = 4
schedule by epoch or by step.

Used in: MaskNetModule

optional float reduction_ratio = 1
the ratio between aggregation dim and masked input dim
optional uint32 aggregation_dim = 2
the dim of aggregation layer
required uint32 hidden_dim = 3
the dim of hidden ffn layer

Used in: ModelConfig

required MaskNetModule mask_net_module = 1

Used in: DBMTL, MaskNet

required uint32 n_mask_blocks = 1
number of mask blocks
required MaskBlock mask_block = 2
mask block
optional MLP top_mlp = 3
mlp layer on top of mask blocks
optional bool use_parallel = 4
use parallel or serial mask blocks

required string feature_name = 1
feature name, e.g. match_cate_brand_click_count
required string nested_map = 2
nested map input, e.g. user:match_cate_brand_click_count
required string pkey = 3
first layer (primary) key input, e.g. item:cate or ALL
required string skey = 4
second layer (secondary) key input, e.g. item:brand or ALL
optional string embedding_name = 5
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 6
embedding dimension
repeated float boundaries = 7
boundaries for bucketize numeric match value
optional uint64 hash_bucket_size = 8
number of hash size for sparse match value
optional uint64 num_buckets = 9
number of id enumerators for sparse match value
repeated string vocab_list = 10
id vocabulary list for sparse match value
map<string, uint64> vocab_dict = 11
id vocabulary dict
optional string pooling = 12
embedding pooling type, available is {sum | mean}
optional string default_value = 13
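fg default value. (Note: the source comment also contains what appears to be a disabled field declaration, `optional string combiner = 12 [default = "sum"]`, described as: match value combiner type, available is {sum | mean | min | max | count}.)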
optional string separator = 14
fg multi-value separator
optional bool need_discrete = 15
match map value is sparse or numeric, when need_discrete is true, combiner will be empty string
optional bool show_pkey = 16
match value need pkey value as prefix or not.
optional bool show_skey = 17
match value need skey value as prefix or not.
optional string normalizer = 18
fg normalizer, e.g. method=log10,threshold=1e-10,default=-10 method=zscore,mean=0.0,standard_deviation=10.0 method=minmax,min=2.1,max=2.2 method=expression,expr=sign(x)
optional string init_fn = 19
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional uint32 value_dim = 20
match value dimensions
optional bool use_mask = 21
mask value in training progress
optional ZeroCollisionHash zch = 22
zero collision hash
optional string vocab_file = 23
id vocabulary file path
optional string asset_dir = 24
vocab file relative directory
optional DynamicEmbedding dynamicemb = 25
dynamic embedding
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional uint64 default_bucketize_value = 31
out-of-vocab (OOV) id bucketize value when using vocab_list or vocab_dict. When default_bucketize_value is used, we will not add the additional bucketize values `default_value`=0 and <OOV>=1 into vocab_list or vocab_dict
optional string fg_value_type = 32
value_type after fg before bucketize, you can specify it for better performance. e.g. fg_value_type = int64 when use num_buckets
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
oneof dense_emb
- AutoDisEmbedding autodis = 40
  autodis embedding
- MLPEmbedding mlp = 41
  mlp embedding
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

(message has no fields)

(message has no fields)

Used in: BayesTaskTower, FusionSubTaskConfig, InterventionTaskTower, ModelConfig, TaskTower

oneof metric
- AUC auc = 1
- MulticlassAUC multiclass_auc = 2
- RecallAtK recall_at_k = 3
- MeanAbsoluteError mean_absolute_error = 4
- MeanSquaredError mean_squared_error = 5
- Accuracy accuracy = 6
- GroupedAUC grouped_auc = 7
- XAUC xauc = 8
- GroupedXAUC grouped_xauc = 9
- NormalizedEntropy normalized_entropy = 10

Used in: EasyRecConfig

repeated FeatureGroupConfig feature_groups = 1
oneof model
- DLRM dlrm = 100
- DeepFM deepfm = 101
- MultiTower multi_tower = 102
- MultiTowerDIN multi_tower_din = 103
- MaskNet mask_net = 104
- WideAndDeep wide_and_deep = 105
- DCNV1 dcn_v1 = 106
- DCNV2 dcn_v2 = 107
- xDeepFM xdeepfm = 108
- WuKong wukong = 109
- SimpleMultiTask simple_multi_task = 200
- MMoE mmoe = 201
- DBMTL dbmtl = 202
- PLE ple = 203
- DC2VR dc2vr = 204
- DlrmHSTU dlrm_hstu = 205
- PEPNet pepnet = 206
- UltraHSTU ultra_hstu = 207
- DSSM dssm = 301
- DSSMV2 dssm_v2 = 302
- DAT dat = 303
- HSTUMatch hstu_match = 304
- MIND mind = 305
- TDM tdm = 400
- RocketLaunching rocket_launching = 500
- SidRqkmeans sid_rqkmeans = 601
  SID generation models (600 is reserved for SidRqvae, arriving in the follow-up PR)
optional uint32 num_class = 2
repeated LossConfig losses = 3
repeated MetricConfig metrics = 4
repeated TrainMetricConfig train_metrics = 5
optional VariationalDropout variational_dropout = 11
optional Kernel kernel = 12
optional bool use_pareto_loss_weight = 13
whether use pareto loss weight

Used in: ModelConfig

repeated Tower towers = 1
required MLP final = 2

Used in: ModelConfig

repeated Tower towers = 1
repeated DINTower din_towers = 2
required MLP final = 3

Used in: SeqEncoderConfig

optional string name = 1
seq encoder name
required string input = 2
sequence feature name
required MLP attn_mlp = 3
mlp config for target attention score
repeated uint32 windows_len = 4
time windows len

Used in: TDM

repeated uint32 windows_len = 1
time windows len
required MLP attn_mlp = 2
mlp config for target attention score

Used in: MetricConfig

optional uint32 thresholds = 1
optional string average = 2
macro: calculate score for each class and average them. weighted: calculate score for each class and compute a weighted average using their support.

Weighted Random Sampling ItemID not in Batch

Used in: DataConfig

required string input_path = 1
sample data path schema => id:int64 | weight:float | attrs:string
required uint32 num_sample = 2
number of negative sample
repeated string attr_fields = 3
field names of attrs in train data or eval data
required string item_id_field = 4
field name of item_id in train data or eval data
optional string attr_delimiter = 5
attribute delimiter of attrs string
optional uint32 num_eval_sample = 6
number of negative samples for evaluator
optional string field_delimiter = 7
field delimiter of input data
optional string item_id_delim = 8
item id delimiter

Weighted Random Sampling ItemID not with Edge

Used in: DataConfig

required string user_input_path = 1
user data path schema => userid:int64 | weight:float
required string item_input_path = 2
item data path schema => itemid:int64 | weight:float | attrs:string
required string pos_edge_input_path = 3
positive edge path schema => userid:int64 | itemid:int64 | weight:float
required uint32 num_sample = 4
number of negative sample
repeated string attr_fields = 5
field names of attrs in train data or eval data
required string item_id_field = 6
field name of item_id in train data or eval data
required string user_id_field = 7
field name of user_id in train data or eval data
optional string attr_delimiter = 8
attribute delimiter of attrs string
optional uint32 num_eval_sample = 9
number of negative samples for evaluator
optional string field_delimiter = 10
field delimiter of input data

Used in: MetricConfig

optional float eta = 1
small epsilon clamping the population mean label rate away from {0, 1}.

required string feature_name = 1
feature name, e.g. overlap_ratio
required string query = 2
query input name, e.g. user:query
required string title = 3
title input name, e.g. item:title
required string method = 4
overlap calculate method, available is {query_common_ratio | title_common_ratio | is_contain | is_equal}
optional string embedding_name = 5
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 6
embedding dimension
repeated float boundaries = 7
boundaries for bucketize numeric expr value
optional string normalizer = 8
fg normalizer, e.g. method=log10,threshold=1e-10,default=-10 method=zscore,mean=0.0,standard_deviation=10.0 method=minmax,min=2.1,max=2.2 method=expression,expr=sign(x)
optional string pooling = 10
embedding pooling type, available is {sum | mean}
optional string separator = 12
fg multi-value separator. (Commented out in the proto just before this field: `optional string default_value = 11 [default = "0"];` — fg default value.)
optional string init_fn = 13
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 14
mask value in training progress
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
oneof dense_emb
- AutoDisEmbedding autodis = 40
  autodis embedding
- MLPEmbedding mlp = 41
  mlp embedding
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

Used in: ModelConfig

optional uint32 epnet_hidden_unit = 1
epnet hidden units
optional float epnet_gamma = 2
activation function for epnet
repeated uint32 ppnet_hidden_units = 3
ppnet hidden units
optional string ppnet_activation = 4
activation function for ppnet
repeated float ppnet_dropout_ratio = 5
ratio of dropout
optional float ppnet_gamma = 6
optional string domain_input_name = 7
domain feature name, must be a num bucket feature
optional uint32 task_domain_num = 8
domain number for each task
repeated TaskTower task_towers = 9
task tower

Used in: ModelConfig

repeated ExtractionNetwork extraction_networks = 1
extraction network
repeated TaskTower task_towers = 2
task tower

Used in: BoolMaskFeature, CombineFeature, ComboFeature, CustomFeature, ExprFeature, IdFeature, KvDotProduct, LookupFeature, MatchFeature, OverlapFeature, RawFeature, TokenizeFeature, TrainConfig

repeated string sharding_types = 1
embedding sharding type constraints data_parallel | table_wise | column_wise | row_wise | table_row_wise | table_column_wise | grid_shard
repeated string compute_kernels = 2
embedding compute kernel constraints dense | fused | fused_uvm | fused_uvm_caching | key_value

Used in: DenseOptimizer

oneof optimizer
- SGDOptimizer sgd_optimizer = 1
- AdagradOptimizer adagrad_optimizer = 2
- AdamOptimizer adam_optimizer = 3
- AdamWOptimizer adamw_optimizer = 4
- AdadeltaOptimizer adadelta_optimizer = 6
- RMSpropOptimizer rmsprop_optimizer = 7
required string regex_pattern = 5
oneof learning_rate
- ConstantLR constant_learning_rate = 101
- ExponentialDecayLR exponential_decay_learning_rate = 102
- ManualStepLR manual_step_learning_rate = 103
- CosineAnnealingLR cosine_annealing_learning_rate = 104
- CosineAnnealingWarmRestartsLR cosine_annealing_warm_restarts_learning_rate = 105

Used in: SeqEncoderConfig

optional string name = 1
seq encoder name
required string input = 2
sequence feature name
optional string pooling_type = 3
pooling type, sum or mean
optional int32 max_seq_length = 6
maximum sequence length

required float lr = 1
optional float alpha = 2
optional float eps = 3
optional float weight_decay = 4

required string feature_name = 1
feature name, e.g. click_count
required string expression = 2
feature input, e.g. item:click_count
optional string embedding_name = 3
embedding name, feature with same embedding name will share embedding
optional uint32 embedding_dim = 4
embedding dimension
repeated float boundaries = 5
boundaries for bucketizing the numeric feature
optional uint32 value_dim = 6
raw feature of multiple dimensions
optional string normalizer = 7
fg normalizer, e.g. method=log10,threshold=1e-10,default=-10 method=zscore,mean=0.0,standard_deviation=10.0 method=minmax,min=2.1,max=2.2 method=expression,expr=sign(x)
optional string pooling = 10
embedding pooling type, available is {sum | mean}
optional string default_value = 11
fg default value
optional string separator = 12
fg multi-value separator
optional string init_fn = 13
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 14
mask value in training progress
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
oneof dense_emb
- AutoDisEmbedding autodis = 40
  autodis embedding
- MLPEmbedding mlp = 41
  mlp embedding
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.

optional uint32 top_k = 1

Used in: ModelConfig

optional MLP share_mlp = 1
required MLP booster_mlp = 2
required MLP light_mlp = 3
optional bool feature_based_distillation = 5
optional Similarity feature_distillation_function = 6
COSINE = 0; EUCLID = 1;

required float lr = 1
optional float momentum = 2
optional float weight_decay = 3
optional float dampening = 4
optional bool nesterov = 5
optional bool fused = 6

Used in: HSTU

required uint32 embedding_dim = 1
dimension of input embeddings
required uint32 num_heads = 2
number of attention heads
required uint32 hidden_dim = 3
dimension of hidden linear layers
required uint32 attention_dim = 4
dimension of attention mechanism
optional float output_dropout_ratio = 5
dropout probability for linear layers
optional uint32 max_attn_len = 6
maximum length of attention window
optional float attn_alpha = 7
alpha for mha attention
optional bool use_group_norm = 8
use group normalization or layer normalization.
optional bool recompute_normed_x = 9
whether to recompute normed_x in backward
optional bool recompute_uvqk = 10
whether to recompute_uvqk in backward
optional bool recompute_y = 11
whether to recompute y in backward
optional bool sort_by_length = 12
whether to sort by sequence length when forwarding
optional int32 contextual_seq_len = 13
sequence length of contextual feature. Sentinel: < 0 (default) = use input_preprocessor.contextual_seq_len().
optional uint32 sla_k1 = 14
Semi-Local Attention: local causal window size (0 = disabled)
optional uint32 sla_k2 = 15
Semi-Local Attention: global prefix length (0 = disabled)
optional int32 scaling_seqlen = 16
attention output scaling divisor (denominator of the SiLU(QK)/N term). Sentinel: < 0 (default) = use runtime max_seq_len.

Used in: SeqEncoderConfig

optional string name = 1
seq encoder name
required string input = 2
sequence feature name
optional int32 multihead_attn_dim = 3
multihead_attn_dim must be divisible by num_heads
optional int32 num_heads = 4
self attention num heads
optional float dropout = 5
dropout for attn_output_weights
optional int32 max_seq_length = 6
maximum sequence length

Used in: FeatureGroupConfig

oneof seq_module
- DINEncoder din_encoder = 1
- SimpleAttention simple_attention = 2
- PoolingEncoder pooling_encoder = 3
- MultiWindowDINEncoder multi_window_din_encoder = 4
- SelfAttentionEncoder self_attention_encoder = 6

Used in: SequenceFeature

oneof feature
- IdFeature id_feature = 1
- RawFeature raw_feature = 2
- ComboFeature combo_feature = 3
- LookupFeature lookup_feature = 4
- MatchFeature match_feature = 5
- ExprFeature expr_feature = 7
- OverlapFeature overlap_feature = 8
- TokenizeFeature tokenize_feature = 9
- CustomFeature custom_feature = 10
- KvDotProduct kv_dot_product = 11
- BoolMaskFeature bool_mask_feature = 12
- CombineFeature combine_feature = 13

Used in: FeatureGroupConfig

optional string group_name = 1
repeated string feature_names = 2
optional string embedding_name_suffix = 3
Suffix appended to each feature's embedding_name; same semantics as FeatureGroupConfig.embedding_name_suffix. Empty == disabled.

Used in: FeatureConfig

required string sequence_name = 1
sequence name
required uint32 sequence_length = 2
max sequence length, only take effect in fg
required string sequence_delim = 3
sequence delimiter
optional string sequence_pk = 4
sequence primary key name for serving, default will be user:{sequence_name}
repeated SeqFeatureConfig features = 5
sub feature config

Used in: ModelConfig

optional uint32 input_dim = 1
Input embedding dimension (K-Means runs directly on raw embeddings, no encoder).
repeated uint32 codebook = 3
Per-layer cluster counts, e.g. [256, 256, 256]. List length is the number of residual quantization layers. Entries may differ per layer (non-uniform codebooks such as [256, 512, 1024] are supported — the FAISS backend fits a separate ``faiss.Kmeans`` per layer).
optional bool normalize_residuals = 4
L2-normalize residuals before each layer.
optional FaissKmeansConfig faiss_kmeans_kwargs = 5
Strictly-typed extra kwargs forwarded to faiss.Kmeans(D, K, **kwargs).
optional uint32 train_sample_size = 6
Target number of embeddings to reservoir-sample for the FAISS fit. Bounds host memory regardless of corpus size. 0 (the default) auto-derives it as max(K) * max_points_per_centroid (the largest per-layer codebook, for non-uniform codebooks) — exactly what FAISS subsamples to internally (default 256), so no training points are wasted.
optional string embedding_feature_name = 40
Name of the item embedding feature inside the input Batch.

Used in: DAT, DSSM, DSSMV2, HSTUMatch, MIND, RocketLaunching

COSINE = 0
INNER_PRODUCT = 1
EUCLID = 2

Used in: SeqEncoderConfig

optional string name = 1
seq encoder name
required string input = 2
sequence feature name
optional int32 max_seq_length = 6
maximum sequence length

Used in: ModelConfig

repeated TaskTower task_towers = 1

Used in: LossConfig

optional float label_smoothing = 1

Used in: TrainConfig

oneof optimizer
- FusedSGDOptimizer sgd_optimizer = 1
- FusedAdagradOptimizer adagrad_optimizer = 2
- FusedAdamOptimizer adam_optimizer = 3
- FusedLarsSGDOptimizer lars_sgd_optimizer = 4
- FusedLAMBOptimizer lamb_optimizer = 5
- FusedPartialRowWiseLAMBOptimizer partial_rowwise_lamb_optimizer = 6
- FusedPartialRowWiseAdamOptimizer partial_rowwise_adam_optimizer = 7
- FusedRowWiseAdagradOptimizer rowwise_adagrad_optimizer = 8
- FusedAdadeltaOptimizer adadelta_optimizer = 9
- FusedRMSpropOptimizer rmsprop_optimizer = 10
oneof learning_rate
- ConstantLR constant_learning_rate = 101
- ExponentialDecayLR exponential_decay_learning_rate = 102
- ManualStepLR manual_step_learning_rate = 103
- CosineAnnealingLR cosine_annealing_learning_rate = 104
- CosineAnnealingWarmRestartsLR cosine_annealing_warm_restarts_learning_rate = 105

Used in: ModelConfig

required MultiWindowDINTower multiwindow_din = 1
required MLP final = 2

Used in: DataConfig

required string item_input_path = 1
schema => itemid:int64 | weight:float | attrs:string
required string edge_input_path = 2
schema => src_id:int64 | dst_id:int64 | weight:float, edge for train.
required string predict_edge_input_path = 3
schema => src_id:int64 | dst_id:int64 | weight:float, edge for retrieval beam search.
repeated string attr_fields = 4
field names of attrs in train data or eval data
required string item_id_field = 5
field name of item_id in train data or eval data
repeated uint32 layer_num_sample = 6
the number of negative samples per layer
optional string attr_delimiter = 7
attribute delimiter of attrs string
optional uint32 num_eval_sample = 8
number of negative samples for evaluator
optional string field_delimiter = 9
field delimiter of input data
optional float remain_ratio = 10
The training process only trains a randomly selected proportion of nodes in the middle layers of the tree
optional string probability_type = 11
The type of probability for selecting and retaining each layer in the middle layers of the tree

Used in: MMoE, PEPNet, PLE, SimpleMultiTask

required string tower_name = 1
task name for the task tower
required string label_name = 2
label for the task
repeated MetricConfig metrics = 3
metrics for the task
repeated TrainMetricConfig train_metrics = 20
log train metrics for task
repeated LossConfig losses = 4
loss for the task
optional uint32 num_class = 5
num_class for multi-class classification loss
optional MLP mlp = 6
task specific mlp
optional float weight = 7
training loss weights
optional string sample_weight_name = 8
sample weight for the task
optional string task_space_indicator_label = 9
label name for indicating the sample space for the task tower
optional float in_task_space_weight = 10
the loss weight for sample in the task space
optional float out_task_space_weight = 11
the loss weight for sample out the task space
optional float pareto_min_loss_weight = 12
use pareto front minimal loss weight, ge 0 and lt 1

enum TextNormalizeOption

feature.proto:677

Used in: TextNormalizer

TEXT_LOWER2UPPER = 0
lower case to upper case
TEXT_UPPER2LOWER = 1
upper case to lower case
TEXT_SBC2DBC = 2
sbc case to dbc case
TEXT_CHT2CHS = 3
traditional Chinese to simplified Chinese
TEXT_FILTER = 4
filter special chars
TEXT_SPLITCHRS = 5
chinese split to chars with blanks
TEXT_REMOVE_SPACE = 6
remove space

message TextNormalizer

feature.proto:694

Used in: TokenizeFeature

optional uint32 max_length = 1
if text_length is greater than max_length, the text will not be normalized
optional string stop_char_file = 2
stop char file path, default will use built-in stop char
repeated TextNormalizeOption norm_options = 3
text normalize options, default is TEXT_LOWER2UPPER & TEXT_SBC2DBC & TEXT_CHT2CHS & TEXT_FILTER

required string feature_name = 1
feature name, e.g. title_token
required string expression = 2
feature input, e.g. item:title
optional string embedding_name = 3
embedding name, feature with same embedding name will share embedding
required uint32 embedding_dim = 4
embedding dimension
optional TextNormalizer text_normalizer = 6
text normalizer
required string vocab_file = 7
tokenizer vocabulary file path
optional string asset_dir = 8
vocab file relative directory
optional string pooling = 10
embedding pooling type, available is {sum | mean}
optional string default_value = 11
fg default value, default value before bucketize
optional string tokenizer_type = 12
tokenizer type, available is {bpe | sentencepiece}
optional string init_fn = 14
embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"
optional bool use_mask = 15
mask value in training progress
optional string fg_encoded_default_value = 30
default value when fg_mode = FG_NONE, when use pai-fg, you do not need to set the param. when use own fg and data contain null value, you can set the param for fill null
optional bool trainable = 33
embedding param trainable or not
optional bool stub_type = 34
only used as fg dag intermediate result or not
optional string data_type = 35
embedding data type
optional ParameterConstraints embedding_constraints = 50
embedding param constraints
optional uint32 sequence_length = 101
max sequence length, only take effect when use it as sequence
optional string sequence_delim = 102
sequence delimiter, only take effect when use it as sequence
repeated string sequence_fields = 103
specify sequence type fields in inputs. default is item side inputs.
optional bool tokens_as_sequence = 104
When true, treat the tokenized output of a single text as a sequence of tokens (each token is one sequence element, value_dim=1) so the feature can be fed into sequence_encoder modules (pooling_encoder, self_attention_encoder, etc.) via EmbeddingCollection instead of being pooled by EmbeddingBagCollection. This is distinct from the `sequence_tokenize_feature` oneof entry, which interprets the input as a `sequence_delim`-separated list of texts.

Used in: DSSM, DSSMV2, HSTUMatch, MIND, MultiTower, MultiTowerDIN

required string input = 1
input feature group name
optional MLP mlp = 2
mlp config (optional; when unset the tower applies no projection)

Used in: EasyRecConfig

required SparseOptimizer sparse_optimizer = 1
embedding part optimizer
required DenseOptimizer dense_optimizer = 2
dense part optimizer
optional uint32 num_steps = 3
number of steps to train models
optional uint32 num_epochs = 4
number of epochs to train models
optional uint32 save_checkpoints_steps = 5
step interval for saving checkpoint
optional string fine_tune_checkpoint = 6
checkpoint to restore parameters from
optional string fine_tune_ckpt_param_map = 7
checkpoint to restore parameters mapping, each line is {param name in current model}\\t{param name in old ckpt}
optional uint32 log_step_count_steps = 8
the frequency the loss and lr will be logged during training
optional bool is_profiling = 9
profiling or not
optional bool use_tensorboard = 10
use tensorboard or not.
optional uint32 save_checkpoints_epochs = 11
epoch interval for saving checkpoint
repeated string tensorboard_summaries = 12
the summaries to be saved in tensorboard, activated only when use_tensorboard=true, possible values are: "loss", "learning_rate", "parameter", "global_gradient_norm", "gradient_norm", "gradient" default values are ["loss", "learning_rate"]
optional bool cudnn_allow_tf32 = 13
whether to use torch.backends.cudnn.allow_tf32
optional bool cuda_matmul_allow_tf32 = 14
whether to use torch.backends.cuda.matmul.allow_tf32
optional ParameterConstraints global_embedding_constraints = 15
global embedding param constraints
optional string mixed_precision = 16
mixed precision dtype.
optional GradScaler grad_scaler = 17
grad_scaler dynamically estimates the scale factor each iteration.
optional uint32 gradient_accumulation_steps = 18
gradient accumulation steps
optional GradClipping grad_clipping = 19
dense gradient clipping config
optional uint32 keep_checkpoint_max = 20
maximum number of recent checkpoints to keep; 0 keeps all.
optional uint32 save_checkpoints_timestamp_interval = 21
save every N seconds of consumed event-time (e.g. kafka message timestamp), aligned to the Unix epoch (not training epochs). 0 disables.
repeated uint64 save_checkpoints_timestamps = 22
absolute event-time targets (Unix-epoch seconds); save once when consumed data crosses each. empty disables.
optional float save_checkpoints_timestamp_quorum = 23
fraction of workers (0,1] that must pass a boundary/target before a timestamp checkpoint fires; default 0.5 (1.0 = all). outlier-robust.
optional DeltaEmbeddingDumpConfig delta_embedding_dump_config = 24
Configuring this field dumps changed sparse embedding rows during CUDA training. Multi-GPU training writes one parquet shard per rank; column-wise embedding sharding is not supported.
TBD: qcomm config

Used in: BayesTaskTower, FusionSubTaskConfig, InterventionTaskTower, ModelConfig, TaskTower

oneof metric
- AUC auc = 1
- RecallAtK recall_at_k = 3
- MeanAbsoluteError mean_absolute_error = 4
- MeanSquaredError mean_squared_error = 5
- Accuracy accuracy = 6
- XAUC xauc = 8
optional float decay_rate = 20
metric decay rate
optional uint32 decay_step = 21
metric decay step; train_config.log_step_count_steps should divide decay_step evenly.

Used in: ModelConfig

repeated HSTU hstu = 1
Mixture of Transducers: one HSTU per channel; per-candidate outputs are concatenated on the embedding dim. >= 2 entries requires every entry to set a unique non-empty `name` plus the matching `<name>` / `<name>_action` / `<name>_watchtime` / `<name>_timestamp` feature_groups. Candidate-side and contextual groups are shared across all channels.
required FusionMTLTower fusion_mtl_tower = 2
multi task tower config
required uint32 max_seq_len = 3
max sequence length
optional uint32 item_embedding_hidden_dim = 4
item embedding mlp hidden dimension
optional bool enable_global_average_loss = 5
enables loss averaging computation globally across all ranks (total rank) instead of locally (local rank).
optional bool sequence_timestamp_is_ascending = 6
timestamp of sequence is ascending or descending
optional bool concat_contextual_features = 7
concat all contextual features on channel dim as one token

Used in: ModelConfig

optional float regularization_lambda = 1
regularization coefficient lambda
optional bool embedding_wise_variational_dropout = 2
variational_dropout dimension

Used in: FusedRowWiseAdagradOptimizer

NONE = 0
L2 = 1
DECOUPLE = 2

Used in: ModelConfig

required MLP deep = 1
optional MLP final = 2
optional uint32 wide_embedding_dim = 3
optional string wide_init_fn = 4
wide embedding init function, e.g. "nn.init.uniform_,a=-0.01,b=0.01"

Used in: ModelConfig

optional MLP dense_mlp = 1
repeated WuKongLayer wukong_layers = 2
required MLP final = 3

Used in: WuKong

required uint32 lcb_feature_num = 1
LinearCompressBlock output feature num
required uint32 fmb_feature_num = 2
FactorizationMachineBlock output feature num
optional uint32 compressed_feature_num = 3
number of compressed features in optimized FM.
required MLP feature_num_mlp = 4
feature num mlp