KnowledgeBankService defines the service for handling embedding lookups, updates, and sampling.
Starts a session with the DES server.
The config for processing embedding lookup/update.
The name of the Op that uses dynamic embedding. A name and a config should uniquely identify a session.
A unique key for DES to identify this session.
Looks up the embeddings for a given batch of keys.
A handle to identify which session to use.
List of lookup keys.
If update = true, Lookup allocates a new entry for each new key and also counts key frequency; this is typically used in the feedforward path during training. Otherwise, it is a read-only lookup that does not change any internal state, typically used in inference.
Map from keys to their embeddings.
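A minimal sketch of the lookup semantics described above, assuming an in-memory dict-backed bank; the class and method names here are illustrative, not the actual DES implementation:

```python
import numpy as np

class InMemoryKnowledgeBank:
    """Illustrative stand-in for a knowledge bank backend (names assumed)."""

    def __init__(self, embedding_dim, initializer=None):
        self.embedding_dim = embedding_dim
        self.initializer = initializer or (lambda: np.zeros(embedding_dim))
        self.embeddings = {}   # key -> embedding vector
        self.frequency = {}    # key -> lookup count

    def lookup(self, keys, update):
        """If update is True, allocate entries for new keys and count frequency;
        otherwise perform a read-only lookup."""
        result = {}
        for key in keys:
            if update:
                if key not in self.embeddings:
                    self.embeddings[key] = self.initializer()
                self.frequency[key] = self.frequency.get(key, 0) + 1
            if key in self.embeddings:
                result[key] = self.embeddings[key]
        return result
```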
Updates the embedding values for a given batch of keys.
A handle to identify which session to use.
A batch of keys and their new values to be updated.
A batch of keys and the gradients to apply to their embeddings.
(message has no fields)
Samples embedding values from a given context.
A handle to identify which session to use.
A batch of sample contexts.
Number of returned samples per sample context.
If true, allocate new embeddings for positive keys that are not in the knowledge bank.
A batch of results corresponding to the input sample context.
Looks up a dynamic memory layer and returns the corresponding memory data.
A handle to identify which session to use.
List of lookup activations.
Memory lookup result for each input.
Exports the current model to a given directory, under a timestamped subdirectory.
A handle to identify which session to use.
Path to the export directory.
The saved path for knowledge bank.
The saved path for memory store. Used by DynamicMemory.
Imports the state of DES for a given session_handle.
A handle to identify which session to use.
The saved path for knowledge bank.
The saved path for memory store. Used by DynamicMemory.
(message has no fields)
Used in:
Weights associated with this feature value.
Human-readable information for each of the feature values.
Configuration for a DynamicEmbedding. This decides which storage system is used for storing embeddings, as well as the corresponding lookup/update/sampling strategy.
Used in:
Used in:
Default embedding for new data.
Set all values to zero, same as tf.zeros_initializer.
Set each value to a random number between low and high, same as tf.random_uniform_initializer.
Draw values from a Gaussian distribution with the given (mean, stddev), same as tf.random_normal_initializer.
Used for random number generation. If true, use a deterministic seed.
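A small sketch of the initialization options above, using NumPy in place of the TensorFlow initializers; the function and parameter names are illustrative:

```python
import numpy as np

def make_initial_embedding(dim, method="zeros", low=-0.1, high=0.1,
                           mean=0.0, stddev=1.0, use_deterministic_seed=False):
    """Produce a default embedding for new data, mirroring the options above."""
    rng = np.random.default_rng(0 if use_deterministic_seed else None)
    if method == "zeros":            # same idea as tf.zeros_initializer
        return np.zeros(dim)
    if method == "random_uniform":   # same idea as tf.random_uniform_initializer
        return rng.uniform(low, high, size=dim)
    if method == "random_normal":    # same idea as tf.random_normal_initializer
        return rng.normal(mean, stddev, size=dim)
    raise ValueError(f"Unknown initializer: {method}")
```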
Used in:
Used in:
Used in:
(message has no fields)
Representation of an embedding vector and its related information.
Used in:
The lookup key associated with this embedding vector, e.g., normalized keywords or sample IDs.
Weight of the embedding vector, usually contains global information such as word frequency or probability P(w).
Embedding vector.
Metadata containing more information for the embedding, e.g., category_id.
Timestamp of the embedding, usually recording the last time this embedding was updated. Value is in microseconds elapsed since 1/1/1970 (the Unix epoch).
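A rough Python mirror of the information carried by EmbeddingVectorProto. The `tag` and `weight` names are taken from references elsewhere in this doc; the remaining field names are assumptions:

```python
from dataclasses import dataclass, field
from typing import Dict, List

@dataclass
class EmbeddingVector:
    """Illustrative mirror of EmbeddingVectorProto; not the actual proto."""
    tag: str = ""                  # lookup key, e.g., normalized keyword or sample ID
    weight: float = 0.0            # global information such as frequency or P(w)
    vec: List[float] = field(default_factory=list)          # embedding vector (name assumed)
    metadata: Dict[str, str] = field(default_factory=dict)  # e.g., category_id (name assumed)
    last_update_time_usec: int = 0  # microseconds since the Unix epoch (name assumed)
```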
Used in:
Used in:
Weights associated with this feature value.
Human-readable information for each of the feature values.
Config for gradient descent algorithms used in the knowledge bank service. Each time the server receives gradients for the embedding data, it applies the corresponding optimizer to update the embeddings. The gradient update is performed on the server side to facilitate asynchronous updates.
Used in:
Learning rate is used by most algorithms.
Used in:
Initial value for Adagrad accumulator. Recommended value is 0.1.
Used in:
(message has no fields)
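A minimal sketch of the server-side gradient update described above, using plain SGD and Adagrad as examples; the class layout and per-key accumulator handling are assumptions:

```python
import numpy as np

class ServerSideOptimizer:
    """Applies gradient updates to embeddings on the server, one key at a time."""

    def __init__(self, learning_rate=0.1, use_adagrad=True, init_accumulator=0.1):
        self.learning_rate = learning_rate
        self.use_adagrad = use_adagrad
        self.init_accumulator = init_accumulator
        self.accumulators = {}  # key -> per-dimension sum of squared gradients

    def apply_gradient(self, embeddings, key, grad):
        """embeddings: dict from key to np.ndarray; grad: gradient for this key."""
        grad = np.asarray(grad, dtype=float)
        if self.use_adagrad:
            acc = self.accumulators.setdefault(
                key, np.full_like(grad, self.init_accumulator))
            acc += grad * grad
            embeddings[key] -= self.learning_rate * grad / np.sqrt(acc)
        else:  # plain SGD
            embeddings[key] -= self.learning_rate * grad
```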
Stores the embeddings in the proto directly. Note that a protocol buffer message only allows a small number of entries, so only use this for model testing.
Represents the embedding data as a map from string to EmbeddingVectorProto.
Used in:
An InputContext is a list of features that provides the context of an input.
Used in:
Map from feature name to InputFeature.
A generic sparse/dense feature representation. Each feature must have a unique value list, be it string, float, or int. To include additional information for debugging, one can use debug_info.
Used in:
Used in:
Weights associated with this feature value.
Human-readable information for each of the feature values.
MetaData for restoring the state of a KnowledgeBank.
Config from the base KnowledgeBank class.
This is used by the subclass implementation of KnowledgeBank to restore its state.
Used in:
Method to initialize a new embedding.
The implementation is encoded in an extension.
Stores the embeddings in LevelDB, which facilitates efficient key-value lookup and update. The KnowledgeBankServer first loads all the embedding data from the DB into memory, then only writes the data back to the DB when Export() is called.
The address of the LevelDB file.
Number of in-memory partitions for embedding data lookup/update.
Maximal in-memory write buffer size for embedding updates. Used for asynchronous training. If the training is synchronous, set it to 1.
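A sketch of how partitioned in-memory lookup/update with a bounded write buffer might look, under the assumption that keys are hash-partitioned; this is illustrative, not the actual LevelDB-backed implementation:

```python
import threading

class PartitionedEmbeddingStore:
    """Illustrative partitioned in-memory store with a bounded write buffer."""

    def __init__(self, num_partitions=16, max_write_buffer_size=1):
        self.num_partitions = num_partitions
        self.max_write_buffer_size = max_write_buffer_size
        self.partitions = [dict() for _ in range(num_partitions)]
        self.partition_locks = [threading.Lock() for _ in range(num_partitions)]
        self.pending_writes = []
        self.buffer_lock = threading.Lock()

    def _partition(self, key):
        # Keys are hashed to one of num_partitions in-memory maps.
        return hash(key) % self.num_partitions

    def lookup(self, key):
        p = self._partition(key)
        with self.partition_locks[p]:
            return self.partitions[p].get(key)

    def update(self, key, value):
        p = self._partition(key)
        with self.partition_locks[p]:
            self.partitions[p][key] = value
        with self.buffer_lock:
            self.pending_writes.append((key, value))
            # With max_write_buffer_size == 1 every update is flushed at once,
            # matching the synchronous-training recommendation above.
            if len(self.pending_writes) >= self.max_write_buffer_size:
                self._flush()

    def _flush(self):
        # The real server persists to LevelDB only when Export() is called.
        self.pending_writes.clear()
```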
Used in:
Lookup without changing existing memory data.
Lookup keys are used to update existing memory data, but new clusters will not be created.
Lookup keys are used to update existing memory data, and a new cluster can be created if the lookup input is too far away from existing ones.
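A sketch of the lookup modes above, assuming Euclidean distance with a fixed threshold for deciding when a new cluster is created; the mode names, distance metric, and update rule are all placeholders:

```python
import numpy as np

def memory_lookup(clusters, activation, mode, distance_threshold=1.0):
    """clusters: list of np.ndarray cluster centers (mutated in place).
    Returns the nearest center, optionally updating it or growing a new cluster."""
    activation = np.asarray(activation, dtype=float)
    if not clusters:
        if mode == "LOOKUP_WITH_GROW":      # placeholder mode name
            clusters.append(activation.copy())
            return activation
        return None
    distances = [np.linalg.norm(activation - c) for c in clusters]
    nearest = int(np.argmin(distances))
    if mode == "LOOKUP_WITH_GROW" and distances[nearest] > distance_threshold:
        clusters.append(activation.copy())  # lookup input is too far from existing clusters
        return activation
    if mode in ("LOOKUP_WITH_UPDATE", "LOOKUP_WITH_GROW"):
        # Nudge the nearest cluster toward the lookup input (illustrative update rule).
        clusters[nearest] = 0.9 * clusters[nearest] + 0.1 * activation
    return clusters[nearest]
```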
Config for constructing a phrase EmbeddingLookup used for sentence embedding.
Used in:
Config for the embedding data that is stored in the LevelDB.
Used in:
Config for the embedding data that is stored in a TFRecord file. The key of each embedding is stored in the EmbeddingVectorProto.tag field.
Used in:
The path to the TFRecord file.
Used in:
Sampled results.
A sparse features embedding consists of a phrase embedding lookup and meta_data for composition.
Config for phrase embedding lookup.
MetaData for computing the sparse features embedding.
Information used during serving of sentence embedding. It should be paired with a PhraseEmbeddingLookup in serving.
Used in:
Embedding dimension, should be the same as keyword embedding.
Used for computing sigma(x) from the last sigma_dimension entries of the embedding data. If this is set, params should contain "sigma_kernel" and "sigma_bias" for the computation.
Parameters for computing the sparse features embedding. For example, in the case of the paper (https://openreview.net/pdf?id=SyK00v5xx), the parameter should be params.feature_map["a"] as used for computing weight $a / (P(w) + a)$ for each phrase w.
Context-free embedding, also known as v_c in CAML; it can be used as the OOV embedding in an NN model. If combine_method = WEIGHTED_BY_FREQ, it represents the principal component computed from all sentence embeddings. Its size should equal embedding_dimension.
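A small sketch of the frequency-based weighting referenced above (and in WEIGHTED_BY_FREQ below): each phrase embedding is weighted by $a / (P(w) + a)$, with `a` taken from params and P(w) from the embedding's weight field, and the v_c (principal-component) direction is then removed, following the referenced paper; whether the removal happens inside this combine method is an assumption:

```python
import numpy as np

def weighted_by_freq_embedding(phrase_embeddings, phrase_freqs, a, v_c):
    """phrase_embeddings: list of np.ndarray phrase vectors; phrase_freqs: P(w) per phrase;
    a: the value of params.feature_map["a"]; v_c: context-free / principal-component vector."""
    weighted = [
        (a / (p_w + a)) * emb                      # weight a / (P(w) + a) per phrase
        for emb, p_w in zip(phrase_embeddings, phrase_freqs)
    ]
    sentence = np.sum(weighted, axis=0)
    # Remove the component along v_c (assumption: follows the SIF recipe in the paper).
    v = v_c / np.linalg.norm(v_c)
    return sentence - np.dot(sentence, v) * v
```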
Combine method for computing the embedding of a sentence or a phrase.
Used in:
Compute the weight of a phrase by $a / (P(w) + a)$ and require params['a'] to be set. P(w) (phrase frequency) is stored in the weight field of the embedding. See the paper https://openreview.net/pdf?id=SyK00v5xx for more details.
Simply $\sum_w (1 - \sigma(w)) emb_s(w)$, where $\sigma(w)$ is computed by projecting `emb(w)` onto the line segment connecting `emb_i(w)` and `v_c`, and `emb_s(w)` is the component left after removing the v_c component from `emb(w)`. This is used for re-embedding.
Computes the full $\sum_X ((1 - \sigma(x)) emb_s(x) + \sigma(x) v_c)$ where $\sigma(x)$ is computed by using the sigma_dimension in MetaData, namely $1 / (1 + exp(-(sigma_kernel * w[-sigma_dimension:-1]+ sigma_bias)))$. `emb_s(x)` is `w[:embedding_dimension]`. This is used by embeddings learned from training.
Simply compute the mean of segmented phrase embeddings.
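A sketch of the sigma-based combination above, assuming each stored vector concatenates the content embedding (first embedding_dimension entries) with sigma features (last sigma_dimension entries); the exact slicing and function names are assumptions:

```python
import numpy as np

def weighted_by_sigma_embedding(embeddings, embedding_dim, sigma_dim,
                                sigma_kernel, sigma_bias, v_c):
    """Combine per-phrase vectors as sum_x ((1 - sigma(x)) * emb_s(x) + sigma(x) * v_c)."""
    total = np.zeros(embedding_dim)
    for w in embeddings:
        emb_s = w[:embedding_dim]        # content part of the embedding
        sigma_features = w[-sigma_dim:]  # last sigma_dimension entries
        logit = float(np.dot(sigma_kernel, sigma_features) + sigma_bias)
        sigma = 1.0 / (1.0 + np.exp(-logit))   # 1 / (1 + exp(-(kernel . features + bias)))
        total += (1.0 - sigma) * emb_s + sigma * v_c
    return total
```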
(message has no fields)
Used in:
Weights associated with this feature value.
Human-readable information for each of the feature values.