--- Write data ---
Register new partitions with the Dataset (asynchronously)
Register new partitions with the Dataset (blocking)
TODO(andrea): This is a copy of RegisterWithDatasetRequest. Eventually we _may_ get rid of the sync version; until then, we should make sure that the two objects are in sync.
Unimplemented.
Returns the schema of the partition table (i.e. the dataset manifest) itself, *not* the underlying dataset.
* To inspect the data of the partition table, use `ScanPartitionTable`.
* To retrieve the schema of the underlying dataset, use `GetDatasetSchema` instead.
Inspect the contents of the partition table (i.e. the dataset manifest). The returned data will follow the schema specified by `GetPartitionTableSchema`.
Returns the schema of the dataset. This is the union of all the schemas from all the underlying partitions. It will contain all the indexes, entities and components present in the dataset.
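As a quick orientation, here is a minimal client sketch for the two schema calls, assuming Python stubs generated by `protoc`/`grpcio` from these definitions. The module path, stub name, message names and field names are assumptions and may differ from the actual generated code.

```python
import grpc

# Hypothetical module path for the generated code -- adjust to the actual
# package layout of the compiled protos.
from rerun.manifest_registry.v1alpha1 import manifest_registry_pb2 as pb
from rerun.manifest_registry.v1alpha1 import manifest_registry_pb2_grpc as pb_grpc

channel = grpc.insecure_channel("localhost:51234")
stub = pb_grpc.ManifestRegistryServiceStub(channel)

dataset = pb.DatasetHandle(id="my_dataset")  # hypothetical dataset reference

# Schema of the partition table (the dataset manifest) itself:
partition_table_schema = stub.GetPartitionTableSchema(
    pb.GetPartitionTableSchemaRequest(dataset=dataset)
)

# Schema of the underlying dataset (union of all partition schemas):
dataset_schema = stub.GetDatasetSchema(
    pb.GetDatasetSchemaRequest(dataset=dataset)
)
```

The later sketches below reuse `stub`, `pb` and `dataset` from this snippet.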
Creates a custom index for a specific column (vector search, full-text search, etc.). The index can be created for all partitions or only for specific ones. Creating an index also creates a new index-specific chunk manifest for the Dataset; the chunk manifest contains information about the individual chunk rows of all chunks containing relevant index data.
List of specific partitions that will be indexed (all if left empty).
Specifies the behavior when an index for a partition has already been created.
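Continuing the client sketch above, a hedged illustration of what a `CreateIndex` request could look like. Only the meaning of each field is taken from the descriptions here; `IndexConfig`, the field names and the duplicate-behavior enum value are assumptions.

```python
request = pb.CreateIndexRequest(
    dataset=dataset,
    partition_ids=[],              # empty => create the index for all partitions
    config=pb.IndexConfig(         # hypothetical message name
        column="embedding",        # component/column to index
        time_index="log_time",     # filter index, i.e. the timeline queried at search time
        # ...plus index-kind-specific properties (e.g. vector dimensionality, metric)
    ),
    # behavior when an index for a partition already exists (enum name is a guess):
    on_duplicate=pb.IF_DUPLICATE_BEHAVIOR_OVERWRITE,
)
stub.CreateIndex(request)
```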
Recreate an index with the same configuration but (potentially) new data.
Search a previously created index. Performs a full-text, vector or scalar search. Currently only an indexed search is supported: the user must first call `CreateIndex` for the relevant column.
The response is a RecordBatch with 4 columns:
- 'partition_id': which partition the data is from
- 'timepoint': the points in time where the index query matches. Which time points are matched depends on the type of index that is queried. For example: for vector search it might be the timepoints where the top-K matches are found within *each* partition in the indexed entry; for an inverted index it might be the timepoints where the query string is found in the indexed column
- instance column: if the index column contains a batch of values (for example a list of embeddings), then each instance of the batch is a separate row in the resulting RecordBatch
- 'instance_id': a simple element index in the batch array. For example, if the indexed column is a list of embeddings [a, b, c] (where each embedding is of the same length), then the 'instance_id' of embedding 'a' is 0, the 'instance_id' of 'b' is 1, etc.
TODO(zehiko) add support for "brute force" search.
Dataset whose index we want to search
Index column that is queried
Query data - the type of data is index-specific. The caller must make sure to provide the right type: for vector search this should be a vector of the appropriate size, for an inverted index this should be a string. Query data is represented as a unit (single-row) RecordBatch with 1 column.
Index type specific properties
Scan parameters
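Since the query payload is a single-row, single-column RecordBatch, building it with `pyarrow` looks roughly like the sketch below (continuing the client sketch above). Only the shape of the batch is specified here; the request field names and the assumption that the batch travels as Arrow IPC stream bytes are guesses.

```python
import io
import pyarrow as pa

# A vector-search query: one row, one column, holding a single embedding whose
# length matches the indexed embeddings (3 here, purely for illustration).
query_batch = pa.RecordBatch.from_arrays(
    [pa.array([[0.1, 0.2, 0.3]], type=pa.list_(pa.float32()))],
    names=["query"],  # column name is an assumption
)

# Hypothetical serialization: *if* the request carries the batch as Arrow IPC
# stream bytes, this is how to produce them with pyarrow.
sink = io.BytesIO()
with pa.ipc.new_stream(sink, query_batch.schema) as writer:
    writer.write_batch(query_batch)

response = stub.SearchDataset(
    pb.SearchDatasetRequest(       # field names below are guesses
        dataset=dataset,
        column="embedding",        # the indexed column being queried
        query=sink.getvalue(),     # the unit (single-row) query RecordBatch
    )
)
# The response RecordBatch has 4 columns: 'partition_id', 'timepoint',
# the instance column, and 'instance_id', as described above.
```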
Perform Rerun-native queries on a dataset, returning the matching chunk IDs.
These Rerun-native queries include:
* Filtering by specific partition and chunk IDs.
* Latest-at, range and dataframe queries.
* Arbitrary Lance filters.
To fetch the actual chunks themselves, see `GetChunks`.
Passing chunk IDs to this method effectively acts as an IF_EXIST filter.
Dataset the client wants to query
Client can specify what partitions are queried. If left unspecified (empty list), all partitions will be queried.
Client can specify which chunk IDs to include. If left unspecified (empty list), all chunks that match the other query parameters will be included.
Which entity paths are we interested in? Leave empty to query all of them.
Generic parameters that will influence the behavior of the Lance scanner.
A chunk-level latest-at or range query, or both. This query is AND'd together with the `partition_ids` and `chunk_ids` filters above.
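A hedged request sketch for a chunk-ID-level latest-at query, continuing the client sketch above. Only `partition_ids`, `chunk_ids` and the latest-at/range structure appear in the descriptions here; the remaining names are assumptions.

```python
request = pb.QueryDatasetRequest(
    dataset=dataset,
    partition_ids=["recording_0001"],     # empty => query all partitions
    chunk_ids=[],                         # empty => no IF_EXIST filtering on chunk IDs
    entity_paths=["/camera/image"],       # empty => all entity paths
    query=pb.Query(
        latest_at=pb.QueryLatestAt(
            index="log_time",             # which index column to query
            at=1_700_000_000_000_000_000, # the index value we are looking for
        ),
    ),
)
# Returns the matching chunk IDs only; pass them to `GetChunks` to fetch the data.
chunk_ids_response = stub.QueryDataset(request)
```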
Perform Rerun-native queries on a dataset, returning the underlying chunks.
These Rerun-native queries include:
* Filtering by specific partition and chunk IDs.
* Latest-at, range and dataframe queries.
* Arbitrary Lance filters.
To fetch only the actual chunk IDs rather than the chunks themselves, see `QueryDataset`.
Dataset for which we want to get chunks
Client can specify from which partitions to get chunks. If left unspecified (empty list), data from all partitions (that match the other query parameters) will be included.
Client can specify chunk ids to include. If left unspecified (empty list), all chunks (that match other query parameters) will be included.
Which entity paths are we interested in? Leave empty to query all of them.
A chunk-level latest-at or range query, or both. This query is AND'd together with the `partition_ids` and `chunk_ids` filters above.
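And a matching sketch for fetching the chunks themselves over a time range, continuing the client sketch above. The field names, the `TimeRange` message and the assumption that the RPC streams its responses are all guesses.

```python
request = pb.GetChunksRequest(
    dataset=dataset,
    partition_ids=[],                     # empty => all partitions
    entity_paths=["/camera/image"],       # empty => all entity paths
    query=pb.Query(
        range=pb.QueryRange(
            index="log_time",             # which index column to query
            range=pb.TimeRange(start=0, end=1_700_000_000_000_000_000),
        ),
    ),
)
# Assuming a server-streaming RPC that yields the chunks as Arrow record batches:
for response in stub.GetChunks(request):
    pass  # decode the Arrow payload(s) carried by `response` here
```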
Retrieves the chunk manifest for a specific index.
Dataset for which we want to fetch chunk manifest
Chunk manifest is index specific
Scan parameters
Chunk manifest as arrow RecordBatches
Create manifests for all partitions in the Dataset. A partition manifest contains information about the chunks in a partition. This is normally done automatically as part of the registration process.
Dataset for which we want to create manifests
Create manifests for specific partitions. Manifests for all partitions will be created if left unspecified (empty list).
types of partitions and their storage location (same order as partition ids above)
Define what happens if create is called multiple times for the same Dataset / partitions
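For completeness, a hedged sketch of triggering manifest creation manually, continuing the client sketch above (normally this happens as part of registration). All names are assumptions derived from the field descriptions here.

```python
request = pb.CreatePartitionManifestsRequest(  # the actual request name may differ
    dataset=dataset,
    partition_ids=["recording_0001", "recording_0002"],  # empty => all partitions
    # storage locations / kinds, in the same order as the partition IDs above:
    data_sources=[
        pb.DataSource(storage_url="s3://bucket/recording_0001.rrd", kind="rrd"),
        pb.DataSource(storage_url="file:///data/recording_0002.rrd", kind="rrd"),
    ],
    # what happens if this is called multiple times for the same partitions
    # (enum name is a guess):
    on_duplicate=pb.IF_DUPLICATE_BEHAVIOR_ERROR,
)
stub.CreatePartitionManifests(request)
```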
Fetch the internal state of a Partition Manifest.
TODO(cmc): this should have response extensions too.
TODO(zehiko) add properties as needed
Used in:
(message has no fields)
TODO(zehiko) add properties as needed
Used in:
(message has no fields)
Used as response type in: frontend.v1alpha1.FrontendService.CreateIndex, ManifestRegistryService.CreateIndex
Used in:
Where is the data for this data source stored (e.g. s3://bucket/file or file:///path/to/file)?
What kind of data is it (e.g. rrd, mcap, Lance, etc)?
Used in:
Application level error - used as `details` in the `google.rpc.Status` message
error code
unique identifier associated with the request (e.g. recording id, recording storage url)
human readable details about the error
Error codes for application level errors
Used in:
unused
object store access error
metadata database access error
Encoding / decoding error
Used as response type in: frontend.v1alpha1.FrontendService.GetChunks, ManifestRegistryService.GetChunks
Used as response type in: frontend.v1alpha1.FrontendService.GetDatasetSchema, ManifestRegistryService.GetDatasetSchema
Used as response type in: frontend.v1alpha1.FrontendService.GetPartitionTableSchema, ManifestRegistryService.GetPartitionTableSchema
used to define which column we want to index
Used in:
The path of the entity.
Component details
Used in:
What kind of index do we want to create and what are its index-specific properties.
Component / column we want to index.
What is the filter index, i.e. the timeline for which we will query the timepoints. TODO(zehiko) this might go away and we might just index across all the timelines
Used in:
Used in:
Specific index query properties based on the index type
Used in:
TODO(zehiko) add other properties as needed
TODO(zehiko) add properties as needed
Used in:
(message has no fields)
Used in:
If specified, will perform a latest-at query with the given parameters. You can combine this with a `QueryRange` in order to gather all the relevant chunks for a full-fledged dataframe query (i.e. they get OR'd together).
If specified, will perform a range query with the given parameters. You can combine this with a `QueryLatestAt` in order to gather all the relevant chunks for a full-fledged dataframe query (i.e. they get OR'd together).
If true, `columns` will contain the entire schema.
If true, `columns` always includes `chunk_id`.
If true, `columns` always includes `byte_offset` and `byte_size`.
If true, `columns` always includes `entity_path`.
If true, `columns` always includes all static component-level indexes.
If true, `columns` always includes all temporal chunk-level indexes.
If true, `columns` always includes all component-level indexes.
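Because `latest_at` and `range` get OR'd together when both are set, a full dataframe-style query can be expressed with a single `Query`; a hedged sketch follows (all field names assumed, including the column-selection flags).

```python
start_ns, end_ns = 0, 1_700_000_000_000_000_000

# Gather both the latest-at state at `start_ns` and everything inside the range --
# together they cover the chunks needed for a full-fledged dataframe query.
query = pb.Query(
    latest_at=pb.QueryLatestAt(index="log_time", at=start_ns),
    range=pb.QueryRange(index="log_time", range=pb.TimeRange(start=start_ns, end=end_ns)),
    # column-selection flags as described above (names assumed):
    columns_always_include_chunk_ids=True,
    columns_always_include_entity_paths=True,
)
```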
Used as response type in: frontend.v1alpha1.FrontendService.QueryDataset, ManifestRegistryService.QueryDataset
A chunk-level latest-at query, aka `LatestAtRelevantChunks`. This has the exact same semantics as the query of the same name on our `ChunkStore`.
Used in:
Which index column should we perform the query on? E.g. `log_time`.
What index value are we looking for?
Which components are we interested in? If left unspecified, all existing components are considered of interest. This will perform a basic fuzzy match on the available columns' descriptors. The fuzzy logic is a simple case-sensitive `contains()` query. For example, given a `log_tick__SeriesLines:StrokeWidth#width` index, all of the following would match: `SeriesLines:StrokeWidth#width`, `StrokeWidth`, `Stroke`, `Width`, `width`, `SeriesLines`, etc.
TODO(cmc): I shall bring that back into a more structured form later. repeated rerun.common.v1alpha1.ComponentDescriptor fuzzy_descriptors = 3;
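The fuzzy component matching described above is just a case-sensitive substring test; in Python terms, roughly:

```python
def fuzzy_matches(column_descriptor: str, query: str) -> bool:
    """Case-sensitive `contains()` match on the column's descriptor."""
    return query in column_descriptor

descriptor = "log_tick__SeriesLines:StrokeWidth#width"
assert fuzzy_matches(descriptor, "SeriesLines:StrokeWidth#width")
assert fuzzy_matches(descriptor, "StrokeWidth")
assert fuzzy_matches(descriptor, "width")
assert not fuzzy_matches(descriptor, "WIDTH")  # case-sensitive, so this does not match
```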
A chunk-level range query, aka `RangeRelevantChunks`. This has the exact same semantics as the query of the same name on our `ChunkStore`.
Used in:
Which index column should we perform the query on? E.g. `log_time`.
What index range are we looking for?
Which components are we interested in? If left unspecified, all existing components are considered of interest. This will perform a basic fuzzy match on the available columns' descriptors. The fuzzy logic is a simple case-sensitive `contains()` query. For example, given a `log_tick__SeriesLines:StrokeWidth#width` index, all of the following would match: `SeriesLines:StrokeWidth#width`, `StrokeWidth`, `Stroke`, `Width`, `width`, `SeriesLines`, etc.
TODO(cmc): I shall bring that back into a more structured form later. repeated rerun.common.v1alpha1.ComponentDescriptor fuzzy_descriptors = 3;
Used as response type in: frontend.v1alpha1.FrontendService.ReIndex, ManifestRegistryService.ReIndex
Used as response type in: frontend.v1alpha1.FrontendService.RegisterWithDataset, ManifestRegistryService.RegisterWithDataset
Used as response type in: frontend.v1alpha1.FrontendService.ScanPartitionTable, ManifestRegistryService.ScanPartitionTable
Partitions metadata as arrow RecordBatch
Used as response type in: frontend.v1alpha1.FrontendService.SearchDataset, ManifestRegistryService.SearchDataset
Chunks as arrow RecordBatch
Used in:
Used in:
Used in:
Used as response type in: frontend.v1alpha1.FrontendService.WriteChunks, ManifestRegistryService.WriteChunks
(message has no fields)