Main interface for the SparkConnect service.
Executes a request that contains the query and returns a stream of [[Response]]. It is guaranteed that there is at least one ARROW batch returned even if the result set is empty.
A request to be executed by the service.
(Required) The session_id specifies a spark session for a user id (which is specified by user_context.user_id). The session_id is set by the client to be able to collate streaming responses from different queries within the dedicated session. The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
(Required) User context. user_context.user_id and session_id both identify a unique remote spark session on the server side.
(Optional) Provide an id for this request. If not provided, it will be generated by the server. It is returned in every ExecutePlanResponse.operation_id of the ExecutePlan response stream. The id must be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
(Required) The logical plan to be executed / analyzed.
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
Repeated element for options that can be passed to the request. This element is currently unused but allows passing in an extension value used for arbitrary options.
Tags to attach to the given execution. Tags cannot contain the ',' character and cannot be empty strings. Used by Interrupt with interrupt.tag.
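A minimal sketch of driving this RPC directly over gRPC. The stub module path (`pyspark.sql.connect.proto`), the generated class name `SparkConnectServiceStub`, and the default port 15002 are assumptions based on typical Spark Connect client setups; the field names match the descriptions above.

    import uuid
    import grpc
    # Assumed location of the stubs generated from base.proto.
    from pyspark.sql.connect.proto import base_pb2, base_pb2_grpc

    channel = grpc.insecure_channel("localhost:15002")  # assumed default Spark Connect port
    stub = base_pb2_grpc.SparkConnectServiceStub(channel)

    request = base_pb2.ExecutePlanRequest(
        session_id=str(uuid.uuid4()),    # chosen by the client and reused for the whole session
        operation_id=str(uuid.uuid4()),  # optional; echoed back in every response
        tags=["nightly-report"],         # usable later with Interrupt(interrupt_tag)
    )
    request.user_context.user_id = "alice"
    # request.plan would carry the Relation or Command to execute (omitted here).

    # ExecutePlan is server-streaming: at least one Arrow batch arrives even for empty results.
    for response in stub.ExecutePlan(request):
        print(response.operation_id, response.response_id)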
Analyzes a query and returns an [[AnalyzeResponse]] containing metadata about the query.
Request to perform plan analysis, optionally to explain the plan.
(Required) The session_id specifies a spark session for a user id (which is specified by user_context.user_id). The session_id is set by the client to be able to collate streaming responses from different queries within the dedicated session. The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
(Required) User context
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
Response to performing analysis of the query. Contains relevant metadata to be able to reason about the performance. Next ID: 16
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
Updates or fetches the configurations and returns a [[ConfigResponse]] containing the result.
Request to update or fetch the configurations.
(Required) The session_id specifies a spark session for a user id (which is specified by user_context.user_id). The session_id is set by the client to be able to collate streaming responses from different queries within the dedicated session. The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
(Required) User context
(Required) The operation for the config.
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
Response to the config request. Next ID: 5
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
(Optional) The result key-value pairs. Available when the operation is 'Get', 'GetWithDefault', 'GetOption', 'GetAll'. Also available for the operation 'IsModifiable' with the boolean strings "true" and "false".
(Optional) Warning messages for deprecated or unsupported configurations.
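On the client these operations are normally reached through the session's RuntimeConfig rather than by building ConfigRequest messages by hand; a short sketch, assuming `spark` is a Spark Connect `SparkSession`:

    # Each call below maps to one ConfigRequest operation described above.
    spark.conf.set("spark.sql.shuffle.partitions", "50")        # Set
    current = spark.conf.get("spark.sql.shuffle.partitions")    # Get
    fallback = spark.conf.get("spark.nonexistent.key", "x")     # GetWithDefault
    print(spark.conf.isModifiable("spark.sql.shuffle.partitions"))  # IsModifiable
    spark.conf.unset("spark.sql.shuffle.partitions")            # Unset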
Adds artifacts to the session and returns an [[AddArtifactsResponse]] containing metadata about the added artifacts.
Request to transfer client-local artifacts.
(Required) The session_id specifies a spark session for a user id (which is specified by user_context.user_id). The session_id is set by the client to be able to collate streaming responses from different queries within the dedicated session. The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
User context
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
The payload is either a batch of artifacts or a partial chunk of a large artifact.
The metadata and the initial chunk of a large artifact chunked into multiple requests. The server side is notified about the total size of the large artifact as well as the number of chunks to expect.
A chunk of an artifact excluding metadata. This can be any chunk of a large artifact excluding the first chunk (which is included in `BeginChunkedArtifact`).
Response to adding an artifact. Contains relevant metadata to verify successful transfer of artifact(s). Next ID: 4
Session id in which the AddArtifact was running.
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
The list of artifact(s) seen by the server.
Checks statuses of artifacts in the session and returns them in an [[ArtifactStatusesResponse]].
Request to get current statuses of artifacts at the server side.
(Required) The session_id specifies a spark session for a user id (which is specified by user_context.user_id). The session_id is set by the client to be able to collate streaming responses from different queries within the dedicated session. The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
User context
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
The name of the artifact is expected in the form of a "Relative Path" that is made up of a sequence of directories and the final file element. Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class", "abc.xyz", "a/b/X.jar". The server is expected to maintain the hierarchy of files as defined by their name (i.e., the relative path of the file on the server's filesystem will be the same as the name of the provided artifact).
Response to checking artifact statuses. Next ID: 4
Session id in which the ArtifactStatus was running.
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
A map of artifact names to their statuses.
Interrupts running executions
(Required) The session_id specifies a spark session for a user id (which is specified by user_context.user_id). The session_id is set by the client to be able to collate streaming responses from different queries within the dedicated session. The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
(Required) User context
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
(Required) The type of interrupt to execute.
if interrupt_type == INTERRUPT_TYPE_TAG, interrupt operation with this tag.
if interrupt_type == INTERRUPT_TYPE_OPERATION_ID, interrupt operation with this operation_id.
Next ID: 4
Session id in which the interrupt was running.
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
Operation ids of the executions which were interrupted.
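In the PySpark Connect client these interrupt types surface as session-level helpers (method names assumed from the 3.5-era API; `op_id` is a hypothetical operation id captured from an earlier request):

    spark.addTag("etl-batch")           # tag executions started from this thread

    # ... kick off long-running actions asynchronously ...

    spark.interruptTag("etl-batch")     # INTERRUPT_TYPE_TAG: stop everything carrying the tag
    spark.interruptOperation(op_id)     # INTERRUPT_TYPE_OPERATION_ID: stop a single operation
    interrupted = spark.interruptAll()  # interrupt all running executions in the session
    print(interrupted)                  # ids of the interrupted operations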
Reattach to an existing reattachable execution. The ExecutePlan must have been started with ReattachOptions.reattachable=true. If the ExecutePlanResponse stream ends without a ResultComplete message, there is more to continue. If there is a ResultComplete, the client should use ReleaseExecute to release the execution.
(Required) The session_id of the request to reattach to. This must be an id of an existing session.
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
(Required) User context. user_context.user_id and session_id both identify a unique remote spark session on the server side.
(Required) Provide an id of the request to reattach to. This must be an id of an existing operation.
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
(Optional) Last already processed response id from the response stream. After reattach, the server will resume the response stream after that response. If not specified, the server will restart the stream from the start. Note: the server controls the amount of responses that it buffers and it may drop responses that are far behind the latest returned response, so this cannot be used to arbitrarily scroll back the cursor. If the response is no longer available, this will result in an error.
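A rough sketch of the reattach loop described above, reusing the hypothetical stubs from the earlier ExecutePlan example. The essential point is that the client remembers the last processed `response_id` and sends it back so the server can resume the stream after it; the `result_complete` field name is an assumption based on the ResultComplete message described later.

    def execute_with_reattach(stub, request):
        # The request must have been sent with ReattachOptions.reattachable=true.
        last_response_id = None
        stream = stub.ExecutePlan(request)
        while True:
            try:
                for response in stream:
                    last_response_id = response.response_id
                    yield response
                    if response.HasField("result_complete"):
                        return  # stream fully consumed; the caller should now ReleaseExecute
            except grpc.RpcError:
                pass  # network failure: fall through and reattach
            # Stream ended or broke without ResultComplete, so there is more to fetch.
            reattach = base_pb2.ReattachExecuteRequest(
                session_id=request.session_id,
                operation_id=request.operation_id,
            )
            reattach.user_context.CopyFrom(request.user_context)
            if last_response_id is not None:
                reattach.last_response_id = last_response_id  # resume right after this response
            stream = stub.ReattachExecute(reattach)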
Release a reattachable execution, or parts thereof. The ExecutePlan must have been started with ReattachOptions.reattachable=true. Non-reattachable executions are released automatically and immediately after the ExecutePlan RPC, and ReleaseExecute may not be used on them.
(Required) The session_id of the session containing the execution to release. This must be an id of an existing session.
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
(Required) User context. user_context.user_id and session_id both identify a unique remote spark session on the server side.
(Required) Provide the id of the operation to release. This must be an id of an existing operation.
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
Next ID: 4
Session id in which the release was running.
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
Operation id of the operation on which the release executed. If the operation couldn't be found (because, e.g., it was concurrently released), this will be unset. Otherwise, it will be equal to the operation_id from the request.
Release a session. All the executions in the session will be released. Any further requests for the session with that session_id for the given user_id will fail. If the session didn't exist or was already released, this is a noop.
(Required) The session_id of the session to release. This must be an id of an existing session.
(Required) User context. user_context.user_id and session_id both identify a unique remote spark session on the server side.
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
Next ID: 3
Session id of the session on which the release executed.
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
FetchErrorDetails retrieves the matched exception with details based on a provided error id.
(Required) The session_id specifies a Spark session for a user identified by user_context.user_id. The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`.
(Optional) Server-side generated idempotency key from the previous responses (if any). Server can use this to validate that the server side session has not changed.
User context
(Required) The id of the error.
Provides optional information about the client sending the request. This field can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server.
Next ID: 5
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
The index of the root error in errors. The field will not be set if the error is not found.
A list of errors.
A chunk of an Artifact.
Used in:
Data chunk.
CRC to allow server to verify integrity of the chunk.
A number of `SingleChunkArtifact` batched into a single RPC.
Used in:
Signals the beginning/start of a chunked artifact. A large artifact is transferred through a payload of `BeginChunkedArtifact` followed by a sequence of `ArtifactChunk`s.
Used in:
Name of the artifact undergoing chunking. Follows the same conventions as the `name` in the `Artifact` message.
Total size of the artifact in bytes.
Number of chunks the artifact is split into. This includes the `initial_chunk`.
The first/initial chunk.
An artifact that is contained in a single `ArtifactChunk`. Generally, this message represents tiny artifacts such as REPL-generated class files.
Used in:
The name of the artifact is expected in the form of a "Relative Path" that is made up of a sequence of directories and the final file element. Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class", "abc.xyz", "a/b/X.jar". The server is expected to maintain the hierarchy of files as defined by their name (i.e., the relative path of the file on the server's filesystem will be the same as the name of the provided artifact).
A single data chunk.
Metadata of an artifact.
Used in:
Whether the CRC (Cyclic Redundancy Check) is successful on server verification. The server discards any artifact that fails the CRC. If false, the client may choose to resend the artifact specified by `name`.
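A sketch of how a client might split one large artifact across AddArtifactsRequest messages following the scheme above, reusing the hypothetical `base_pb2` module from the earlier sketches. The oneof field names (`begin_chunk`, `chunk`) and the scalar field names inside BeginChunkedArtifact are inferred from these descriptions and may differ from the actual .proto; the CRC is assumed to be a plain CRC-32.

    import zlib

    CHUNK_SIZE = 32 * 1024 * 1024  # illustrative chunk size

    def add_artifact_requests(name, data, session_id):
        chunks = [data[i:i + CHUNK_SIZE] for i in range(0, len(data), CHUNK_SIZE)] or [b""]

        def make_chunk(payload):
            return base_pb2.AddArtifactsRequest.ArtifactChunk(
                data=payload, crc=zlib.crc32(payload))

        # First request carries the metadata plus the initial chunk.
        first = base_pb2.AddArtifactsRequest(session_id=session_id)
        first.begin_chunk.name = name               # e.g. "jars/test.jar"
        first.begin_chunk.total_bytes = len(data)   # total size of the artifact
        first.begin_chunk.num_chunks = len(chunks)  # includes the initial chunk
        first.begin_chunk.initial_chunk.CopyFrom(make_chunk(chunks[0]))
        yield first

        # Every following request carries one bare chunk, with no metadata.
        for payload in chunks[1:]:
            rest = base_pb2.AddArtifactsRequest(session_id=session_id)
            rest.chunk.CopyFrom(make_chunk(payload))
            yield rest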
Relation of type [[Aggregate]].
Used in:
(Required) Input relation for a RelationalGroupedDataset.
(Required) How the RelationalGroupedDataset was built.
(Required) Expressions for grouping keys
(Required) List of values that will be translated to columns in the output DataFrame.
(Optional) Pivots a column of the current `DataFrame` and performs the specified aggregation.
(Optional) List of values that will be translated to columns in the output DataFrame.
Used in:
Used in:
(Required) Individual grouping set
Used in:
(Required) The column to pivot
(Optional) List of values that will be translated to columns in the output DataFrame. Note that if it is empty, the server side will immediately trigger a job to collect the distinct values of the column.
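This is the message the DataFrame grouping and pivot API lowers to; a sketch with the public PySpark API, where `sales` is a hypothetical DataFrame. Passing explicit pivot values fills the optional value list, so the server does not have to run an extra job to collect the distinct values of the pivot column.

    from pyspark.sql import functions as F

    pivoted = (
        sales.groupBy("region")                           # grouping expressions
             .pivot("quarter", ["Q1", "Q2", "Q3", "Q4"])  # pivot column plus explicit values
             .agg(F.sum("amount"))                        # aggregate expressions
    )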
Used in:
(Required) The DDL formatted string to be parsed.
Explains the input plan based on a configurable mode.
Used in:
(Required) The logical plan to be analyzed.
(Required) For analyzePlan rpc calls, configure the mode to explain plan in strings.
Plan explanation mode.
Used in:
Generates only physical plan.
Generates parsed logical plan, analyzed logical plan, optimized logical plan and physical plan. The parsed logical plan is an unresolved plan extracted from the query. The analyzed logical plan transforms the parsed plan by resolving unresolvedAttribute and unresolvedRelation into fully typed objects. The optimized logical plan is transformed through a set of optimization rules, resulting in the physical plan.
Generates code for the statement, if any, and a physical plan.
If plan node statistics are available, generates a logical plan and also the statistics.
Generates a physical plan outline and also node details.
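These modes correspond to the explain variants exposed on the DataFrame API; a sketch using PySpark, where `df` stands for any DataFrame:

    df.explain()                  # physical plan only
    df.explain(extended=True)     # parsed, analyzed, optimized and physical plans
    df.explain(mode="codegen")    # generated code, if any, plus the physical plan
    df.explain(mode="cost")       # logical plan with node statistics, when available
    df.explain(mode="formatted")  # physical plan outline plus node details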
Used in:
(Required) The logical plan to get the storage level.
Used in:
(Required) The logical plan to be analyzed.
Used in:
(Required) The logical plan to be analyzed.
Used in:
(Required) The logical plan to be analyzed.
Used in:
(Required) The logical plan to persist.
(Optional) The storage level.
Returns `true` when the logical query plans are equal and therefore return the same results.
Used in:
(Required) The plan to be compared.
(Required) The other plan to be compared.
Used in:
(Required) The logical plan to be analyzed.
Used in:
(Required) The logical plan to get a hashCode.
Used in:
(message has no fields)
Used in:
(Required) The logical plan to be analyzed.
(Optional) Max level of the schema.
Used in:
(Required) The logical plan to unpersist.
(Optional) Whether to block until all blocks are deleted.
Used in:
Used in:
Used in:
(Required) The StorageLevel as a result of get_storage_level request.
Used in:
A best-effort snapshot of the files that compose this Dataset
Used in:
Used in:
Used in:
(message has no fields)
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
(message has no fields)
Used in:
(Required) Input relation for applyInPandasWithState.
(Required) Expressions for grouping keys.
(Required) Input user-defined function.
(Required) Schema for the output DataFrame.
(Required) Schema for the state.
(Required) The output mode of the function.
(Required) Timeout configuration for groups that do not receive data for a while.
Used in:
Whether or not the particular artifact exists at the server.
Relation of type [[AsOfJoin]]. `left` and `right` must be present.
Used in:
(Required) Left input relation for a Join.
(Required) Right input relation for a Join.
(Required) Field to join on in left DataFrame
(Required) Field to join on in right DataFrame
(Optional) The join condition. Could be unset when `using_columns` is utilized. This field does not co-exist with using_columns.
Optional. using_columns provides a list of columns that should be present on both sides of the join inputs that this Join will join on. For example, A JOIN B USING col_name is equivalent to A JOIN B ON A.col_name = B.col_name. This field does not co-exist with join_condition.
(Required) The join type.
(Optional) The asof tolerance within this range.
(Required) Whether to allow matching with the same value or not.
(Required) Whether to search for prior, subsequent, or closest matches.
See `spark.catalog.cacheTable`
Used in:
(Required)
(Optional)
A local relation that has been cached already.
Used in:
(Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation.
Represents a remote relation that has been cached on server.
Used in:
(Required) ID of the remote relation (assigned by the service).
Used in:
(Required) Unparsed name of the SQL function.
(Optional) Function arguments. Empty arguments are allowed.
Catalog messages are marked as unstable.
Used in:
Used in:
(Required) The logical plan to checkpoint.
(Required) Whether to locally checkpoint using a local temporary directory on the Spark Connect server (Spark driver).
(Required) Whether to checkpoint this dataframe immediately.
Used in:
(Required) The logical plan checkpointed.
See `spark.catalog.clearCache`
Used in:
(message has no fields)
Used in:
(Required) One input relation for CoGroup Map API - applyInPandas.
Expressions for grouping keys of the first input relation.
(Required) The other input relation.
Expressions for grouping keys of the other input relation.
(Required) Input user-defined function.
(Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API.
(Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API.
Collect arbitrary (named) metrics from a dataset.
Used in:
(Required) The input relation.
(Required) Name of the metrics.
(Required) The metric sequence.
A [[Command]] is an operation that is executed by the server that does not directly consume or produce a relational result.
Used in:
This field is used to mark extensions to the protocol. When plugins generate arbitrary Commands they can add them here. During the planning the correct resolution is done.
Used in:
(Required) Name of the data source.
(Required) The data source type.
Used in:
(Required) Name of the user-defined function.
(Optional) Indicate if the user-defined function is deterministic.
(Optional) Function arguments. Empty arguments are allowed.
(Required) Indicate the function type of the user-defined function.
Used in:
(Required) Name of the user-defined table function.
(Optional) Whether the user-defined table function is deterministic.
(Optional) Function input arguments. Empty arguments are allowed.
(Required) Type of the user-defined table function.
Used in:
(Required) The config keys to get.
Used in:
(Optional) The prefix of the config key to get.
Used in:
(Required) The config keys to get optionally.
Used in:
(Required) The config key-value pairs to get. The value will be used as the default value.
Used in:
(Required) The config keys to check the config is modifiable.
Used in:
Used in:
(Required) The config key-value pairs to set.
Used in:
(Required) The config keys to unset.
A command that can create DataFrame global temp view or local temp view.
Used in:
(Required) The relation that this view will be built on.
(Required) View name.
(Required) Whether this is global temp view or local temp view.
(Required) If true, and if the view already exists, updates it; if false, and if the view already exists, throws exception.
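In the DataFrame API this command is produced by the temp-view helpers; a small sketch, where `df` is a placeholder DataFrame:

    df.createOrReplaceTempView("events")      # local temp view, replace = true
    df.createGlobalTempView("events_global")  # global temp view, fails if it already exists
    spark.sql("SELECT count(*) FROM events").show()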
See `spark.catalog.createExternalTable`
Used in:
(Required)
(Optional)
(Optional)
(Optional)
Options may be empty for a valid data source format. The map key is case insensitive.
Command to create ResourceProfile
Used in:
(Required) The ResourceProfile to be built on the server-side.
Response for command 'CreateResourceProfileCommand'.
Used in:
(Required) Server-side generated resource profile id.
See `spark.catalog.createTable`
Used in:
(Required)
(Optional)
(Optional)
(Optional)
(Optional)
Options may be empty for a valid data source format. The map key is case insensitive.
See `spark.catalog.currentCatalog`
Used in:
(message has no fields)
See `spark.catalog.currentDatabase`
Used in:
(message has no fields)
This message describes the logical [[DataType]] of something. It does not carry the value itself but only describes it.
Used in:
Numeric types
String types
Datetime types
Interval types
Complex types
UserDefinedType
UnparsedDataType
Used in:
Used in:
Used in:
Used in:
Used in:
Start compound types.
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
Used in:
(Required) The unparsed data type string
Used in:
Used in:
Used in:
See `spark.catalog.databaseExists`
Used in:
(Required)
Relation of type [[Deduplicate]] which has duplicate rows removed. It can consider either only a subset of the columns or all of the columns.
Used in:
(Required) Input relation for a Deduplicate.
(Optional) Deduplicate based on a list of column names. This field cannot be used together with `all_columns_as_keys`.
(Optional) Deduplicate based on all the columns of the input relation. This field cannot be used together with `column_names`.
(Optional) Deduplicate within the time range of watermark.
Drop specified columns.
Used in:
(Required) The input relation.
(Optional) columns to drop.
(Optional) names of columns to drop.
See `spark.catalog.dropGlobalTempView`
Used in:
(Required)
See `spark.catalog.dropTempView`
Used in:
(Required)
Used in:
Extension type for request options
The response of a query, can be one or more for each request. Responses belonging to the same input query, carry the same `session_id`. Next ID: 17
Used as response type in: SparkConnectService.ExecutePlan, SparkConnectService.ReattachExecute
Server-side generated idempotency key that the client can use to assert that the server side session has not changed.
Identifies the ExecutePlan execution. If set by the client in ExecutePlanRequest.operationId, that value is returned. Otherwise generated by the server. It is a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
Identifies the response in the stream. The id is a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
Union type for the different response messages.
Special case for executing SQL commands.
Response for a streaming query.
Response for commands on a streaming query.
Response for 'SparkContext.resources'.
Response for commands on the streaming query manager.
Response for commands on the client side streaming query listener.
Response type informing if the stream is complete in reattachable execution.
Response for command that creates ResourceProfile.
(Optional) Intermediate query progress reports.
Response for command that checkpoints a DataFrame.
Support arbitrary result objects.
Metrics for the query execution. Typically, this field is only present in the last batch of results and then represents the overall state of the query execution.
The metrics observed during the execution of the query plan.
(Optional) The Spark schema. This field is available when `collect` is called.
Batch results of metrics.
Used in:
Count of rows in `data`. Must match the number of rows inside `data`.
Serialized Arrow data.
If set, row offset of the start of this ArrowBatch in execution results.
This message is used to communicate progress about the query progress during the execution.
Used in:
Captures the progress of each individual stage.
Captures the currently in progress tasks.
Used in:
Used in:
Used in:
Used in:
Used in:
If present, in a reattachable execution this means that after the server sends onComplete, the execution is complete. If the server sends onComplete without sending a ResultComplete, it means that there is more to come, and the client should use the ReattachExecute RPC to continue.
Used in:
(message has no fields)
A SQL command returns an opaque Relation that can be directly used as input for the next call.
Used in:
An executor resource request.
Used in:
(Required) The resource name.
(Required) The amount of the resource being requested.
Optional script used to discover the resources.
Optional vendor, required for some cluster managers.
Expression used to refer to fields, functions and similar. This can be used everywhere expressions in SQL appear.
Used in:
This field is used to mark extensions to the protocol. When plugins generate arbitrary relations they can add them here. During the planning the correct resolution is done.
Used in:
(Required) The expression that the alias will be added on.
(Required) A list of name parts for the alias. Scalar columns only have one name part.
(Optional) Alias metadata expressed as a JSON map.
Used in:
(Required) The expression to be cast.
(Required) The data type that the expression is to be cast to.
If this is set, the server will use the Catalyst parser to parse this string to a DataType.
(Optional) The expression evaluation mode.
Used in:
Expression as string.
Used in:
(Required) A SQL expression that will be parsed by Catalyst parser.
Used in:
(Required) The lambda function. The function body should use 'UnresolvedAttribute' as arguments; the server side will replace 'UnresolvedAttribute' with 'UnresolvedNamedLambdaVariable'.
(Required) Function variables. Must contain 1 to 3 variables.
Used in:
Date in units of days since the UNIX epoch.
Timestamp in units of microseconds since the UNIX epoch.
Timestamp in units of microseconds since the UNIX epoch (without timezone information).
Used in:
Used in:
Used in:
The string representation.
The maximum number of digits allowed in the value. The maximum precision is 38.
The declared scale of the decimal literal.
Used in:
Used in:
SortOrder is used to specify the data ordering; it is normally used in Sort and Window. It is an unevaluable expression and cannot be evaluated, so it cannot be used in a Projection.
Used in:
(Required) The expression to be sorted.
(Required) The sort direction, should be ASCENDING or DESCENDING.
(Required) How to deal with NULLs, should be NULLS_FIRST or NULLS_LAST.
Used in:
Used in:
An unresolved attribute that is not explicitly bound to a specific column, but the column is resolved during analysis by name.
Used in:
(Required) An identifier that will be parsed by Catalyst parser. This should follow the Spark SQL identifier syntax.
(Optional) The id of corresponding connect plan.
(Optional) The requested column is a metadata column.
Extracts a value or values from an Expression
Used in:
(Required) The expression to extract value from, can be Map, Array, Struct or array of Structs.
(Required) The expression to describe the extraction, can be key of Map, index of Array, field name of Struct.
An unresolved function is not explicitly bound to one explicit function, but the function is resolved during analysis following Spark's name resolution rules.
Used in:
(Required) name (or unparsed name for user defined function) for the unresolved function.
(Optional) Function arguments. Empty arguments are allowed.
(Required) Indicate if this function should be applied on distinct values.
(Required) Indicate if this is a user defined function. When it is not a user defined function, Connect will use the function name directly. When it is a user defined function, Connect will parse the function name first.
Used in:
(Required) A list of name parts for the variable. Must not be empty.
Represents all of the input attributes to a given relational operator, for example in "SELECT `(id)?+.+` FROM ...".
Used in:
(Required) The column name used to extract column with regex.
(Optional) The id of corresponding connect plan.
UnresolvedStar is used to expand all the fields of a relation or struct.
Used in:
(Optional) The target of the expansion. If set, it should end with '.*' and will be parsed by 'parseAttributeName' in the server side.
(Optional) The id of corresponding connect plan.
Add, replace or drop a field of `StructType` expression by name.
Used in:
(Required) The struct expression.
(Required) The field name.
(Optional) The expression to add or replace. When not set, it means this field will be dropped.
Expression for the OVER clause or WINDOW clause.
Used in:
(Required) The window function.
(Optional) The way that input rows are partitioned.
(Optional) Ordering of rows in a partition.
(Optional) Window frame in a partition. If not set, it will be treated as 'UnspecifiedFrame'.
The window frame
Used in:
(Required) The type of the frame.
(Required) The lower bound of the frame.
(Required) The upper bound of the frame.
Used in:
CURRENT ROW boundary
UNBOUNDED boundary. For lower bound, it will be converted to 'UnboundedPreceding'. For upper bound, it will be converted to 'UnboundedFollowing'.
This is an expression for future proofing. We are expecting literals on the server side.
Used in:
RowFrame treats rows in a partition individually.
RangeFrame treats rows in a partition as groups of peers. All rows having the same 'ORDER BY' ordering are considered as peers.
Used in:
(Required) Keep the information of the origin for this expression such as stacktrace.
Error defines the schema for the representing exception.
Used in:
The fully qualified names of the exception class and its parent classes.
The detailed message of the exception.
The stackTrace of the exception. It will be set if the SQLConf spark.sql.connect.serverStacktrace.enabled is true.
The index of the cause error in errors.
The structured data of a SparkThrowable exception.
QueryContext defines the schema for the query context of a SparkThrowable. It helps users understand where the error occurs while executing queries.
Used in:
The object type of the query which throws the exception. If the exception is directly from the main query, it should be an empty string. Otherwise, it should be the exact object type in upper case. For example, a "VIEW".
The object name of the query which throws the exception. If the exception is directly from the main query, it should be an empty string. Otherwise, it should be the object name. For example, a view name "V1".
The starting index in the query text which throws the exception. The index starts from 0.
The stopping index in the query which throws the exception. The index starts from 0.
The corresponding fragment of the query which throws the exception.
The user code (call site of the API) that caused throwing the exception.
Summary of the exception cause.
The type of this query context.
Used in:
SparkThrowable defines the schema for SparkThrowable exceptions.
Used in:
Succinct, human-readable, unique, and consistent representation of the error category.
The message parameters for the error framework.
The query context of a SparkThrowable.
Portable error identifier across SQL engines. If null, the error class or SQLSTATE is not set.
Used in:
The fully qualified name of the class containing the execution point.
The name of the method containing the execution point.
The name of the file containing the execution point.
The line number of the source line containing the execution point.
Relation that applies a boolean expression `condition` on each row of `input` to produce the output result.
Used in:
(Required) Input relation for a Filter.
(Required) A Filter must have a condition expression.
See `spark.catalog.functionExists`
Used in:
(Required)
(Optional)
See `spark.catalog.getDatabase`
Used in:
(Required)
See `spark.catalog.getFunction`
Used in:
(Required)
(Optional)
Command to get the output of 'SparkContext.resources'
Used in:
(message has no fields)
Response for command 'GetResourcesCommand'.
Used in:
See `spark.catalog.getTable`
Used in:
(Required)
(Optional)
Used in:
(Required) Input relation for Group Map API: apply, applyInPandas.
(Required) Expressions for grouping keys.
(Required) Input user-defined function.
(Optional) Expressions for sorting. Only used by Scala Sorted Group Map API.
The fields below are only used by (Flat)MapGroupsWithState. (Optional) Input relation for the initial state.
(Optional) Expressions for grouping keys of the initial state input relation.
(Optional) True if MapGroupsWithState, false if FlatMapGroupsWithState.
(Optional) The output mode of the function.
(Optional) Timeout configuration for groups that do not receive data for a while.
Specify a hint over a relation. Hint should have a name and optional parameters.
Used in:
(Required) The input relation.
(Required) Hint name. Supported Join hints include BROADCAST, MERGE, SHUFFLE_HASH, SHUFFLE_REPLICATE_NL. Supported partitioning hints include COALESCE, REPARTITION, REPARTITION_BY_RANGE.
(Optional) Hint parameters.
Compose the string representing rows for output. It will invoke 'Dataset.htmlString' to compute the results.
Used in:
(Required) The input relation.
(Required) Number of rows to show.
(Required) If set to more than 0, truncates strings to `truncate` characters and all cells will be aligned right.
Used in:
Interrupt all running executions within the session with the provided session_id.
Interrupt all running executions within the session with the provided operation_tag.
Interrupt the running execution within the session with the provided operation_id.
See `spark.catalog.isCached`
Used in:
(Required)
Used in:
(Required) Fully qualified name of Java class
(Optional) Output type of the Java UDF
(Required) Indicate if the Java user-defined function is an aggregate function
Relation of type [[Join]]. `left` and `right` must be present.
Used in:
(Required) Left input relation for a Join.
(Required) Right input relation for a Join.
(Optional) The join condition. Could be unset when `using_columns` is utilized. This field does not co-exist with using_columns.
(Required) The join type.
Optional. using_columns provides a list of columns that should be present on both sides of the join inputs that this Join will join on. For example, A JOIN B USING col_name is equivalent to A JOIN B ON A.col_name = B.col_name. This field does not co-exist with join_condition.
(Optional) Only used by joinWith. Set the left and right join data types.
Used in:
If the left data type is a struct.
If the right data type is a struct.
Used in:
The key-value pair for the config request and response.
Used in:
(Required) The key.
(Optional) The value.
Relation of type [[Limit]] that is used to `limit` rows from the input relation.
Used in:
(Required) Input relation for a Limit.
(Required) the limit.
See `spark.catalog.listCatalogs`
Used in:
(Optional) The pattern that the catalog name needs to match
See `spark.catalog.listColumns`
Used in:
(Required)
(Optional)
See `spark.catalog.listDatabases`
Used in:
(Optional) The pattern that the database name needs to match
See `spark.catalog.listFunctions`
Used in:
(Optional)
(Optional) The pattern that the function name needs to match
See `spark.catalog.listTables`
Used in:
(Optional)
(Optional) The pattern that the table name needs to match
A relation that does not need to be qualified by name.
Used in:
(Optional) Local collection data serialized into Arrow IPC streaming format which contains the schema of the data.
(Optional) The schema of local data. It should be either a DDL-formatted type string or a JSON string. The server side will update the column names and data types according to this schema. If the 'data' is not provided, then this schema will be required.
Used in:
(Required) Input relation for a mapPartitions-equivalent API: mapInPandas, mapInArrow.
(Required) Input user-defined function.
(Optional) Whether to use barrier mode execution or not.
(Optional) ResourceProfile id used for the stage level scheduling.
Used in:
(Required) The action type of the merge action.
(Optional) The condition expression of the merge action.
(Optional) The assignments of the merge action. Required for ActionTypes INSERT and UPDATE.
Used in:
Used in:
(Required) The key of the assignment.
(Required) The value of the assignment.
Used in:
(Required) The name of the target table.
(Required) The relation of the source table.
(Required) The condition to match the source and target.
(Optional) The actions to be taken when the condition is matched.
(Optional) The actions to be taken when the condition is not matched.
(Optional) The actions to be taken when the condition is not matched by source.
(Required) Whether to enable schema evolution.
Drop rows containing null values. It will invoke 'Dataset.na.drop' (same as 'DataFrameNaFunctions.drop') to compute the results.
Used in:
(Required) The input relation.
(Optional) Optional list of column names to consider. When it is empty, all the columns in the input relation will be considered.
(Optional) The minimum number of non-null and non-NaN values required to keep. When not set, it is equivalent to the number of considered columns, which means a row will be kept only if all columns are non-null. 'how' options ('all', 'any') can be easily converted to this field: 'all' -> set 'min_non_nulls' to 1; 'any' -> keep 'min_non_nulls' unset.
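The public API's 'how' and 'thresh' arguments map onto this field as described above; a sketch, where `df` is a placeholder DataFrame:

    df.na.drop(how="any")                    # min_non_nulls left unset: every column must be non-null
    df.na.drop(how="all")                    # min_non_nulls = 1: keep rows with at least one non-null
    df.na.drop(thresh=2, subset=["a", "b"])  # min_non_nulls = 2 over the listed columns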
Replaces null values. It will invoke 'Dataset.na.fill' (same as 'DataFrameNaFunctions.fill') to compute the results. The following 3 parameter combinations are supported: 1) 'values' contains only 1 item and 'cols' is empty: replaces null values in all type-compatible columns. 2) 'values' contains only 1 item and 'cols' is not empty: replaces null values in the specified columns. 3) 'values' contains more than 1 item: 'cols' is required to have the same length, and each specified column is replaced with the corresponding value.
Used in:
(Required) The input relation.
(Optional) Optional list of column names to consider.
(Required) Values to replace null values with. Should contain at least 1 item. Only 4 data types are supported now: bool, long, double, string
Replaces old values with the corresponding values. It will invoke 'Dataset.na.replace' (same as 'DataFrameNaFunctions.replace') to compute the results.
Used in:
(Required) The input relation.
(Optional) List of column names to consider. When it is empty, all the type-compatible columns in the input relation will be considered.
(Optional) The value replacement mapping.
Used in:
(Required) The old value. Only 4 data types are supported now: null, bool, double, string.
(Required) The new value. Should be of the same data type with the old value.
Used in:
(Required) The key of the named argument.
(Required) The value expression of the named argument.
Relation of type [[Offset]] that is used to read rows starting from the `offset` on the input relation.
Used in:
(Required) Input relation for an Offset.
(Required) the limit.
Used in:
,(Required) Indicate the origin type.
Used in:
(Required) Input relation to Parse. The input is expected to have a single text column.
(Required) The expected format of the text.
(Optional) DataType representing the schema. If not set, Spark will infer the schema.
Options for the csv/json parser. The map key is case insensitive.
Used in:
A [[Plan]] is the structure that carries the runtime information for the execution from the client to the server. A [[Plan]] can either be of the type [[Relation]] which is a reference to the underlying logical plan or it can be of the [[Command]] type that is used to execute commands on the server.
Used in:
Projection of a bag of expressions for a given input relation. The input relation must be specified. The projected expression can be an arbitrary expression.
Used in:
(Optional) Input relation is optional for Project. For example, `SELECT ABS(-1)` is valid plan without an input plan.
(Required) A Project requires at least one expression.
Used in:
(Required) The encoded commands of the Python data source.
(Required) Python version being used in the client.
Used in:
(Required) Name of the origin, for example, the name of the function
(Required) Callsite to show to end users, for example, stacktrace.
Used in:
(Required) Output type of the Python UDF
(Required) EvalType of the Python UDF
(Required) The encoded commands of the Python UDF
(Required) Python version being used in the client.
(Optional) Additional includes for the Python UDF.
Used in:
(Optional) Return type of the Python UDTF.
(Required) EvalType of the Python UDTF.
(Required) The encoded commands of the Python UDTF.
(Required) Python version being used in the client.
Relation of type [[Range]] that generates a sequence of integers.
Used in:
(Optional) Default value = 0
(Required)
(Required)
Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if it is set, or 2) spark default parallelism.
Relation that reads from a file / table or other data source. Does not have additional inputs.
Used in:
(Optional) Indicates if this is a streaming read.
Used in:
(Optional) Supported formats include: parquet, orc, text, json, csv, avro. If not set, the value from SQL conf 'spark.sql.sources.default' will be used.
(Optional) If not set, Spark will infer the schema. This schema string should be either DDL-formatted or JSON-formatted.
Options for the data source. The context of this map varies based on the data source format. These options may be empty for a valid data source format. The map key is case insensitive.
(Optional) A list of paths for file-system backed data sources.
(Optional) Condition in the where clause for each partition. This is only supported by the JDBC data source.
Used in:
(Required) Unparsed identifier for the table.
Options for the named table. The map key is case insensitive.
Used in:
If true, the request can be reattached to using ReattachExecute. ReattachExecute can be used either if the stream broke with a GRPC network error, or if the server closed the stream without sending a response with StreamStatus.complete=true. The server will keep a buffer of responses in case a response is lost, and ReattachExecute needs to back-track. If false, the execution response stream will not be reattachable, and all responses are immediately released by the server after being sent.
See `spark.catalog.recoverPartitions`
Used in:
(Required)
See `spark.catalog.refreshByPath`
Used in:
(Required)
See `spark.catalog.refreshTable`
Used in:
(Required)
The main [[Relation]] type. Fundamentally, a relation is a typed container that has exactly one explicit relation type set. When adding new relation types, they have to be registered here.
Used in:
NA functions
stat functions
Catalog API (experimental / unstable)
This field is used to mark extensions to the protocol. When plugins generate arbitrary relations they can add them here. During the planning the correct resolution is done.
Common metadata of all relations.
Used in:
(Required) Shared relation metadata.
(Optional) A per-client globally unique id for a given connect plan.
(Optional) Keep the information of the origin for this expression such as stacktrace.
Release and close the operation completely. This will also interrupt the query if it is still running, and wait for it to be torn down.
Used in:
(message has no fields)
Release all responses from the operation response stream up to and including the response with the given response_id. While the server determines by itself how much of a buffer of responses to keep, the client providing explicit release calls will help reduce resource consumption. Noop if the response_id is not found in the cached responses.
Used in:
Command to remove `CachedRemoteRelation`
Used in:
(Required) The cached remote relation to be removed.
Relation repartition.
Used in:
(Required) The input relation of Repartition.
(Required) Must be positive.
(Optional) Default value is false.
Used in:
(Required) The input relation.
(Required) The partitioning expressions.
(Optional) number of partitions, must be positive.
ResourceInformation to hold information about a type of Resource. The corresponding class is 'org.apache.spark.resource.ResourceInformation'
Used in:
(Required) The name of the resource
(Required) An array of strings describing the addresses of the resource.
Used in:
(Optional) Resource requests for executors. Mapped from the resource name (e.g., cores, memory, CPU) to its specific request.
(Optional) Resource requests for tasks. Mapped from the resource name (e.g., cores, memory, CPU) to its specific request.
Relation that uses a SQL query to generate the output.
Used in:
(Required) The SQL query.
(Optional) A map of parameter names to literal expressions.
(Optional) A sequence of literal expressions for positional parameters in the SQL query text.
(Optional) A map of parameter names to expressions. It cannot coexist with `pos_arguments`.
(Optional) A sequence of expressions for positional parameters in the SQL query text. It cannot coexist with `named_arguments`.
Relation of type [[Sample]] that samples a fraction of the dataset.
Used in:
(Required) Input relation for a Sample.
(Required) lower bound.
(Required) upper bound.
(Optional) Whether to sample with replacement.
(Required) The random seed. This field is required to avoid generating mutable dataframes (see SPARK-48184 for details); however, it is still kept 'optional' here for backward compatibility.
(Required) Explicitly sort the underlying plan to make the ordering deterministic or cache it. This flag is true when invoking `dataframe.randomSplit` to randomly split the DataFrame with the provided weights. Otherwise, it is false.
Used in:
(Required) Serialized JVM object containing UDF definition, input encoders and output encoder
(Optional) Input type(s) of the UDF
(Required) Output type of the UDF
(Required) True if the UDF can return null value
(Required) Indicate if the UDF is an aggregate function
See `spark.catalog.setCurrentCatalog`
Used in:
(Required)
See `spark.catalog.setCurrentDatabase`
Used in:
(Required)
Relation of type [[SetOperation]]
Used in:
(Required) Left input relation for a Set operation.
(Required) Right input relation for a Set operation.
(Required) The Set operation type.
(Optional) Whether to preserve duplicate rows: true to preserve all results, false to remove duplicate rows.
(Optional) Whether to perform the Set operation based on name resolution. Only UNION supports this option.
(Optional) Whether to perform the Set operation and allow missing columns. Only UNION supports this option.
Used in:
Compose the string representing rows for output. It will invoke 'Dataset.showString' to compute the results.
Used in:
(Required) The input relation.
(Required) Number of rows to show.
(Required) If set to more than 0, truncates strings to `truncate` characters and all cells will be aligned right.
(Required) If set to true, prints output rows vertically (one line per column value).
Relation of type [[Sort]].
Used in:
(Required) Input relation for a Sort.
(Required) The ordering expressions
(Optional) if this is a global sort.
A SQL Command is used to trigger the eager evaluation of SQL commands in Spark. When the SQL provided as part of the message is a command, it will be immediately evaluated and the result will be collected and returned as part of a LocalRelation. If the result is not a command, the operation will simply return a SQL Relation. This allows the client to be almost oblivious to the server-side behavior.
Used in:
(Required) SQL Query.
(Optional) A map of parameter names to literal expressions.
(Optional) A sequence of literal expressions for positional parameters in the SQL query text.
(Optional) A map of parameter names to expressions. It cannot coexist with `pos_arguments`.
(Optional) A sequence of expressions for positional parameters in the SQL query text. It cannot coexist with `named_arguments`.
(Optional) The relation that this SQL command will be built on.
Calculates the approximate quantiles of numerical columns of a DataFrame. It will invoke 'Dataset.stat.approxQuantile' (same as 'StatFunctions.approxQuantile') to compute the results.
Used in:
(Required) The input relation.
(Required) The names of the numerical columns.
(Required) A list of quantile probabilities. Each number must belong to [0, 1]. For example 0 is the minimum, 0.5 is the median, 1 is the maximum.
(Required) The relative target precision to achieve (greater than or equal to 0). If set to zero, the exact quantiles are computed, which could be very expensive. Note that values greater than 1 are accepted but give the same result as 1.
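This corresponds to DataFrame.approxQuantile in PySpark; a sketch, where `df` is a placeholder DataFrame:

    # Quartiles of "price" with a 1% relative error target.
    quartiles = df.approxQuantile("price", [0.25, 0.5, 0.75], 0.01)

    # A relative error of 0.0 computes exact quantiles, which can be very expensive.
    exact_median = df.approxQuantile(["price"], [0.5], 0.0)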
Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson Correlation Coefficient. It will invoke 'Dataset.stat.corr' (same as 'StatFunctions.pearsonCorrelation') to compute the results.
Used in:
(Required) The input relation.
(Required) The name of the first column.
(Required) The name of the second column.
(Optional) Default value is 'pearson'. Currently only supports the Pearson Correlation Coefficient.
Calculate the sample covariance of two numerical columns of a DataFrame. It will invoke 'Dataset.stat.cov' (same as 'StatFunctions.calculateCov') to compute the results.
Used in:
(Required) The input relation.
(Required) The name of the first column.
(Required) The name of the second column.
Computes a pair-wise frequency table of the given columns. Also known as a contingency table. It will invoke 'Dataset.stat.crosstab' (same as 'StatFunctions.crossTabulate') to compute the results.
Used in:
(Required) The input relation.
(Required) The name of the first column. Distinct items will make the first item of each row.
(Required) The name of the second column. Distinct items will make the column names of the DataFrame.
Computes basic statistics for numeric and string columns, including count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical or string columns.
Used in:
(Required) The input relation.
(Optional) Columns to compute statistics on.
Finding frequent items for columns, possibly with false positives. It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems') to compute the results.
Used in:
(Required) The input relation.
(Required) The names of the columns to search frequent items in.
(Optional) The minimum frequency for an item to be considered `frequent`. Should be greater than 1e-4.
Returns a stratified sample without replacement based on the fraction given on each stratum. It will invoke 'Dataset.stat.sampleBy' to compute the results.
Used in:
(Required) The input relation.
(Required) The column that defines strata.
(Required) Sampling fraction for each stratum. If a stratum is not specified, we treat its fraction as zero.
(Required) The random seed. This field is required to avoid generating mutable dataframes (see SPARK-48184 for details); however, it is still kept 'optional' here for backward compatibility.
Used in:
(Required) The stratum.
(Required) The fraction value. Must be in [0, 1].
Computes specified statistics for numeric and string columns. It will invoke 'Dataset.summary' (same as 'StatFunctions.summary') to compute the results.
Used in:
(Required) The input relation.
(Optional) Statistics to be computed. Available statistics are: count, mean, stddev, min, max, arbitrary approximate percentiles specified as a percentage (e.g. 75%), count_distinct, approx_count_distinct. If no statistics are given, this function computes 'count', 'mean', 'stddev', 'min', approximate quartiles (percentiles at 25%, 50%, and 75%), and 'max'.
StorageLevel for persisting Datasets/Tables.
Used in:
(Required) Whether the cache should use disk or not.
(Required) Whether the cache should use memory or not.
(Required) Whether the cache should use off-heap or not.
(Required) Whether the cached data is deserialized or not.
(Required) The number of replicas.
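The same five flags make up PySpark's StorageLevel, so a level passed to persist maps directly onto this message; a sketch, where `df` is a placeholder DataFrame:

    from pyspark import StorageLevel

    # Constructor arguments mirror the fields: (useDisk, useMemory, useOffHeap, deserialized, replication)
    two_replicas = StorageLevel(True, True, False, False, 2)

    df.persist(two_replicas)     # or a predefined level such as StorageLevel.MEMORY_AND_DISK
    df.unpersist(blocking=True)  # block until all blocks are deleted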
Used in:
Commands for a streaming query.
Used in:
(Required) Query instance. See `StreamingQueryInstanceId`.
See documentation for the corresponding API method in StreamingQuery.
status() API.
lastProgress() API.
recentProgress() API.
stop() API. Stops the query.
processAllAvailable() API. Waits until all the available data is processed.
explain() API. Returns logical and physical plans.
exception() API. Returns the exception in the query if any.
awaitTermination() API. Waits for the termination of the query.
Used in:
Used in:
TODO: Consider reusing Explain from AnalyzePlanRequest message. We cannot do this right now since base.proto imports this file.
Response for commands on a streaming query.
Used in:
(Required) Query instance id. See `StreamingQueryInstanceId`.
Used in:
Used in:
(Optional) Exception message as string, maps to the return value of the original StreamingQueryException's toString method
(Optional) Exception error class as string
(Optional) Exception stack trace as string
Used in:
Logical and physical plans as string
Used in:
Progress reports as an array of json strings.
Used in:
See documentation for the Scala 'StreamingQueryStatus' struct
The enum used for the client-side streaming query listener event. There is no QueryStartedEvent defined here; it is added as a field in WriteStreamOperationStartResult.
Used in:
A tuple that uniquely identifies an instance of streaming query run. It consists of `id` that persists across the streaming runs and `run_id` that changes between each run of the streaming query that resumes from the checkpoint.
Used in:
(Required) The unique id of this query that persists across restarts from checkpoint data. That is, this id is generated when a query is started for the first time, and will be the same every time it is restarted from checkpoint data.
(Required) The unique id of this run of the query. That is, every start/restart of a query will generate a unique run_id. Therefore, every time a query is restarted from checkpoint, it will have the same `id` but different `run_id`s.
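Both ids are visible on a running StreamingQuery; a sketch, where `df` is a placeholder streaming DataFrame and the checkpoint path is hypothetical:

    query = (df.writeStream
               .format("memory")
               .queryName("events")
               .option("checkpointLocation", "/tmp/ckpt")  # hypothetical path
               .start())

    print(query.id)     # stable across restarts from the same checkpoint
    print(query.runId)  # regenerated on every start/restart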
The protocol for client-side StreamingQueryListener. This command will only be set when either the first listener is added to the client, or the last listener is removed from the client. The add_listener_bus_listener command will only be set true in the first case. The remove_listener_bus_listener command will only be set true in the second case.
Used in:
The protocol for the returned events in the long-running response channel.
Used in:
(Required) The json serialized event, all StreamingQueryListener events have a json method
(Required) Query event type used by client to decide how to deserialize the event_json
Used in:
Commands for the streaming query manager.
Used in:
See documentation for the corresponding API method in StreamingQueryManager.
active() API, returns a list of active queries.
get() API, returns the StreamingQuery identified by id.
awaitAnyTermination() API, wait until any query terminates or timeout.
resetTerminated() API.
addListener API.
removeListener API.
listListeners() API, returns a list of streaming query listeners.
Used in:
(Optional) The waiting time in milliseconds to wait for any query to terminate.
Used in:
Response for commands on the streaming query manager.
Used in:
Used in:
Used in:
Used in:
(Required) Reference IDs of listener instances.
Used in:
(Required) The id and runId of this query.
(Optional) The name of this query.
Relation alias.
Used in:
(Required) The input relation of SubqueryAlias.
(Required) The alias.
(Optional) Qualifier of the alias.
See `spark.catalog.tableExists`
Used in:
(Required)
(Optional)
Relation of type [[Tail]] that is used to fetch `limit` rows from the last of the input relation.
Used in:
(Required) Input relation for a Tail.
(Required) the limit.
A task resource request.
Used in:
(Required) The resource name.
(Required) The amount of the resource being requested, as a double to support fractional resource requests.
Rename columns on the input relation using a list of new names of the same length as the number of columns.
Used in:
(Required) The input relation of RenameColumnsBySameLengthNames.
(Required) The number of columns of the input relation must be equal to the length of this field. If this is not true, an exception will be returned.
Used in:
(Required) The input relation.
(Required) The user provided schema. The server side will update the dataframe with this schema.
Transpose a DataFrame, switching rows to columns. Transforms the DataFrame such that the values in the specified index column become the new columns of the DataFrame.
Used in:
(Required) The input relation.
(Optional) A list of columns that will be treated as the indices. Only a single column is supported now.
Used in:
(Required) The aggregate function object packed into bytes.
See `spark.catalog.uncacheTable`
Used in:
(Required)
Used for testing purposes only.
Used in:
(message has no fields)
Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns set.
Used in:
(Required) The input relation.
(Required) Id columns.
(Optional) Value columns to unpivot.
(Required) Name of the variable column.
(Required) Name of the value column.
Used in:
User Context is used to refer to one particular user session that is executing queries in the backend.
Used in:
To extend the existing user context message that is used to identify incoming requests, Spark Connect leverages the Any protobuf type that can be used to inject arbitrary other messages into this message. Extensions are stored as a `repeated` type to be able to handle multiple active extensions.
Adding columns or replacing the existing columns that have the same names.
Used in:
(Required) The input relation.
(Required) Given a column name, apply the corresponding expression on the column. If the column name exists in the input relation, then it replaces the column. If the column name does not exist in the input relation, then it is added as a new column. Only one name part is expected from each Expression.Alias. An exception is thrown when duplicated names are present in the mapping.
Rename columns on the input relation by a map with name to name mapping.
Used in:
(Required) The input relation.
(Optional) Renaming column names of the input relation from A to B where A is the map key and B is the map value. This is a no-op if the schema doesn't contain any A. It does not require all input relation column names to be present as keys. Duplicated B values are not allowed.
Used in:
(Required) The existing column name.
(Required) The new column name.
Relation of type [[WithRelations]]. This relation contains a root plan, and one or more references that are used by the root plan. There are two ways of referencing a relation, by name (through a subquery alias), or by plan_id (using RelationCommon.plan_id). This relation can be used to implement CTEs, describe DAGs, or to reduce tree depth.
Used in:
(Required) Plan at the root of the query tree. This plan is expected to contain one or more references. Those references get expanded later on by the engine.
(Required) Plans referenced by the root plan. Relations in this list are also allowed to contain references to other relations in this list, as long as they do not form cycles.
Used in:
(Required) The input relation
(Required) Name of the column containing event time.
(Required)
As writes are not directly handled during analysis and planning, they are modeled as commands.
Used in:
(Required) The output of the `input` relation will be persisted according to the options.
(Optional) Format value according to the Spark documentation. Examples are: text, parquet, delta.
(Optional) The destination of the write operation can be either a path or a table. If the destination is neither a path nor a table, such as jdbc and noop, the `save_type` should not be set.
(Required) the save mode.
(Optional) List of columns to sort the output by.
(Optional) List of columns for partitioning.
(Optional) Bucketing specification. Bucketing must set the number of buckets and the columns to bucket by.
(Optional) A list of configuration options.
(Optional) Columns used for clustering the table.
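The fields above mirror the DataFrameWriter builder; a sketch, where `df` is a placeholder DataFrame (bucketing requires saving to a table rather than a path):

    (df.write
       .format("parquet")                # source / format
       .mode("overwrite")                # save mode
       .partitionBy("year", "month")     # partitioning columns
       .bucketBy(16, "user_id")          # bucketing: number of buckets plus bucket columns
       .sortBy("user_id")                # sort columns
       .option("compression", "snappy")  # options
       .saveAsTable("events"))           # table destination; use .save(path) for a path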
Used in:
Used in:
Used in:
(Required) The table name.
(Required) The method to be called to write to the table.
Used in:
As writes are not directly handled during analysis and planning, they are modeled as commands.
Used in:
(Required) The output of the `input` relation will be persisted according to the options.
(Required) The destination of the write operation must be either a path or a table.
(Optional) A provider for the underlying output data source. Spark's default catalog supports "parquet", "json", etc.
(Optional) List of columns for partitioning for output table created by `create`, `createOrReplace`, or `replace`
(Optional) A list of configuration options.
(Optional) A list of table properties.
(Required) Write mode.
(Optional) A condition for overwrite saving mode
(Optional) Columns used for clustering the table.
Used in:
Starts write stream operation as streaming query. Query ID and Run ID of the streaming query are returned.
Used in:
(Required) The output of the `input` streaming relation will be written.
The destination is optional. When set, it can be a path or a table name.
(Optional) Columns used for clustering the table.
Used in:
(Required) Query instance. See `StreamingQueryInstanceId`.
An optional query name.
Optional query started event if there is any listener registered on the client side.