package google.genomics.v1alpha2

Get desktop application:
View/edit binary Protocol Buffers messages

A service for running genomics pipelines.

rpc CreatePipeline (CreatePipelineRequest, Pipeline)
pipelines.proto:40
Creates a pipeline that can be run later. Create takes a Pipeline that has all fields other than `pipelineId` populated, and then returns the same pipeline with `pipelineId` populated. This id can be used to run the pipeline. Caller must have WRITE permission to the project.
message CreatePipelineRequest
pipelines.proto:183
The request to create a pipeline. The pipeline field here should not have `pipelineId` populated, as that will be populated by the server.
- optional Pipeline pipeline = 1
  The pipeline to create. Should not have `pipelineId` populated.
rpc RunPipeline (RunPipelineRequest, longrunning.Operation)
pipelines.proto:54
Runs a pipeline. If `pipelineId` is specified in the request, then run a saved pipeline. If `ephemeralPipeline` is specified, then run that pipeline once without saving a copy. The caller must have READ permission to the project where the pipeline is stored and WRITE permission to the project where the pipeline will be run, as VMs will be created and storage will be used.
message RunPipelineRequest
pipelines.proto:250
The request to run a pipeline. If `pipelineId` is specified, it refers to a saved pipeline created with CreatePipeline and set as the `pipelineId` of the returned Pipeline object. If `ephemeralPipeline` is specified, that pipeline is run once with the given args and not saved. It is an error to specify both `pipelineId` and `ephemeralPipeline`. `pipelineArgs` must be specified.
- oneof pipeline
  - string pipeline_id = 1
    The already created pipeline to run.
  - Pipeline ephemeral_pipeline = 2
    A new pipeline object to run once and then delete.
- optional RunPipelineArgs pipeline_args = 3
  The arguments to use when running this pipeline.
rpc GetPipeline (GetPipelineRequest, Pipeline)
pipelines.proto:64
Retrieves a pipeline based on ID. Caller must have READ permission to the project.
message GetPipelineRequest
pipelines.proto:264
A request to get a saved pipeline by id.
- string pipeline_id = 1
  Caller must have READ access to the project in which this pipeline is defined.
rpc ListPipelines (ListPipelinesRequest, ListPipelinesResponse)
pipelines.proto:73
Lists pipelines. Caller must have READ permission to the project.
message ListPipelinesRequest
pipelines.proto:276
A request to list pipelines in a given project. Pipelines can be filtered by name using `namePrefix`: all pipelines with names that begin with `namePrefix` will be returned. Uses standard pagination: `pageSize` indicates how many pipelines to return, and `pageToken` comes from a previous ListPipelinesResponse to indicate offset.
- string project_id = 1
  Required. The name of the project to search for pipelines. Caller must have READ access to this project.
- string name_prefix = 2
  Pipelines with names that match this prefix should be returned. If unspecified, all pipelines in the project, up to `pageSize`, will be returned.
- int32 page_size = 3
  Number of pipelines to return at once. Defaults to 256, and max is 2048.
- string page_token = 4
  Token to use to indicate where to start getting results. If unspecified, returns the first page of results.
message ListPipelinesResponse
pipelines.proto:300
The response of ListPipelines. Contains at most `pageSize` pipelines. If it contains `pageSize` pipelines, and more pipelines exist, then `nextPageToken` will be populated and should be used as the `pageToken` argument to a subsequent ListPipelines request.
- repeated Pipeline pipelines = 1
  The matched pipelines.
- string next_page_token = 2
  The token to use to get the next page of results.
rpc DeletePipeline (DeletePipelineRequest, protobuf.Empty)
pipelines.proto:82
Deletes a pipeline based on ID. Caller must have WRITE permission to the project.
message DeletePipelineRequest
pipelines.proto:309
The request to delete a saved pipeline by ID.
- string pipeline_id = 1
  Caller must have WRITE access to the project in which this pipeline is defined.
rpc GetControllerConfig (GetControllerConfigRequest, ControllerConfig)
pipelines.proto:90
Gets controller configuration information. Should only be called by VMs created by the Pipelines Service and not by end users.
message GetControllerConfigRequest
pipelines.proto:317
Request to get controller configuration. Should only be used by VMs created by the Pipelines Service and not by end users.
- string operation_id = 1
  The operation to retrieve controller configuration for.
- uint64 validation_token = 2
message ControllerConfig
pipelines.proto:327
Stores the information that the controller will fetch from the server in order to run. Should only be used by VMs created by the Pipelines Service and not by end users.
- string image = 1
- string cmd = 2
- string gcs_log_path = 3
- string machine_type = 4
- map<string, string> vars = 5
- map<string, string> disks = 6
- map<string, ControllerConfig.RepeatedString> gcs_sources = 7
- map<string, ControllerConfig.RepeatedString> gcs_sinks = 8
rpc SetOperationStatus (SetOperationStatusRequest, protobuf.Empty)
pipelines.proto:100
Sets status of a given operation. Any new timestamps (as determined by description) are appended to TimestampEvents. Should only be called by VMs created by the Pipelines Service and not by end users.
message SetOperationStatusRequest
pipelines.proto:361
Request to set operation status. Should only be used by VMs created by the Pipelines Service and not by end users.
- string operation_id = 1
- repeated TimestampEvent timestamp_events = 2
- rpc.Code error_code = 3
- string error_message = 4
- uint64 validation_token = 5

Describes a Compute Engine resource that is being managed by a running [pipeline][google.genomics.v1alpha2.Pipeline].

Used in: RuntimeMetadata

string instance_name = 1
The instance on which the operation is running.
string zone = 2
The availability zone in which the instance resides.
string machine_type = 3
The machine type of the instance.
repeated string disk_names = 4
The names of the disks that were created for this pipeline.

Used in: ControllerConfig

repeated string values = 1

The Docker executor specification.

Used in: Pipeline

string image_name = 1
Required. Image name from either Docker Hub or Google Container Registry. Users that run pipelines must have READ access to the image.
string cmd = 2
Required. The command or newline delimited script to run. The command string will be executed within a bash shell. If the command exits with a non-zero exit code, output parameter de-localization will be skipped and the pipeline operation's [`error`][google.longrunning.Operation.error] field will be populated. Maximum command string length is 16384.

The logging options for the pipeline run.

Used in: RunPipelineArgs

string gcs_path = 1
The location in Google Cloud Storage to which the pipeline logs will be copied. Can be specified as a fully qualified directory path, in which case logs will be output with a unique identifier as the filename in that directory, or as a fully specified path, which must end in `.log`, in which case that path will be used, and the user must ensure that logs are not overwritten. Stdout and stderr logs from the run are also generated and output as `-stdout.log` and `-stderr.log`.

The pipeline object. Represents a transformation from a set of input parameters to a set of output parameters. The transformation is defined as a docker image and command to run within that image. Each pipeline is run on a Google Compute Engine VM. A pipeline can be created with the `create` method and then later run with the `run` method, or a pipeline can be defined and run all at once with the `run` method.

Used as response type in: PipelinesV1Alpha2.CreatePipeline, PipelinesV1Alpha2.GetPipeline

Used as field type in: CreatePipelineRequest, ListPipelinesResponse, RunPipelineRequest

string project_id = 1
Required. The project in which to create the pipeline. The caller must have WRITE access.
string name = 2
Required. A user specified pipeline name that does not have to be unique. This name can be used for filtering Pipelines in ListPipelines.
string description = 3
User-specified description.
repeated PipelineParameter input_parameters = 8
Input parameters of the pipeline.
repeated PipelineParameter output_parameters = 9
Output parameters of the pipeline.
oneof executor
Required. The executor indicates in which environment the pipeline runs.
- DockerExecutor docker = 5
  Specifies the docker run information.
optional PipelineResources resources = 6
Required. Specifies resource requirements for the pipeline run. Required fields: * [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores] * [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
string pipeline_id = 7
Unique pipeline id that is generated by the service when CreatePipeline is called. Cannot be specified in the Pipeline used in the CreatePipelineRequest, and will be populated in the response to CreatePipeline and all subsequent Get and List calls. Indicates that the service has registered this pipeline.

Parameters facilitate setting and delivering data into the pipeline's execution environment. They are defined at create time, with optional defaults, and can be overridden at run time. If `localCopy` is unset, then the parameter specifies a string that is passed as-is into the pipeline, as the value of the environment variable with the given name. A default value can be optionally specified at create time. The default can be overridden at run time using the inputs map. If no default is given, a value must be supplied at runtime. If `localCopy` is defined, then the parameter specifies a data source or sink, both in Google Cloud Storage and on the Docker container where the pipeline computation is run. The [service account associated with the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by default the project's Compute Engine service account) must have access to the Google Cloud Storage paths. At run time, the Google Cloud Storage paths can be overridden if a default was provided at create time, or must be set otherwise. The pipeline runner should add a key/value pair to either the inputs or outputs map. The indicated data copies will be carried out before/after pipeline execution, just as if the corresponding arguments were provided to `gsutil cp`. For example: Given the following `PipelineParameter`, specified in the `inputParameters` list: ``` {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}} ``` where `disk` is defined in the `PipelineResources` object as: ``` {name: "pd1", mountPoint: "/mnt/disk/"} ``` We create a disk named `pd1`, mount it on the host VM, and map `/mnt/pd1` to `/mnt/disk` in the docker container. 
At runtime, an entry for `input_file` would be required in the inputs map, such as: ``` inputs["input_file"] = "gs://my-bucket/bar.txt" ``` This would generate the following gsutil call: ``` gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt ``` The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the Docker container. Acceptable paths are: <table> <thead> <tr><th>Google Cloud storage path</th><th>Local path</th></tr> </thead> <tbody> <tr><td>file</td><td>file</td></tr> <tr><td>glob</td><td>directory</td></tr> </tbody> </table> For outputs, the direction of the copy is reversed: ``` gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt ``` Acceptable paths are: <table> <thead> <tr><th>Local path</th><th>Google Cloud Storage path</th></tr> </thead> <tbody> <tr><td>file</td><td>file</td></tr> <tr> <td>file</td> <td>directory - directory must already exist</td> </tr> <tr> <td>glob</td> <td>directory - directory will be created if it doesn't exist</td></tr> </tbody> </table> One restriction due to docker limitations, is that for outputs that are found on the boot disk, the local path cannot be a glob and must be a file.

Used in: Pipeline

string name = 1
Required. Name of the parameter - the pipeline runner uses this string as the key to the input and output maps in RunPipeline.
string description = 2
Human-readable description.
string default_value = 5
The default value for this parameter. Can be overridden at runtime. If `localCopy` is present, then this must be a Google Cloud Storage path beginning with `gs://`.
optional PipelineParameter.LocalCopy local_copy = 6
If present, this parameter is marked for copying to and from the VM. `LocalCopy` indicates where on the VM the file should be. The value given to this parameter (either at runtime or using `defaultValue`) must be the remote path where the file should be.

LocalCopy defines how a remote file should be copied to and from the VM.

Used in: PipelineParameter

string path = 1
Required. The path within the user's docker container where this input should be localized to and from, relative to the specified disk's mount point. For example: `file.txt`.
string disk = 2
Required. The name of the disk where this parameter is located. Can be the name of one of the disks specified in the Resources field, or "boot", which represents the Docker instance's boot disk and has a mount point of `/`.

The system resources for the pipeline run.

Used in: Pipeline, RunPipelineArgs

int32 minimum_cpu_cores = 1
The minimum number of cores to use. Defaults to 1.
bool preemptible = 2
Whether to use preemptible VMs. Defaults to `false`. In order to use this, must be true for both create time and run time. Cannot be true at run time if false at create time.
double minimum_ram_gb = 3
The minimum amount of RAM to use. Defaults to 3.75 (GB)
repeated PipelineResources.Disk disks = 4
Disks to attach.
repeated string zones = 5
List of Google Compute Engine availability zones to which resource creation will be restricted. If empty, any zone may be chosen.
int32 boot_disk_size_gb = 6
The size of the boot disk. Defaults to 10 (GB).
bool no_address = 7
Whether to assign an external IP to the instance. This is an experimental feature that may go away. Defaults to false. Corresponds to `--no_address` flag for [gcloud compute instances create] (https://cloud.google.com/sdk/gcloud/reference/compute/instances/create). In order to use this, must be true for both create time and run time. Cannot be true at run time if false at create time. If you need to ssh into a private IP VM for debugging, you can ssh to a public VM and then ssh into the private VM's Internal IP. If noAddress is set, this pipeline run may only load docker images from Google Container Registry and not Docker Hub. ** Note: To use this option, your project must be in Google Access for Private IPs Early Access Program.**

A Google Compute Engine disk resource specification.

Used in: PipelineResources

string name = 1
Required. The name of the disk that can be used in the pipeline parameters. Must be 1 - 63 characters. The name "boot" is reserved for system use.
Disk.Type type = 2
Required. The type of the disk to create.
int32 size_gb = 3
The size of the disk. Defaults to 500 (GB). This field is not applicable for local SSD.
string source = 4
The full or partial URL of the persistent disk to attach. See https://cloud.google.com/compute/docs/reference/latest/instances#resource and https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots for more details.
bool auto_delete = 6
Deprecated. Disks created by the Pipelines API will be deleted at the end of the pipeline run, regardless of what this field is set to.
string mount_point = 8
Required at create time and cannot be overridden at run time. Specifies the path in the docker container where files on this disk should be located. For example, if `mountPoint` is `/mnt/disk`, and the parameter has `localPath` `inputs/file.txt`, the docker container can access the data at `/mnt/disk/inputs/file.txt`.

The types of disks that may be attached to VMs.

Used in: Disk

TYPE_UNSPECIFIED = 0
Default disk type. Use one of the other options below.
PERSISTENT_HDD = 1
Specifies a Google Compute Engine persistent hard disk. See https://cloud.google.com/compute/docs/disks/#pdspecs for details.
PERSISTENT_SSD = 2
Specifies a Google Compute Engine persistent solid-state disk. See https://cloud.google.com/compute/docs/disks/#pdspecs for details.
LOCAL_SSD = 3
Specifies a Google Compute Engine local SSD. See https://cloud.google.com/compute/docs/disks/local-ssd for details.

The pipeline run arguments.

Used in: RunPipelineRequest

string project_id = 1
Required. The project in which to run the pipeline. The caller must have WRITER access to all Google Cloud services and resources (e.g. Google Compute Engine) that will be used.
map<string, string> inputs = 2
Pipeline input arguments; keys are defined in the pipeline documentation. All input parameters that do not have default values must be specified. If parameters with defaults are specified here, the defaults will be overridden.
map<string, string> outputs = 3
Pipeline output arguments; keys are defined in the pipeline documentation. All output parameters without default values must be specified. If parameters with defaults are specified here, the defaults will be overridden.
optional ServiceAccount service_account = 4
The Google Cloud Service Account that will be used to access data and services. By default, the compute service account associated with `projectId` is used.
string client_id = 5
This field is deprecated. Use `labels` instead. Client-specified pipeline operation identifier.
optional PipelineResources resources = 6
Specifies resource requirements/overrides for the pipeline run.
optional LoggingOptions logging = 7
Required. Logging options. Used by the service to communicate results to the user.
optional protobuf.Duration keep_vm_alive_on_failure_duration = 8
How long to keep the VM up after a failure (for example docker command failed, copying input or output files failed, etc). While the VM is up, one can ssh into the VM to debug. Default is 0; maximum allowed value is 1 day.
map<string, string> labels = 9
Labels to apply to this pipeline run. Labels will also be applied to compute resources (VM, disks) created by this pipeline run. When listing operations, operations can be [filtered by labels] [google.longrunning.ListOperationsRequest.filter]. Label keys may not be empty; label values may be empty. Non-empty labels must be 1-63 characters long, and comply with [RFC1035] (https://www.ietf.org/rfc/rfc1035.txt). Specifically, the name must be 1-63 characters long and match the regular expression `[a-z]([-a-z0-9]*[a-z0-9])?` which means the first character must be a lowercase letter, and all following characters must be a dash, lowercase letter, or digit, except the last character, which cannot be a dash.

Runtime metadata that will be populated in the [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata] field of the Operation associated with a RunPipeline execution.

optional ComputeEngine compute_engine = 1
Execution information specific to Google Compute Engine.

A Google Cloud Service Account.

Used in: RunPipelineArgs

string email = 1
Email address of the service account. Defaults to `default`, which uses the compute service account associated with the project.
repeated string scopes = 2
List of scopes to be enabled for this service account on the VM. The following scopes are automatically included: * https://www.googleapis.com/auth/compute * https://www.googleapis.com/auth/devstorage.full_control * https://www.googleapis.com/auth/genomics * https://www.googleapis.com/auth/logging.write * https://www.googleapis.com/auth/monitoring.write

Stores the list of events and times they occurred for major events in job execution.

Used in: SetOperationStatusRequest

string description = 1
String indicating the type of event
optional protobuf.Timestamp timestamp = 2
The time this event occurred.

package google.genomics.v1alpha2

service PipelinesV1Alpha2

rpc CreatePipeline (CreatePipelineRequest, Pipeline)

message CreatePipelineRequest

optional Pipeline pipeline = 1

rpc RunPipeline (RunPipelineRequest, longrunning.Operation)

message RunPipelineRequest

oneof pipeline

string pipeline_id = 1

Pipeline ephemeral_pipeline = 2

optional RunPipelineArgs pipeline_args = 3

rpc GetPipeline (GetPipelineRequest, Pipeline)

message GetPipelineRequest

string pipeline_id = 1

rpc ListPipelines (ListPipelinesRequest, ListPipelinesResponse)

message ListPipelinesRequest

string project_id = 1

string name_prefix = 2

int32 page_size = 3

string page_token = 4

message ListPipelinesResponse

repeated Pipeline pipelines = 1

string next_page_token = 2

rpc DeletePipeline (DeletePipelineRequest, protobuf.Empty)

message DeletePipelineRequest

string pipeline_id = 1

rpc GetControllerConfig (GetControllerConfigRequest, ControllerConfig)

message GetControllerConfigRequest

string operation_id = 1

uint64 validation_token = 2

message ControllerConfig

string image = 1

string cmd = 2

string gcs_log_path = 3

string machine_type = 4

map<string, string> vars = 5

map<string, string> disks = 6

map<string, ControllerConfig.RepeatedString> gcs_sources = 7

map<string, ControllerConfig.RepeatedString> gcs_sinks = 8

rpc SetOperationStatus (SetOperationStatusRequest, protobuf.Empty)

message SetOperationStatusRequest

string operation_id = 1

repeated TimestampEvent timestamp_events = 2

rpc.Code error_code = 3

string error_message = 4

uint64 validation_token = 5

message ComputeEngine

string instance_name = 1

string zone = 2

string machine_type = 3

repeated string disk_names = 4

message ControllerConfig.RepeatedString

repeated string values = 1

message DockerExecutor

string image_name = 1

string cmd = 2

message LoggingOptions

string gcs_path = 1

message Pipeline

string project_id = 1

string name = 2

string description = 3

repeated PipelineParameter input_parameters = 8

repeated PipelineParameter output_parameters = 9

oneof executor

DockerExecutor docker = 5

optional PipelineResources resources = 6

string pipeline_id = 7

message PipelineParameter

string name = 1

string description = 2

string default_value = 5

optional PipelineParameter.LocalCopy local_copy = 6

message PipelineParameter.LocalCopy

string path = 1

string disk = 2

message PipelineResources

int32 minimum_cpu_cores = 1

bool preemptible = 2

double minimum_ram_gb = 3