package google.genomics.v1

Get desktop application:
View/edit binary Protocol Buffers messages

This service provides storage and positional retrieval of genomic reference annotations, including variant annotations.

rpc CreateAnnotationSet (CreateAnnotationSetRequest, AnnotationSet)
annotations.proto:45
Creates a new annotation set. Caller must have WRITE permission for the associated dataset. The following fields are required: * [datasetId][google.genomics.v1.AnnotationSet.dataset_id] * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id] All other fields may be optionally specified, unless documented as being server-generated (for example, the `id` field).
message CreateAnnotationSetRequest
annotations.proto:506
- optional AnnotationSet annotation_set = 1
  The annotation set to create.
rpc GetAnnotationSet (GetAnnotationSetRequest, AnnotationSet)
annotations.proto:54
Gets an annotation set. Caller must have READ permission for the associated dataset.
message GetAnnotationSetRequest
annotations.proto:511
- string annotation_set_id = 1
  The ID of the annotation set to be retrieved.
rpc UpdateAnnotationSet (UpdateAnnotationSetRequest, AnnotationSet)
annotations.proto:63
Updates an annotation set. The update must respect all mutability restrictions and other invariants described on the annotation set resource. Caller must have WRITE permission for the associated dataset.
message UpdateAnnotationSetRequest
annotations.proto:516
- string annotation_set_id = 1
  The ID of the annotation set to be updated.
- optional AnnotationSet annotation_set = 2
  The new annotation set.
- optional protobuf.FieldMask update_mask = 3
  An optional mask specifying which fields to update. Mutable fields are [name][google.genomics.v1.AnnotationSet.name], [source_uri][google.genomics.v1.AnnotationSet.source_uri], and [info][google.genomics.v1.AnnotationSet.info]. If unspecified, all mutable fields will be updated.
rpc DeleteAnnotationSet (DeleteAnnotationSetRequest, protobuf.Empty)
annotations.proto:72
Deletes an annotation set. Caller must have WRITE permission for the associated annotation set.
message DeleteAnnotationSetRequest
annotations.proto:531
- string annotation_set_id = 1
  The ID of the annotation set to be deleted.
rpc SearchAnnotationSets (SearchAnnotationSetsRequest, SearchAnnotationSetsResponse)
annotations.proto:84
Searches for annotation sets that match the given criteria. Annotation sets are returned in an unspecified order. This order is consistent, such that two queries for the same content (regardless of page size) yield annotation sets in the same order across their respective streams of paginated responses. Caller must have READ permission for the queried datasets.
message SearchAnnotationSetsRequest
annotations.proto:536
- repeated string dataset_ids = 1
  Required. The dataset IDs to search within. Caller must have `READ` access to these datasets.
- string reference_set_id = 2
  If specified, only annotation sets associated with the given reference set are returned.
- string name = 3
  Only return annotation sets for which a substring of the name matches this string (case insensitive).
- repeated AnnotationType types = 4
  If specified, only annotation sets that have any of these types are returned.
- string page_token = 5
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 6
  The maximum number of results to return in a single page. If unspecified, defaults to 128. The maximum value is 1024.
message SearchAnnotationSetsResponse
annotations.proto:563
- repeated AnnotationSet annotation_sets = 1
  The matching annotation sets.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc CreateAnnotation (CreateAnnotationRequest, Annotation)
annotations.proto:114
Creates a new annotation. Caller must have WRITE permission for the associated annotation set. The following fields are required: * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id] * [referenceName][google.genomics.v1.Annotation.reference_name] or [referenceId][google.genomics.v1.Annotation.reference_id] ### Transcripts For annotations of type TRANSCRIPT, the following fields of [transcript][google.genomics.v1.Annotation.transcript] must be provided: * [exons.start][google.genomics.v1.Transcript.Exon.start] * [exons.end][google.genomics.v1.Transcript.Exon.end] All other fields may be optionally specified, unless documented as being server-generated (for example, the `id` field). The annotated range must be no longer than 100Mbp (mega base pairs). See the [Annotation resource][google.genomics.v1.Annotation] for additional restrictions on each field.
message CreateAnnotationRequest
annotations.proto:573
- optional Annotation annotation = 1
  The annotation to be created.
rpc BatchCreateAnnotations (BatchCreateAnnotationsRequest, BatchCreateAnnotationsResponse)
annotations.proto:135
Creates one or more new annotations atomically. All annotations must belong to the same annotation set. Caller must have WRITE permission for this annotation set. For optimal performance, batch positionally adjacent annotations together. If the request has a systemic issue, such as an attempt to write to an inaccessible annotation set, the entire RPC will fail accordingly. For lesser data issues, when possible an error will be isolated to the corresponding batch entry in the response; the remaining well formed annotations will be created normally. For details on the requirements for each individual annotation resource, see [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
message BatchCreateAnnotationsRequest
annotations.proto:578
- repeated Annotation annotations = 1
  The annotations to be created. At most 4096 can be specified in a single request.
- string request_id = 2
  A unique request ID which enables the server to detect duplicated requests. If provided, duplicated requests will result in the same response; if not provided, duplicated requests may result in duplicated data. For a given annotation set, callers should not reuse `request_id`s when writing different batches of annotations - behavior in this case is undefined. A common approach is to use a UUID. For batch jobs where worker crashes are a possibility, consider using some unique variant of a worker or run ID.
message BatchCreateAnnotationsResponse
annotations.proto:593
- repeated BatchCreateAnnotationsResponse.Entry entries = 1
  The resulting per-annotation entries, ordered consistently with the original request.
rpc GetAnnotation (GetAnnotationRequest, Annotation)
annotations.proto:145
Gets an annotation. Caller must have READ permission for the associated annotation set.
message GetAnnotationRequest
annotations.proto:607
- string annotation_id = 1
  The ID of the annotation to be retrieved.
rpc UpdateAnnotation (UpdateAnnotationRequest, Annotation)
annotations.proto:153
Updates an annotation. Caller must have WRITE permission for the associated dataset.
message UpdateAnnotationRequest
annotations.proto:612
- string annotation_id = 1
  The ID of the annotation to be updated.
- optional Annotation annotation = 2
  The new annotation.
- optional protobuf.FieldMask update_mask = 3
  An optional mask specifying which fields to update. Mutable fields are [name][google.genomics.v1.Annotation.name], [variant][google.genomics.v1.Annotation.variant], [transcript][google.genomics.v1.Annotation.transcript], and [info][google.genomics.v1.Annotation.info]. If unspecified, all mutable fields will be updated.
rpc DeleteAnnotation (DeleteAnnotationRequest, protobuf.Empty)
annotations.proto:162
Deletes an annotation. Caller must have WRITE permission for the associated annotation set.
message DeleteAnnotationRequest
annotations.proto:628
- string annotation_id = 1
  The ID of the annotation to be deleted.
rpc SearchAnnotations (SearchAnnotationsRequest, SearchAnnotationsResponse)
annotations.proto:176
Searches for annotations that match the given criteria. Results are ordered by genomic coordinate (by reference sequence, then position). Annotations with equivalent genomic coordinates are returned in an unspecified order. This order is consistent, such that two queries for the same content (regardless of page size) yield annotations in the same order across their respective streams of paginated responses. Caller must have READ permission for the queried annotation sets.
message SearchAnnotationsRequest
annotations.proto:633
- repeated string annotation_set_ids = 1
  Required. The annotation sets to search within. The caller must have `READ` access to these annotation sets. All queried annotation sets must have the same type.
- oneof reference
  Required. `reference_id` or `reference_name` must be set.
  - string reference_id = 2
    The ID of the reference to query.
  - string reference_name = 3
    The name of the reference to query, within the reference set associated with this query.
- int64 start = 4
  The start position of the range on the reference, 0-based inclusive. If specified, [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name] must be specified. Defaults to 0.
- int64 end = 5
  The end position of the range on the reference, 0-based exclusive. If specified, [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name] must be specified. Defaults to the length of the reference.
- string page_token = 6
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 7
  The maximum number of results to return in a single page. If unspecified, defaults to 256. The maximum value is 2048.
message SearchAnnotationsResponse
annotations.proto:672
- repeated Annotation annotations = 1
  The matching annotations.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.

This service manages datasets, which are collections of genomic data.

rpc ListDatasets (ListDatasetsRequest, ListDatasetsResponse)
datasets.proto:39
Lists datasets within a project. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message ListDatasetsRequest
datasets.proto:181
The dataset list request.
- string project_id = 1
  Required. The Google Cloud project ID to list datasets for.
- int32 page_size = 2
  The maximum number of results to return in a single page. If unspecified, defaults to 50. The maximum value is 1024.
- string page_token = 3
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
message ListDatasetsResponse
datasets.proto:196
The dataset list response.
- repeated Dataset datasets = 1
  The list of matching Datasets.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc CreateDataset (CreateDatasetRequest, Dataset)
datasets.proto:50
Creates a new dataset. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message CreateDatasetRequest
datasets.proto:206
- optional Dataset dataset = 1
  The dataset to be created. Must contain projectId and name.
rpc GetDataset (GetDatasetRequest, Dataset)
datasets.proto:62
Gets a dataset by ID. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message GetDatasetRequest
datasets.proto:235
- string dataset_id = 1
  The ID of the dataset.
rpc UpdateDataset (UpdateDatasetRequest, Dataset)
datasets.proto:75
Updates a dataset. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) This method supports patch semantics.
message UpdateDatasetRequest
datasets.proto:211
- string dataset_id = 1
  The ID of the dataset to be updated.
- optional Dataset dataset = 2
  The new dataset data.
- optional protobuf.FieldMask update_mask = 3
  An optional mask specifying which fields to update. At this time, the only mutable field is [name][google.genomics.v1.Dataset.name]. The only acceptable value is "name". If unspecified, all mutable fields will be updated.
rpc DeleteDataset (DeleteDatasetRequest, protobuf.Empty)
datasets.proto:92
Deletes a dataset and all of its contents (all read group sets, reference sets, variant sets, call sets, annotation sets, etc.) This is reversible (up to one week after the deletion) via the [datasets.undelete][google.genomics.v1.DatasetServiceV1.UndeleteDataset] operation. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message DeleteDatasetRequest
datasets.proto:225
- string dataset_id = 1
  The ID of the dataset to be deleted.
rpc UndeleteDataset (UndeleteDatasetRequest, Dataset)
datasets.proto:105
Undeletes a dataset by restoring a dataset which was deleted via this API. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) This operation is only possible for a week after the deletion occurred.
message UndeleteDatasetRequest
datasets.proto:230
- string dataset_id = 1
  The ID of the dataset to be undeleted.
rpc SetIamPolicy (iam.v1.SetIamPolicyRequest, iam.v1.Policy)
datasets.proto:121
Sets the access control policy on the specified dataset. Replaces any existing policy. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) See <a href="/iam/docs/managing-policies#setting_a_policy">Setting a Policy</a> for more information.
rpc GetIamPolicy (iam.v1.GetIamPolicyRequest, iam.v1.Policy)
datasets.proto:138
Gets the access control policy for the dataset. This is empty if the policy or resource does not exist. See <a href="/iam/docs/managing-policies#getting_a_policy">Getting a Policy</a> for more information. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc TestIamPermissions (iam.v1.TestIamPermissionsRequest, iam.v1.TestIamPermissionsResponse)
datasets.proto:153
Returns permissions that a caller has on the specified resource. See <a href="/iam/docs/managing-policies#testing_permissions">Testing Permissions</a> for more information. For the definitions of datasets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)

The Readstore. A data store for DNA sequencing Reads.

rpc ImportReadGroupSets (ImportReadGroupSetsRequest, longrunning.Operation)
reads.proto:64
Creates read group sets by asynchronously importing the provided information. For the definitions of read group sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) The caller must have WRITE permissions to the dataset. ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import - Tags will be converted to strings - tag types are not preserved - Comments (`@CO`) in the input file header will not be preserved - Original header order of references (`@SQ`) will not be preserved - Any reverse stranded unmapped reads will be reverse complemented, and their qualities (also the "BQ" and "OQ" tags, if any) will be reversed - Unmapped reads will be stripped of positional information (reference name and position)
message ImportReadGroupSetsRequest
reads.proto:227
The read group set import request.
- string dataset_id = 1
  Required. The ID of the dataset these read group sets will belong to. The caller must have WRITE permissions to this dataset.
- string reference_set_id = 4
  The reference set to which the imported read group sets are aligned, if any. The reference names of this reference set must be a superset of those found in the imported file headers. If no reference set id is provided, a best effort is made to associate with a matching reference set.
- repeated string source_uris = 2
  A list of URIs pointing at [BAM files](https://samtools.github.io/hts-specs/SAMv1.pdf) in Google Cloud Storage. Those URIs can include wildcards (*), but do not add or remove matching files before import has completed. Note that Google Cloud Storage object listing is only eventually consistent: files added may not be immediately visible to everyone. Thus, if using a wildcard it is preferable not to start the import immediately after the files are created.
- ImportReadGroupSetsRequest.PartitionStrategy partition_strategy = 5
  The partition strategy describes how read groups are partitioned into read group sets.
rpc ExportReadGroupSet (ExportReadGroupSetRequest, longrunning.Operation)
reads.proto:82
Exports a read group set to a BAM file in Google Cloud Storage. For the definitions of read group sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Note that currently there may be some differences between exported BAM files and the original BAM file at the time of import. See [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets] for caveats.
message ExportReadGroupSetRequest
reads.proto:281
The read group set export request.
- string project_id = 1
  Required. The Google Cloud project ID that owns this export. The caller must have WRITE access to this project.
- string export_uri = 2
  Required. A Google Cloud Storage URI for the exported BAM file. The currently authenticated user must have write access to the new file. An error will be returned if the URI already contains data.
- string read_group_set_id = 3
  Required. The ID of the read group set to export. The caller must have READ access to this read group set.
- repeated string reference_names = 4
  The reference names to export. If this is not specified, all reference sequences, including unmapped reads, are exported. Use `*` to export only unmapped reads.
rpc SearchReadGroupSets (SearchReadGroupSetsRequest, SearchReadGroupSetsResponse)
reads.proto:98
Searches for read group sets matching the criteria. For the definitions of read group sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
message SearchReadGroupSetsRequest
reads.proto:196
The read group set search request.
- repeated string dataset_ids = 1
  Restricts this query to read group sets within the given datasets. At least one ID must be provided.
- string name = 3
  Only return read group sets for which a substring of the name matches this string.
- string page_token = 2
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 4
  The maximum number of results to return in a single page. If unspecified, defaults to 256. The maximum value is 1024.
message SearchReadGroupSetsResponse
reads.proto:216
The read group set search response.
- repeated ReadGroupSet read_group_sets = 1
  The list of matching read group sets.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc UpdateReadGroupSet (UpdateReadGroupSetRequest, ReadGroupSet)
reads.proto:113
Updates a read group set. For the definitions of read group sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) This method supports patch semantics.
message UpdateReadGroupSetRequest
reads.proto:301
- string read_group_set_id = 1
  The ID of the read group set to be updated. The caller must have WRITE permissions to the dataset associated with this read group set.
- optional ReadGroupSet read_group_set = 2
  The new read group set data. See `updateMask` for details on mutability of fields.
- optional protobuf.FieldMask update_mask = 3
  An optional mask specifying which fields to update. Supported fields: * [name][google.genomics.v1.ReadGroupSet.name]. * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id]. Leaving `updateMask` unset is equivalent to specifying all mutable fields.
rpc DeleteReadGroupSet (DeleteReadGroupSetRequest, protobuf.Empty)
reads.proto:125
Deletes a read group set. For the definitions of read group sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message DeleteReadGroupSetRequest
reads.proto:320
- string read_group_set_id = 1
  The ID of the read group set to be deleted. The caller must have WRITE permissions to the dataset associated with this read group set.
rpc GetReadGroupSet (GetReadGroupSetRequest, ReadGroupSet)
reads.proto:137
Gets a read group set by ID. For the definitions of read group sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message GetReadGroupSetRequest
reads.proto:326
- string read_group_set_id = 1
  The ID of the read group set.
rpc ListCoverageBuckets (ListCoverageBucketsRequest, ListCoverageBucketsResponse)
reads.proto:156
Lists fixed width coverage buckets for a read group set, each of which corresponds to a range of a reference sequence. Each bucket summarizes coverage information across its corresponding genomic range. For the definitions of read group sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Coverage is defined as the number of reads which are aligned to a given base in the reference sequence. Coverage buckets are available at several precomputed bucket widths, enabling retrieval of various coverage 'zoom levels'. The caller must have READ permissions for the target read group set.
message ListCoverageBucketsRequest
reads.proto:331
- string read_group_set_id = 1
  Required. The ID of the read group set over which coverage is requested.
- string reference_name = 3
  The name of the reference to query, within the reference set associated with this query. Optional.
- int64 start = 4
  The start position of the range on the reference, 0-based inclusive. If specified, `referenceName` must also be specified. Defaults to 0.
- int64 end = 5
  The end position of the range on the reference, 0-based exclusive. If specified, `referenceName` must also be specified. If unset or 0, defaults to the length of the reference.
- int64 target_bucket_width = 6
  The desired width of each reported coverage bucket in base pairs. This will be rounded down to the nearest precomputed bucket width; the value of which is returned as `bucketWidth` in the response. Defaults to infinity (each bucket spans an entire reference sequence) or the length of the target range, if specified. The smallest precomputed `bucketWidth` is currently 2048 base pairs; this is subject to change.
- string page_token = 7
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 8
  The maximum number of results to return in a single page. If unspecified, defaults to 1024. The maximum value is 2048.
message ListCoverageBucketsResponse
reads.proto:378
- int64 bucket_width = 1
  The length of each coverage bucket in base pairs. Note that buckets at the end of a reference sequence may be shorter. This value is omitted if the bucket width is infinity (the default behaviour, with no range or `targetBucketWidth`).
- repeated CoverageBucket coverage_buckets = 2
  The coverage buckets. The list of buckets is sparse; a bucket with 0 overlapping reads is not returned. A bucket never crosses more than one reference sequence. Each bucket has width `bucketWidth`, unless its end is the end of the reference sequence.
- string next_page_token = 3
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc SearchReads (SearchReadsRequest, SearchReadsResponse)
reads.proto:187
Gets a list of reads for one or more read group sets. For the definitions of read group sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Reads search operates over a genomic coordinate space of reference sequence & position defined over the reference sequences to which the requested read group sets are aligned. If a target positional range is specified, search returns all reads whose alignment to the reference genome overlap the range. A query which specifies only read group set IDs yields all reads in those read group sets, including unmapped reads. All reads returned (including reads on subsequent pages) are ordered by genomic coordinate (by reference sequence, then position). Reads with equivalent genomic coordinates are returned in an unspecified order. This order is consistent, such that two queries for the same content (regardless of page size) yield reads in the same order across their respective streams of paginated responses. Implements [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
message SearchReadsRequest
reads.proto:398
The read search request.
- repeated string read_group_set_ids = 1
  The IDs of the read group sets within which to search for reads. All specified read group sets must be aligned against a common set of reference sequences; this defines the genomic coordinates for the query. Must specify one of `readGroupSetIds` or `readGroupIds`.
- repeated string read_group_ids = 5
  The IDs of the read groups within which to search for reads. All specified read groups must belong to the same read group sets. Must specify one of `readGroupSetIds` or `readGroupIds`.
- string reference_name = 7
  The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to `*`, only unmapped reads are returned. If unspecified, all reads (mapped and unmapped) are returned.
- int64 start = 8
  The start position of the range on the reference, 0-based inclusive. If specified, `referenceName` must also be specified.
- int64 end = 9
  The end position of the range on the reference, 0-based exclusive. If specified, `referenceName` must also be specified.
- string page_token = 3
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 4
  The maximum number of results to return in a single page. If unspecified, defaults to 256. The maximum value is 2048.
message SearchReadsResponse
reads.proto:434
The read search response.
- repeated Read alignments = 1
  The list of matching alignments sorted by mapped genomic coordinate, if any, ascending in position within the same reference. Unmapped reads, which have no position, are returned contiguously and are sorted in ascending lexicographic order by fragment name.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.

rpc SearchReferenceSets (SearchReferenceSetsRequest, SearchReferenceSetsResponse)
references.proto:36
Searches for reference sets which match the given criteria. For the definitions of references and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71)
message SearchReferenceSetsRequest
references.proto:181
- repeated string md5checksums = 1
  If present, return reference sets for which the [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly.
- repeated string accessions = 2
  If present, return reference sets for which a prefix of any of [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions] matches any of these strings. Accession numbers typically have a main number and a version, for example `NC_000001.11`.
- string assembly_id = 3
  If present, return reference sets for which a substring of their `assemblyId` matches this string (case insensitive).
- string page_token = 4
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 5
  The maximum number of results to return in a single page. If unspecified, defaults to 1024. The maximum value is 4096.
message SearchReferenceSetsResponse
references.proto:206
- repeated ReferenceSet reference_sets = 1
  The matching reference sets.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc GetReferenceSet (GetReferenceSetRequest, ReferenceSet)
references.proto:52
Gets a reference set. For the definitions of references and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
message GetReferenceSetRequest
references.proto:216
- string reference_set_id = 1
  The ID of the reference set.
rpc SearchReferences (SearchReferencesRequest, SearchReferencesResponse)
references.proto:66
Searches for references which match the given criteria. For the definitions of references and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
message SearchReferencesRequest
references.proto:221
- repeated string md5checksums = 1
  If present, return references for which the [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
- repeated string accessions = 2
  If present, return references for which a prefix of any of [sourceAccessions][google.genomics.v1.Reference.source_accessions] match any of these strings. Accession numbers typically have a main number and a version, for example `GCF_000001405.26`.
- string reference_set_id = 3
  If present, return only references which belong to this reference set.
- string page_token = 4
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 5
  The maximum number of results to return in a single page. If unspecified, defaults to 1024. The maximum value is 4096.
message SearchReferencesResponse
references.proto:245
- repeated Reference references = 1
  The matching references.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc GetReference (GetReferenceRequest, Reference)
references.proto:82
Gets a reference. For the definitions of references and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
message GetReferenceRequest
references.proto:255
- string reference_id = 1
  The ID of the reference.
rpc ListBases (ListBasesRequest, ListBasesResponse)
references.proto:96
Lists the bases in a reference, optionally restricted to a range. For the definitions of references and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
message ListBasesRequest
references.proto:260
- string reference_id = 1
  The ID of the reference.
- int64 start = 2
  The start position (0-based) of this query. Defaults to 0.
- int64 end = 3
  The end position (0-based, exclusive) of this query. Defaults to the length of this reference.
- string page_token = 4
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 5
  The maximum number of bases to return in a single page. If unspecified, defaults to 200Kbp (kilo base pairs). The maximum value is 10Mbp (mega base pairs).
message ListBasesResponse
references.proto:282
- int64 offset = 1
  The offset position (0-based) of the given `sequence` from the start of this `Reference`. This value will differ for each page in a paginated request.
- string sequence = 2
  A substring of the bases that make up this reference.
- string next_page_token = 3
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.

rpc StreamReads (StreamReadsRequest, stream StreamReadsResponse)
reads.proto:36
Returns a stream of all the reads matching the search request, ordered by reference name, position, and ID.
message StreamReadsRequest
reads.proto:448
The stream reads request.
- string project_id = 1
  The Google Cloud project ID which will be billed for this access. The caller must have WRITE access to this project. Required.
- string read_group_set_id = 2
  The ID of the read group set from which to stream reads.
- string reference_name = 3
  The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to `*`, only unmapped reads are returned.
- int64 start = 4
  The start position of the range on the reference, 0-based inclusive. If specified, `referenceName` must also be specified.
- int64 end = 5
  The end position of the range on the reference, 0-based exclusive. If specified, `referenceName` must also be specified.
- int32 shard = 6
  Restricts results to a shard containing approximately `1/totalShards` of the normal response payload for this query. Results from a sharded request are disjoint from those returned by all queries which differ only in their shard parameter. A shard may yield 0 results; this is especially likely for large values of `totalShards`. Valid values are `[0, totalShards)`.
- int32 total_shards = 7
  Specifying `totalShards` causes a disjoint subset of the normal response payload to be returned for each query with a unique `shard` parameter specified. A best effort is made to yield equally sized shards. Sharding can be used to distribute processing amongst workers, where each worker is assigned a unique `shard` number and all workers specify the same `totalShards` number. The union of reads returned for all sharded queries `[0, totalShards)` is equal to those returned by a single unsharded query. Queries for different values of `totalShards` with common divisors will share shard boundaries. For example, streaming `shard` 2 of 5 `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10 `totalShards`. This property can be leveraged for adaptive retries.
message StreamReadsResponse
reads.proto:494
- repeated Read alignments = 1

rpc StreamVariants (StreamVariantsRequest, stream StreamVariantsResponse)
variants.proto:34
Returns a stream of all the variants matching the search request, ordered by reference name, position, and ID.
message StreamVariantsRequest
variants.proto:913
The stream variants request.
- string project_id = 1
  The Google Cloud project ID which will be billed for this access. The caller must have WRITE access to this project. Required.
- string variant_set_id = 2
  The variant set ID from which to stream variants.
- repeated string call_set_ids = 3
  Only return variant calls which belong to call sets with these IDs. Leaving this blank returns all variant calls.
- string reference_name = 4
  Required. Only return variants in this reference sequence.
- int64 start = 5
  The beginning of the window (0-based, inclusive) for which overlapping variants should be returned.
- int64 end = 6
  The end of the window (0-based, exclusive) for which overlapping variants should be returned.
message StreamVariantsResponse
variants.proto:938
- repeated Variant variants = 1

rpc ImportVariants (ImportVariantsRequest, longrunning.Operation)
variants.proto:62
Creates variant data by asynchronously importing the provided information. For the definitions of variant sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) The variants for import will be merged with any existing variant that matches its reference sequence, start, end, reference bases, and alternative bases. If no such variant exists, a new one will be created. When variants are merged, the call information from the new variant is added to the existing variant, and Variant info fields are merged as specified in [infoMergeConfig][google.genomics.v1.ImportVariantsRequest.info_merge_config]. As a special case, for single-sample VCF files, QUAL and FILTER fields will be moved to the call level; these are sometimes interpreted in a call-specific context. Imported VCF headers are appended to the metadata already in a variant set.
message ImportVariantsRequest
variants.proto:611
The variant data import request.
- string variant_set_id = 1
  Required. The variant set to which variant data should be imported.
- repeated string source_uris = 2
  A list of URIs referencing variant files in Google Cloud Storage. URIs can include wildcards [as described here](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). Note that recursive wildcards ('**') are not supported.
- ImportVariantsRequest.Format format = 3
  The format of the variant data being imported. If unspecified, defaults to `VCF`.
- bool normalize_reference_names = 5
  Convert reference names to the canonical representation. hg19 haplotypes (those reference names containing "_hap") are not modified in any way. All other reference names are modified according to the following rules: The reference name is capitalized. The "chr" prefix is dropped for all autosomes and sex chromosomes. For example "chr17" becomes "17" and "chrX" becomes "X". All mitochondrial chromosomes ("chrM", "chrMT", etc) become "MT".
- map<string, InfoMergeOperation> info_merge_config = 6
  A mapping between info field keys and the InfoMergeOperations to be performed on them. This is plumbed down to the MergeVariantRequests generated by the resulting import job.
rpc CreateVariantSet (CreateVariantSetRequest, VariantSet)
variants.proto:79
Creates a new variant set. For the definitions of variant sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) The provided variant set must have a valid `datasetId` set - all other fields are optional. Note that the `id` field will be ignored, as this is assigned by the server.
message CreateVariantSetRequest
variants.proto:660
The CreateVariantSet request.
- optional VariantSet variant_set = 1
  Required. The variant set to be created. Must have a valid `datasetId`.
rpc ExportVariantSet (ExportVariantSetRequest, longrunning.Operation)
variants.proto:91
Exports variant set data to an external destination. For the definitions of variant sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message ExportVariantSetRequest
variants.proto:666
The variant data export request.
- string variant_set_id = 1
  Required. The ID of the variant set that contains variant data which should be exported. The caller must have READ access to this variant set.
- repeated string call_set_ids = 2
  If provided, only variant call information from the specified call sets will be exported. By default all variant calls are exported.
- string project_id = 3
  Required. The Google Cloud project ID that owns the destination BigQuery dataset. The caller must have WRITE access to this project. This project will also own the resulting export job.
- ExportVariantSetRequest.Format format = 4
  The format for the exported data.
- string bigquery_dataset = 5
  Required. The BigQuery dataset to export data to. This dataset must already exist. Note that this is distinct from the Genomics concept of "dataset".
- string bigquery_table = 6
  Required. The BigQuery table to export data to. If the table doesn't exist, it will be created. If it already exists, it will be overwritten.
rpc GetVariantSet (GetVariantSetRequest, VariantSet)
variants.proto:104
Gets a variant set by ID. For the definitions of variant sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message GetVariantSetRequest
variants.proto:701
The variant set request.
- string variant_set_id = 1
  Required. The ID of the variant set.
rpc SearchVariantSets (SearchVariantSetsRequest, SearchVariantSetsResponse)
variants.proto:118
Returns a list of all variant sets matching search criteria. For the definitions of variant sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.searchVariantSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L49).
message SearchVariantSetsRequest
variants.proto:707
The search variant sets request.
- repeated string dataset_ids = 1
  Exactly one dataset ID must be provided here. Only variant sets which belong to this dataset will be returned.
- string page_token = 2
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 3
  The maximum number of results to return in a single page. If unspecified, defaults to 1024.
message SearchVariantSetsResponse
variants.proto:723
The search variant sets response.
- repeated VariantSet variant_sets = 1
  The variant sets belonging to the requested dataset.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc DeleteVariantSet (DeleteVariantSetRequest, protobuf.Empty)
variants.proto:132
Deletes a variant set including all variants, call sets, and calls within. This is not reversible. For the definitions of variant sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message DeleteVariantSetRequest
variants.proto:734
The delete variant set request.
- string variant_set_id = 1
  The ID of the variant set to be deleted.
rpc UpdateVariantSet (UpdateVariantSetRequest, VariantSet)
variants.proto:144
Updates a variant set using patch semantics. For the definitions of variant sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message UpdateVariantSetRequest
variants.proto:739
- string variant_set_id = 1
  The ID of the variant set to be updated (must already exist).
- optional VariantSet variant_set = 2
  The new variant data. Only the variant_set.metadata will be considered for update.
- optional protobuf.FieldMask update_mask = 5
  An optional mask specifying which fields to update. Supported fields: * [metadata][google.genomics.v1.VariantSet.metadata]. * [name][google.genomics.v1.VariantSet.name]. * [description][google.genomics.v1.VariantSet.description]. Leaving `updateMask` unset is equivalent to specifying all mutable fields.
rpc SearchVariants (SearchVariantsRequest, SearchVariantsResponse)
variants.proto:159
Gets a list of variants matching the criteria. For the definitions of variants and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.searchVariants](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L126).
message SearchVariantsRequest
variants.proto:759
The variant search request.
- repeated string variant_set_ids = 1
  At most one variant set ID may be provided. Only variants from this variant set will be returned. If omitted, a call set id must be included in the request.
- string variant_name = 2
  Only return variants which have exactly this name.
- repeated string call_set_ids = 3
  Only return variant calls which belong to call sets with these ids. Leaving this blank returns all variant calls. If a variant has no calls belonging to any of these call sets, it won't be returned at all.
- string reference_name = 4
  Required. Only return variants in this reference sequence.
- int64 start = 5
  The beginning of the window (0-based, inclusive) for which overlapping variants should be returned. If unspecified, defaults to 0.
- int64 end = 6
  The end of the window, 0-based exclusive. If unspecified or 0, defaults to the length of the reference.
- string page_token = 7
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 8
  The maximum number of variants to return in a single page. If unspecified, defaults to 5000. The maximum value is 10000.
- int32 max_calls = 9
  The maximum number of calls to return in a single page. Note that this limit may be exceeded in the event that a matching variant contains more calls than the requested maximum. If unspecified, defaults to 5000. The maximum value is 10000.
message SearchVariantsResponse
variants.proto:801
The variant search response.
- repeated Variant variants = 1
  The list of matching Variants.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc CreateVariant (CreateVariantRequest, Variant)
variants.proto:171
Creates a new variant. For the definitions of variants and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message CreateVariantRequest
variants.proto:811
- optional Variant variant = 1
  The variant to be created.
rpc UpdateVariant (UpdateVariantRequest, Variant)
variants.proto:186
Updates a variant. For the definitions of variants and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) This method supports patch semantics. Returns the modified variant without its calls.
message UpdateVariantRequest
variants.proto:816
- string variant_id = 1
  The ID of the variant to be updated.
- optional Variant variant = 2
  The new variant data.
- optional protobuf.FieldMask update_mask = 3
  An optional mask specifying which fields to update. At this time, mutable fields are [names][google.genomics.v1.Variant.names] and [info][google.genomics.v1.Variant.info]. Acceptable values are "names" and "info". If unspecified, all mutable fields will be updated.
rpc DeleteVariant (DeleteVariantRequest, protobuf.Empty)
variants.proto:198
Deletes a variant. For the definitions of variants and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message DeleteVariantRequest
variants.proto:830
- string variant_id = 1
  The ID of the variant to be deleted.
rpc GetVariant (GetVariantRequest, Variant)
variants.proto:209
Gets a variant by ID. For the definitions of variants and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message GetVariantRequest
variants.proto:835
- string variant_id = 1
  The ID of the variant.
rpc MergeVariants (MergeVariantsRequest, protobuf.Empty)
variants.proto:306
Merges the given variants with existing variants. For the definitions of variants and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Each variant will be merged with an existing variant that matches its reference sequence, start, end, reference bases, and alternative bases. If no such variant exists, a new one will be created. When variants are merged, the call information from the new variant is added to the existing variant. Variant info fields are merged as specified in the [infoMergeConfig][google.genomics.v1.MergeVariantsRequest.info_merge_config] field of the MergeVariantsRequest. Please exercise caution when using this method! It is easy to introduce mistakes in existing variants and difficult to back out of them. For example, suppose you were trying to merge a new variant with an existing one and both variants contain calls that belong to callsets with the same callset ID. // Existing variant - irrelevant fields trimmed for clarity { "variantSetId": "10473108253681171589", "referenceName": "1", "start": "10582", "referenceBases": "G", "alternateBases": [ "A" ], "calls": [ { "callSetId": "10473108253681171589-0", "callSetName": "CALLSET0", "genotype": [ 0, 1 ], } ] } // New variant with conflicting call information { "variantSetId": "10473108253681171589", "referenceName": "1", "start": "10582", "referenceBases": "G", "alternateBases": [ "A" ], "calls": [ { "callSetId": "10473108253681171589-0", "callSetName": "CALLSET0", "genotype": [ 1, 1 ], } ] } The resulting merged variant would overwrite the existing calls with those from the new variant: { "variantSetId": "10473108253681171589", "referenceName": "1", "start": "10582", "referenceBases": "G", "alternateBases": [ "A" ], "calls": [ { "callSetId": "10473108253681171589-0", "callSetName": "CALLSET0", "genotype": [ 1, 1 ], } ] } This may be the desired outcome, but it is up to the user to determine if that is indeed the case.
message MergeVariantsRequest
variants.proto:840
- string variant_set_id = 1
  The destination variant set.
- repeated Variant variants = 2
  The variants to be merged with existing variants.
- map<string, InfoMergeOperation> info_merge_config = 3
  A mapping between info field keys and the InfoMergeOperations to be performed on them.
rpc SearchCallSets (SearchCallSetsRequest, SearchCallSetsResponse)
variants.proto:321
Gets a list of call sets matching the criteria. For the definitions of call sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Implements [GlobalAllianceApi.searchCallSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L178).
message SearchCallSetsRequest
variants.proto:853
The call set search request.
- repeated string variant_set_ids = 1
  Restrict the query to call sets within the given variant sets. At least one ID must be provided.
- string name = 2
  Only return call sets for which a substring of the name matches this string.
- string page_token = 3
  The continuation token, which is used to page through large result sets. To get the next page of results, set this parameter to the value of `nextPageToken` from the previous response.
- int32 page_size = 4
  The maximum number of results to return in a single page. If unspecified, defaults to 1024.
message SearchCallSetsResponse
variants.proto:873
The call set search response.
- repeated CallSet call_sets = 1
  The list of matching call sets.
- string next_page_token = 2
  The continuation token, which is used to page through large result sets. Provide this value in a subsequent request to return the next page of results. This field will be empty if there aren't any additional results.
rpc CreateCallSet (CreateCallSetRequest, CallSet)
variants.proto:333
Creates a new call set. For the definitions of call sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message CreateCallSetRequest
variants.proto:883
- optional CallSet call_set = 1
  The call set to be created.
rpc UpdateCallSet (UpdateCallSetRequest, CallSet)
variants.proto:347
Updates a call set. For the definitions of call sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) This method supports patch semantics.
message UpdateCallSetRequest
variants.proto:888
- string call_set_id = 1
  The ID of the call set to be updated.
- optional CallSet call_set = 2
  The new call set data.
- optional protobuf.FieldMask update_mask = 3
  An optional mask specifying which fields to update. At this time, the only mutable field is [name][google.genomics.v1.CallSet.name]. The only acceptable value is "name". If unspecified, all mutable fields will be updated.
rpc DeleteCallSet (DeleteCallSetRequest, protobuf.Empty)
variants.proto:359
Deletes a call set. For the definitions of call sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message DeleteCallSetRequest
variants.proto:902
- string call_set_id = 1
  The ID of the call set to be deleted.
rpc GetCallSet (GetCallSetRequest, CallSet)
variants.proto:370
Gets a call set by ID. For the definitions of call sets and other genomics resources, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message GetCallSetRequest
variants.proto:907
- string call_set_id = 1
  The ID of the call set.

An annotation describes a region of the reference genome. The value of an annotation may be one of several canonical types, supplemented by arbitrary info tags. An annotation is not inherently associated with a specific sample or individual (though a client could choose to use annotations in this way). Example canonical annotation types are `GENE` and `VARIANT`.

Used as response type in: AnnotationServiceV1.CreateAnnotation, AnnotationServiceV1.GetAnnotation, AnnotationServiceV1.UpdateAnnotation

Used as field type in: BatchCreateAnnotationsRequest, BatchCreateAnnotationsResponse.Entry, CreateAnnotationRequest, SearchAnnotationsResponse, UpdateAnnotationRequest

string id = 1
The server-generated annotation ID, unique across all annotations.
string annotation_set_id = 2
The annotation set to which this annotation belongs.
string name = 3
The display name of this annotation.
string reference_id = 4
The ID of the Google Genomics reference associated with this range.
string reference_name = 5
The display name corresponding to the reference specified by `referenceId`, for example `chr1`, `1`, or `chrX`.
int64 start = 6
The start position of the range on the reference, 0-based inclusive.
int64 end = 7
The end position of the range on the reference, 0-based exclusive.
bool reverse_strand = 8
Whether this range refers to the reverse strand, as opposed to the forward strand. Note that regardless of this field, the start/end position of the range always refer to the forward strand.
AnnotationType type = 9
The data type for this annotation. Must match the containing annotation set's type.
oneof value
- VariantAnnotation variant = 10
  A variant annotation, which describes the effect of a variant on the genome, the coding sequence, and/or higher level consequences at the organism level e.g. pathogenicity. This field is only set for annotations of type `VARIANT`.
- Transcript transcript = 11
  A transcript value represents the assertion that a particular region of the reference genome may be transcribed as RNA. An alternative splicing pattern would be represented as a separate transcript object. This field is only set for annotations of type `TRANSCRIPT`.
map<string, protobuf.ListValue> info = 12
A map of additional annotation information. This must be of the form map<string, string[]> (string key mapping to a list of string values).

An annotation set is a logical grouping of annotations that share consistent type information and provenance. Examples of annotation sets include 'all genes from refseq', and 'all variant annotations from ClinVar'.

Used as response type in: AnnotationServiceV1.CreateAnnotationSet, AnnotationServiceV1.GetAnnotationSet, AnnotationServiceV1.UpdateAnnotationSet

Used as field type in: CreateAnnotationSetRequest, SearchAnnotationSetsResponse, UpdateAnnotationSetRequest

string id = 1
The server-generated annotation set ID, unique across all annotation sets.
string dataset_id = 2
The dataset to which this annotation set belongs.
string reference_set_id = 3
The ID of the reference set that defines the coordinate space for this set's annotations.
string name = 4
The display name for this annotation set.
string source_uri = 5
The source URI describing the file from which this annotation set was generated, if any.
AnnotationType type = 6
The type of annotations contained within this set.
map<string, protobuf.ListValue> info = 17
A map of additional annotation set information. This must be of the form map<string, string[]> (string key mapping to a list of string values).

When an [Annotation][google.genomics.v1.Annotation] or [AnnotationSet][google.genomics.v1.AnnotationSet] is created, if `type` is not specified it will be set to `GENERIC`.

Used in: Annotation, AnnotationSet, SearchAnnotationSetsRequest

ANNOTATION_TYPE_UNSPECIFIED = 0
GENERIC = 1
A `GENERIC` annotation type should be used when no other annotation type will suffice. This represents an untyped annotation of the reference genome.
VARIANT = 2
A `VARIANT` annotation type.
GENE = 3
A `GENE` annotation type represents the existence of a gene at the associated reference coordinates. The start coordinate is typically the gene's transcription start site and the end is typically the end of the gene's last exon.
TRANSCRIPT = 4
A `TRANSCRIPT` annotation type represents the assertion that a particular region of the reference genome may be transcribed as RNA.

Used in: BatchCreateAnnotationsResponse

optional rpc.Status status = 1
The creation status.
optional Annotation annotation = 2
The created annotation, if creation was successful.

A call set is a collection of variant calls, typically for one sample. It belongs to a variant set. For more genomics resource definitions, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)

Used as response type in: VariantServiceV1.CreateCallSet, VariantServiceV1.GetCallSet, VariantServiceV1.UpdateCallSet

Used as field type in: CreateCallSetRequest, SearchCallSetsResponse, UpdateCallSetRequest

string id = 1
The server-generated call set ID, unique across all call sets.
string name = 2
The call set name.
string sample_id = 7
The sample ID this call set corresponds to.
repeated string variant_set_ids = 6
The IDs of the variant sets this call set belongs to. This field must contain exactly one element, as a call set belongs to a single variant set. This field is repeated for compatibility with the [GA4GH 0.5.1 API](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variants.avdl#L76).
int64 created = 5
The date this call set was created in milliseconds from the epoch.
map<string, protobuf.ListValue> info = 4
A map of additional call set information. This must be of the form map<string, string[]> (string key mapping to a list of string values).

A single CIGAR operation.

Used in: LinearAlignment

CigarUnit.Operation operation = 1
int64 operation_length = 2
The number of genomic bases that the operation runs for. Required.
string reference_sequence = 3
`referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) and deletions (`DELETE`). Filling this field replaces SAM's MD tag. If the relevant information is not available, this field is unset.

Describes the different types of CIGAR alignment operations that exist. Used wherever CIGAR alignments are used.

Used in: CigarUnit

OPERATION_UNSPECIFIED = 0
ALIGNMENT_MATCH = 1
An alignment match indicates that a sequence can be aligned to the reference without evidence of an INDEL. Unlike the `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` operator does not indicate whether the reference and read sequences are an exact match. This operator is equivalent to SAM's `M`.
INSERT = 2
The insert operator indicates that the read contains evidence of bases being inserted into the reference. This operator is equivalent to SAM's `I`.
DELETE = 3
The delete operator indicates that the read contains evidence of bases being deleted from the reference. This operator is equivalent to SAM's `D`.
SKIP = 4
The skip operator indicates that this read skips a long segment of the reference, but the bases have not been deleted. This operator is commonly used when working with RNA-seq data, where reads may skip long segments of the reference between exons. This operator is equivalent to SAM's `N`.
CLIP_SOFT = 5
The soft clip operator indicates that bases at the start/end of a read have not been considered during alignment. This may occur if the majority of a read maps, except for low quality bases at the start/end of a read. This operator is equivalent to SAM's `S`. Bases that are soft clipped will still be stored in the read.
CLIP_HARD = 6
The hard clip operator indicates that bases at the start/end of a read have been omitted from this alignment. This may occur if this linear alignment is part of a chimeric alignment, or if the read has been trimmed (for example, during error correction or to trim poly-A tails for RNA-seq). This operator is equivalent to SAM's `H`.
PAD = 7
The pad operator indicates that there is padding in an alignment. This operator is equivalent to SAM's `P`.
SEQUENCE_MATCH = 8
This operator indicates that this portion of the aligned sequence exactly matches the reference. This operator is equivalent to SAM's `=`.
SEQUENCE_MISMATCH = 9
This operator indicates that this portion of the aligned sequence is an alignment match to the reference, but a sequence mismatch. This can indicate a SNP or a read error. This operator is equivalent to SAM's `X`.

A bucket over which read coverage has been precomputed. A bucket corresponds to a specific range of the reference sequence.

Used in: ListCoverageBucketsResponse

optional Range range = 1
The genomic coordinate range spanned by this bucket.
float mean_coverage = 2
The average number of reads which are aligned to each individual reference base in this bucket.

A Dataset is a collection of genomic data. For more genomics resource definitions, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)

Used as response type in: DatasetServiceV1.CreateDataset, DatasetServiceV1.GetDataset, DatasetServiceV1.UndeleteDataset, DatasetServiceV1.UpdateDataset

Used as field type in: CreateDatasetRequest, ListDatasetsResponse, UpdateDatasetRequest

string id = 1
The server-generated dataset ID, unique across all datasets.
string project_id = 2
The Google Cloud project ID that this dataset belongs to.
string name = 3
The dataset name.
optional protobuf.Timestamp create_time = 4
The time this dataset was created, in seconds from the epoch.

Used in: ExportVariantSetRequest

FORMAT_UNSPECIFIED = 0
FORMAT_BIGQUERY = 1
Export the data to Google BigQuery.

Used in: VariantAnnotation.ClinicalCondition

string source_name = 1
The name of the source of this data.
string id = 2
The id used by the source of this data.

Used in: ImportReadGroupSetsRequest

PARTITION_STRATEGY_UNSPECIFIED = 0
PER_FILE_PER_SAMPLE = 1
In most cases, this strategy yields one read group set per file. This is the default behavior. Allocate one read group set per file per sample. For BAM files, read groups are considered to share a sample if they have identical sample names. Furthermore, all reads for each file which do not belong to a read group, if any, will be grouped into a single read group set per-file.
MERGE_ALL = 2
Includes all read groups in all imported files into a single read group set. Requires that the headers for all imported files are equivalent. All reads which do not belong to a read group, if any, will be grouped into a separate read group set.

The read group set import response.

repeated string read_group_set_ids = 1
IDs of the read group sets that were created.

Used in: ImportVariantsRequest

FORMAT_UNSPECIFIED = 0
FORMAT_VCF = 1
VCF (Variant Call Format). The VCF files may be gzip compressed. gVCF is also supported.
FORMAT_COMPLETE_GENOMICS = 2
Complete Genomics masterVarBeta format. The masterVarBeta files may be bzip2 compressed.

The variant data import response.

repeated string call_set_ids = 1
IDs of the call sets created during the import.

Operations to be performed during import on Variant info fields. These operations are set for each info field in the info_merge_config map of ImportVariantsRequest, which is plumbed down to the MergeVariantRequests generated by the import job.

Used in: ImportVariantsRequest, MergeVariantsRequest

INFO_MERGE_OPERATION_UNSPECIFIED = 0
IGNORE_NEW = 1
By default, Variant info fields are persisted if the Variant doesn't already exist in the variantset. If the Variant is equivalent to a Variant already in the variantset, the incoming Variant's info field is ignored in favor of that of the already persisted Variant.
MOVE_TO_CALLS = 2
This operation removes an info field from the incoming Variant and persists this info field in each of the incoming Variant's Calls.

A linear alignment can be represented by one CIGAR string. Describes the mapped position and local alignment of the read to the reference.

Used in: Read

optional Position position = 1
The position of this alignment.
int32 mapping_quality = 2
The mapping quality of this alignment. Represents how likely the read maps to this position as opposed to other locations. Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to the nearest integer.
repeated CigarUnit cigar = 3
Represents the local alignment of this sequence (alignment matches, indels, etc) against the reference.

An event that occurred during an [Operation][google.longrunning.Operation].

Used in: OperationMetadata

optional protobuf.Timestamp start_time = 1
Optional time of when event started.
optional protobuf.Timestamp end_time = 2
Optional time of when event finished. An event can have a start time and no finish time. If an event has a finish time, there must be a start time.
string description = 3
Required description of event.

Metadata describing an [Operation][google.longrunning.Operation].

string project_id = 1
The Google Cloud Project in which the job is scoped.
optional protobuf.Timestamp create_time = 2
The time at which the job was submitted to the Genomics service.
optional protobuf.Timestamp start_time = 3
The time at which the job began to run.
optional protobuf.Timestamp end_time = 4
The time at which the job stopped running.
optional protobuf.Any request = 5
The original request that started the operation. Note that this will be in the current version of the API. If the operation was started with the v1beta2 API and a GetOperation is performed on the v1 API, a v1 request will be returned.
repeated OperationEvent events = 6
Optional event messages that were generated during the job's execution. This also contains any warnings that were generated during import or export.
string client_id = 7
This field is deprecated. Use `labels` instead. Optionally provided by the caller when submitting the request that creates the operation.
optional protobuf.Any runtime_metadata = 8
Runtime metadata on this Operation.
map<string, string> labels = 9
Optionally provided by the caller when submitting the request that creates the operation.

An abstraction for referring to a genomic position, in relation to some already known reference. For now, represents a genomic position as a reference name, a base number on that reference (0-based), and a determination of forward or reverse strand.

Used in: LinearAlignment, Read

string reference_name = 1
The name of the reference in whatever reference set is being used.
int64 position = 2
The 0-based offset from the start of the forward strand for that reference.
bool reverse_strand = 3
Whether this position is on the reverse strand, as opposed to the forward strand.

A 0-based half-open genomic coordinate range for search requests.

Used in: CoverageBucket

string reference_name = 1
The reference sequence name, for example `chr1`, `1`, or `chrX`.
int64 start = 2
The start position of the range on the reference, 0-based inclusive.
int64 end = 3
The end position of the range on the reference, 0-based exclusive.

A read alignment describes a linear alignment of a string of DNA to a [reference sequence][google.genomics.v1.Reference], in addition to metadata about the fragment (the molecule of DNA sequenced) and the read (the bases which were read by the sequencer). A read is equivalent to a line in a SAM file. A read belongs to exactly one read group and exactly one [read group set][google.genomics.v1.ReadGroupSet]. For more genomics resource definitions, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) ### Reverse-stranded reads Mapped reads (reads having a non-null `alignment`) can be aligned to either the forward or the reverse strand of their associated reference. Strandedness of a mapped read is encoded by `alignment.position.reverseStrand`. If we consider the reference to be a forward-stranded coordinate space of `[0, reference.length)` with `0` as the left-most position and `reference.length` as the right-most position, reads are always aligned left to right. That is, `alignment.position.position` always refers to the left-most reference coordinate and `alignment.cigar` describes the alignment of this read to the reference from left to right. All per-base fields such as `alignedSequence` and `alignedQuality` share this same left-to-right orientation; this is true of reads which are aligned to either strand. For reverse-stranded reads, this means that `alignedSequence` is the reverse complement of the bases that were originally reported by the sequencing machine. ### Generating a reference-aligned sequence string When interacting with mapped reads, it's often useful to produce a string representing the local alignment of the read to reference. 
The following pseudocode demonstrates one way of doing this: out = "" offset = 0 for c in read.alignment.cigar { switch c.operation { case "ALIGNMENT_MATCH", "SEQUENCE_MATCH", "SEQUENCE_MISMATCH": out += read.alignedSequence[offset:offset+c.operationLength] offset += c.operationLength break case "CLIP_SOFT", "INSERT": offset += c.operationLength break case "PAD": out += repeat("*", c.operationLength) break case "DELETE": out += repeat("-", c.operationLength) break case "SKIP": out += repeat(" ", c.operationLength) break case "CLIP_HARD": break } } return out ### Converting to SAM's CIGAR string The following pseudocode generates a SAM CIGAR string from the `cigar` field. Note that this is a lossy conversion (`cigar.referenceSequence` is lost). cigarMap = { "ALIGNMENT_MATCH": "M", "INSERT": "I", "DELETE": "D", "SKIP": "N", "CLIP_SOFT": "S", "CLIP_HARD": "H", "PAD": "P", "SEQUENCE_MATCH": "=", "SEQUENCE_MISMATCH": "X", } cigarStr = "" for c in read.alignment.cigar { cigarStr += c.operationLength + cigarMap[c.operation] } return cigarStr

Used in: SearchReadsResponse, StreamReadsResponse

string id = 1
The server-generated read ID, unique across all reads. This is different from the `fragmentName`.
string read_group_id = 2
The ID of the read group this read belongs to. A read belongs to exactly one read group. This is a server-generated ID which is distinct from SAM's RG tag (for that value, see [ReadGroup.name][google.genomics.v1.ReadGroup.name]).
string read_group_set_id = 3
The ID of the read group set this read belongs to. A read belongs to exactly one read group set.
string fragment_name = 4
The fragment name. Equivalent to QNAME (query template name) in SAM.
bool proper_placement = 5
The orientation and the distance between reads from the fragment are consistent with the sequencing protocol (SAM flag 0x2).
bool duplicate_fragment = 6
The fragment is a PCR or optical duplicate (SAM flag 0x400).
int32 fragment_length = 7
The observed length of the fragment, equivalent to TLEN in SAM.
int32 read_number = 8
The read number in sequencing. 0-based and less than numberReads. This field replaces SAM flag 0x40 and 0x80.
int32 number_reads = 9
The number of reads in the fragment (extension to SAM flag 0x1).
bool failed_vendor_quality_checks = 10
Whether this read did not pass filters, such as platform or vendor quality controls (SAM flag 0x200).
optional LinearAlignment alignment = 11
The linear alignment for this alignment record. This field is null for unmapped reads.
bool secondary_alignment = 12
Whether this alignment is secondary. Equivalent to SAM flag 0x100. A secondary alignment represents an alternative to the primary alignment for this read. Aligners may return secondary alignments if a read can map ambiguously to multiple coordinates in the genome. By convention, each read has one and only one alignment where both `secondaryAlignment` and `supplementaryAlignment` are false.
bool supplementary_alignment = 13
Whether this alignment is supplementary. Equivalent to SAM flag 0x800. Supplementary alignments are used in the representation of a chimeric alignment. In a chimeric alignment, a read is split into multiple linear alignments that map to different reference contigs. The first linear alignment in the read will be designated as the representative alignment; the remaining linear alignments will be designated as supplementary alignments. These alignments may have different mapping quality scores. In each linear alignment in a chimeric alignment, the read will be hard clipped. The `alignedSequence` and `alignedQuality` fields in the alignment record will only represent the bases for its respective linear alignment.
string aligned_sequence = 14
The bases of the read sequence contained in this alignment record, **without CIGAR operations applied** (equivalent to SEQ in SAM). `alignedSequence` and `alignedQuality` may be shorter than the full read sequence and quality. This will occur if the alignment is part of a chimeric alignment, or if the read was trimmed. When this occurs, the CIGAR for this read will begin/end with a hard clip operator that will indicate the length of the excised sequence.
repeated int32 aligned_quality = 15
The quality of the read sequence contained in this alignment record (equivalent to QUAL in SAM). `alignedSequence` and `alignedQuality` may be shorter than the full read sequence and quality. This will occur if the alignment is part of a chimeric alignment, or if the read was trimmed. When this occurs, the CIGAR for this read will begin/end with a hard clip operator that will indicate the length of the excised sequence.
optional Position next_mate_position = 16
The mapping of the primary alignment of the `(readNumber+1)%numberReads` read in the fragment. It replaces mate position and mate strand in SAM.
map<string, protobuf.ListValue> info = 17
A map of additional read alignment information. This must be of the form map<string, string[]> (string key mapping to a list of string values).

A read group is all the data that's processed the same way by the sequencer.

Used in: ReadGroupSet

string id = 1
The server-generated read group ID, unique for all read groups. Note: This is different from the @RG ID field in the SAM spec. For that value, see [name][google.genomics.v1.ReadGroup.name].
string dataset_id = 2
The dataset to which this read group belongs.
string name = 3
The read group name. This corresponds to the @RG ID field in the SAM spec.
string description = 4
A free-form text description of this read group.
string sample_id = 5
A client-supplied sample identifier for the reads in this read group.
optional ReadGroup.Experiment experiment = 6
The experiment used to generate this read group.
int32 predicted_insert_size = 7
The predicted insert size of this read group. The insert size is the length of the sequenced DNA fragment from end-to-end, not including the adapters.
repeated ReadGroup.Program programs = 10
The programs used to generate this read group. Programs are always identical for all read groups within a read group set. For this reason, only the first read group in a returned set will have this field populated.
string reference_set_id = 11
The reference set the reads in this read group are aligned to.
map<string, protobuf.ListValue> info = 12
A map of additional read group information. This must be of the form map<string, string[]> (string key mapping to a list of string values).

Used in: ReadGroup

string library_id = 1
A client-supplied library identifier; a library is a collection of DNA fragments which have been prepared for sequencing from a sample. This field is important for quality control as error or bias can be introduced during sample preparation.
string platform_unit = 2
The platform unit used as part of this experiment, for example flowcell-barcode.lane for Illumina or slide for SOLiD. Corresponds to the @RG PU field in the SAM spec.
string sequencing_center = 3
The sequencing center used as part of this experiment.
string instrument_model = 4
The instrument model used as part of this experiment. This maps to sequencing technology in the SAM spec.

Used in: ReadGroup

string command_line = 1
The command line used to run this program.
string id = 2
The user specified locally unique ID of the program. Used along with `prevProgramId` to define an ordering between programs.
string name = 3
The display name of the program. This is typically the colloquial name of the tool used, for example 'bwa' or 'picard'.
string prev_program_id = 4
The ID of the program run before this one.
string version = 5
The version of the program run.

A read group set is a logical collection of read groups, which are collections of reads produced by a sequencer. A read group set typically models reads corresponding to one sample, sequenced one way, and aligned one way. * A read group set belongs to one dataset. * A read group belongs to one read group set. * A read belongs to one read group. For more genomics resource definitions, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)

Used as response type in: ReadServiceV1.GetReadGroupSet, ReadServiceV1.UpdateReadGroupSet

Used as field type in: SearchReadGroupSetsResponse, UpdateReadGroupSetRequest

string id = 1
The server-generated read group set ID, unique for all read group sets.
string dataset_id = 2
The dataset to which this read group set belongs.
string reference_set_id = 3
The reference set to which the reads in this read group set are aligned.
string name = 4
The read group set name. By default this will be initialized to the sample name of the sequenced data contained in this set.
string filename = 5
The filename of the original source file for this read group set, if any.
repeated ReadGroup read_groups = 6
The read groups in this set. There are typically 1-10 read groups in a read group set.
map<string, protobuf.ListValue> info = 7
A map of additional read group set information.

A reference is a canonical assembled DNA sequence, intended to act as a reference coordinate space for other genomic annotations. A single reference might represent the human chromosome 1 or mitochondrial DNA, for instance. A reference belongs to one or more reference sets. For more genomics resource definitions, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)

Used as response type in: ReferenceServiceV1.GetReference

Used as field type in: SearchReferencesResponse

string id = 1
The server-generated reference ID, unique across all references.
int64 length = 2
The length of this reference's sequence.
string md5checksum = 3
MD5 of the upper-case sequence excluding all whitespace characters (this is equivalent to SQ:M5 in SAM). This value is represented in lower case hexadecimal format.
string name = 4
The name of this reference, for example `22`.
string source_uri = 5
The URI from which the sequence was obtained. Typically specifies a FASTA format file.
repeated string source_accessions = 6
All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally with a version number, for example `GCF_000001405.26`.
int32 ncbi_taxon_id = 7
ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human.

ReferenceBound records an upper bound for the starting coordinate of variants in a particular reference.

Used in: VariantSet

string reference_name = 1
The name of the reference associated with this reference bound.
int64 upper_bound = 2
An upper bound (inclusive) on the starting coordinate of any variant in the reference sequence.

A reference set is a set of references which typically comprise a reference assembly for a species, such as `GRCh38` which is representative of the human genome. A reference set defines a common coordinate space for comparing reference-aligned experimental data. A reference set contains 1 or more references. For more genomics resource definitions, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)

Used as response type in: ReferenceServiceV1.GetReferenceSet

Used as field type in: SearchReferenceSetsResponse

string id = 1
The server-generated reference set ID, unique across all reference sets.
repeated string reference_ids = 2
The IDs of the reference objects that are part of this set. `Reference.md5checksum` must be unique within this set.
string md5checksum = 3
Order-independent MD5 checksum which identifies this reference set. The checksum is computed by sorting all lower case hexadecimal `reference.md5checksum` strings (for all references in this set) in ascending lexicographic order, concatenating, and taking the MD5 of that value. The resulting value is represented in lower case hexadecimal format.
int32 ncbi_taxon_id = 4
ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human) indicating the species which this reference set is intended to model. Note that contained references may specify a different `ncbiTaxonId`, as assemblies may contain reference sequences which do not belong to the modeled species, for example EBV in a human reference genome.
string description = 5
Free text description of this reference set.
string assembly_id = 6
Public id of this reference set, such as `GRCh37`.
string source_uri = 7
The URI from which the references were obtained.
repeated string source_accessions = 8
All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally with a version number, for example `NC_000001.11`.

A transcript represents the assertion that a particular region of the reference genome may be transcribed as RNA.

Used in: Annotation

string gene_id = 1
The annotation ID of the gene from which this transcript is transcribed.
repeated Transcript.Exon exons = 2
The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that compose this transcript. This field should be unset for genomes where transcript splicing does not occur, for example prokaryotes. Introns are regions of the transcript that are not included in the spliced RNA product. Though not explicitly modeled here, intron ranges can be deduced; all regions of this transcript that are not exons are introns. Exonic sequences do not necessarily code for a translational product (amino acids). Only the regions of exons bounded by the [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond to coding DNA sequence. Exons are ordered by start position and may not overlap.
optional Transcript.CodingSequence coding_sequence = 3
The range of the coding sequence for this transcript, if any. To determine the exact ranges of coding sequence, intersect this range with those of the [exons][google.genomics.v1.Transcript.exons], if any. If there are any [exons][google.genomics.v1.Transcript.exons], the [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start and end within them. Note that in some cases, the reference genome will not exactly match the observed mRNA transcript e.g. due to variance in the source genome from reference. In these cases, [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not necessarily match the expected reference reading frame and coding exon reference bases cannot necessarily be concatenated to produce the original transcript mRNA.

Used in: Transcript

int64 start = 1
The start of the coding sequence on this annotation's reference sequence, 0-based inclusive. Note that this position is relative to the reference start, and *not* the containing annotation start.
int64 end = 2
The end of the coding sequence on this annotation's reference sequence, 0-based exclusive. Note that this position is relative to the reference start, and *not* the containing annotation start.

Used in: Transcript

int64 start = 1
The start position of the exon on this annotation's reference sequence, 0-based inclusive. Note that this is relative to the reference start, and **not** the containing annotation start.
int64 end = 2
The end position of the exon on this annotation's reference sequence, 0-based exclusive. Note that this is relative to the reference start, and *not* the containing annotation start.
optional protobuf.Int32Value frame = 3
The frame of this exon. Contains a value of 0, 1, or 2, which indicates the offset of the first coding base of the exon within the reading frame of the coding DNA sequence, if any. This field is dependent on the strandedness of this annotation (see [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]). For forward stranded annotations, this offset is relative to the [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse strand annotations, this offset is relative to the [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`. Unset if this exon does not intersect the coding sequence. Upon creation of a transcript, the frame must be populated for all or none of the coding exons.

A variant represents a change in DNA sequence relative to a reference sequence. For example, a variant could represent a SNP or an insertion. Variants belong to a variant set. For more genomics resource definitions, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) Each of the calls on a variant represents a determination of genotype with respect to that variant. For example, a call might assign probability of 0.32 to the occurrence of a SNP named rs1234 in a sample named NA12345. A call belongs to a call set, which contains related calls typically from one sample.

Used as response type in: VariantServiceV1.CreateVariant, VariantServiceV1.GetVariant, VariantServiceV1.UpdateVariant

Used as field type in: CreateVariantRequest, MergeVariantsRequest, SearchVariantsResponse, StreamVariantsResponse, UpdateVariantRequest

string variant_set_id = 15
The ID of the variant set this variant belongs to.
string id = 2
The server-generated variant ID, unique across all variants.
repeated string names = 3
Names for the variant, for example a RefSNP ID.
int64 created = 12
The date this variant was created, in milliseconds from the epoch.
string reference_name = 14
The reference on which this variant occurs (such as `chr20` or `X`).
int64 start = 16
The position at which this variant occurs (0-based). This corresponds to the first base of the string of reference bases.
int64 end = 13
The end position (0-based) of this variant. This corresponds to the first base after the last base in the reference allele. So, the length of the reference allele is (end - start). This is useful for variants that don't explicitly give alternate bases, for example large deletions.
string reference_bases = 6
The reference bases for this variant. They start at the given position.
repeated string alternate_bases = 7
The bases that appear instead of the reference bases.
double quality = 8
A measure of how likely this variant is to be real. A higher value is better.
repeated string filter = 9
A list of filters (normally quality filters) this variant has failed. `PASS` indicates this variant has passed all filters.
map<string, protobuf.ListValue> info = 10
A map of additional variant information. This must be of the form map<string, string[]> (string key mapping to a list of string values).
repeated VariantCall calls = 11
The variant calls for this particular variant. Each one represents the determination of genotype with respect to this variant.

Used in: Annotation

VariantAnnotation.Type type = 1
Type has been adapted from ClinVar's list of variant types.
VariantAnnotation.Effect effect = 2
Effect of the variant on the coding sequence.
string alternate_bases = 3
The alternate allele for this variant. If multiple alternate alleles exist at this location, create a separate variant for each one, as they may represent distinct conditions.
string gene_id = 4
Google annotation ID of the gene affected by this variant. This should be provided when the variant is created.
repeated string transcript_ids = 5
Google annotation IDs of the transcripts affected by this variant. These should be provided when the variant is created.
repeated VariantAnnotation.ClinicalCondition conditions = 6
The set of conditions associated with this variant. A condition describes the way a variant influences human health.
VariantAnnotation.ClinicalSignificance clinical_significance = 7
Describes the clinical significance of a variant. It is adapted from the ClinVar controlled vocabulary for clinical significance described at: http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/

Used in: VariantAnnotation

repeated string names = 1
A set of names for the condition.
repeated ExternalId external_ids = 2
The set of external IDs for this condition.
string concept_id = 3
The MedGen concept id associated with this gene. Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
string omim_id = 4
The OMIM id for this condition. Search for these IDs at http://omim.org/

Used in: VariantAnnotation

CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0
CLINICAL_SIGNIFICANCE_OTHER = 1
`OTHER` should be used when no other clinical significance value will suffice.
UNCERTAIN = 2
BENIGN = 3
LIKELY_BENIGN = 4
LIKELY_PATHOGENIC = 5
PATHOGENIC = 6
DRUG_RESPONSE = 7
HISTOCOMPATIBILITY = 8
CONFERS_SENSITIVITY = 9
RISK_FACTOR = 10
ASSOCIATION = 11
PROTECTIVE = 12
MULTIPLE_REPORTED = 13
`MULTIPLE_REPORTED` should be used when multiple clinical significances are reported for a variant. The original clinical significance values may be provided in the `info` field.

Used in: VariantAnnotation

EFFECT_UNSPECIFIED = 0
EFFECT_OTHER = 1
`EFFECT_OTHER` should be used when no other Effect will suffice.
FRAMESHIFT = 2
`FRAMESHIFT` indicates a mutation in which the insertion or deletion of nucleotides resulted in a frameshift change.
FRAME_PRESERVING_INDEL = 3
`FRAME_PRESERVING_INDEL` indicates a mutation in which a multiple of three nucleotides has been inserted or deleted, resulting in no change to the reading frame of the coding sequence.
SYNONYMOUS_SNP = 4
`SYNONYMOUS_SNP` indicates a single nucleotide polymorphism mutation that results in no amino acid change.
NONSYNONYMOUS_SNP = 5
`NONSYNONYMOUS_SNP` indicates a single nucleotide polymorphism mutation that results in an amino acid change.
STOP_GAIN = 6
`STOP_GAIN` indicates a mutation that leads to the creation of a stop codon at the variant site. Frameshift mutations creating downstream stop codons do not count as `STOP_GAIN`.
STOP_LOSS = 7
`STOP_LOSS` indicates a mutation that eliminates a stop codon at the variant site.
SPLICE_SITE_DISRUPTION = 8
`SPLICE_SITE_DISRUPTION` indicates that this variant is found in a splice site for the associated transcript, and alters the normal splicing pattern.

Used in: VariantAnnotation

TYPE_UNSPECIFIED = 0
TYPE_OTHER = 1
`TYPE_OTHER` should be used when no other Type will suffice. Further explanation of the variant type may be included in the [info][google.genomics.v1.Annotation.info] field.
INSERTION = 2
`INSERTION` indicates an insertion.
DELETION = 3
`DELETION` indicates a deletion.
SUBSTITUTION = 4
`SUBSTITUTION` indicates a block substitution of two or more nucleotides.
SNP = 5
`SNP` indicates a single nucleotide polymorphism.
STRUCTURAL = 6
`STRUCTURAL` indicates a large structural variant, including chromosomal fusions, inversions, etc.
CNV = 7
`CNV` indicates a variation in copy number.

A call represents the determination of genotype with respect to a particular variant. It may include associated information such as quality and phasing. For example, a call might assign a probability of 0.32 to the occurrence of a SNP named rs1234 in a call set with the name NA12345.

Used in: Variant

string call_set_id = 8
The ID of the call set this variant call belongs to.
string call_set_name = 9
The name of the call set this variant call belongs to.
repeated int32 genotype = 7
The genotype of this variant call. Each value represents either the value of the `referenceBases` field or a 1-based index into `alternateBases`. If a variant had a `referenceBases` value of `T` and an `alternateBases` value of `["A", "C"]`, and the `genotype` was `[2, 1]`, that would mean the call represented the heterozygous value `CA` for this variant. If the `genotype` was instead `[0, 1]`, the represented value would be `TA`. Ordering of the genotype values is important if the `phaseset` is present. If a genotype is not called (that is, a `.` is present in the GT string) -1 is returned.
string phaseset = 5
If this field is present, this variant call's genotype ordering implies the phase of the bases and is consistent with any other variant calls in the same reference sequence which have the same phaseset value. When importing data from VCF, if the genotype data was phased but no phase set was specified this field will be set to `*`.
repeated double genotype_likelihood = 6
The genotype likelihoods for this variant call. Each array entry represents how likely a specific genotype is for this call. The value ordering is defined by the GL tag in the VCF spec. If Phred-scaled genotype likelihood scores (PL) are available and log10(P) genotype likelihood scores (GL) are not, PL scores are converted to GL scores. If both are available, PL scores are stored in `info`.
map<string, protobuf.ListValue> info = 2
A map of additional variant call information. This must be of the form map<string, string[]> (string key mapping to a list of string values).

A variant set is a collection of call sets and variants. It contains summary statistics of those contents. A variant set belongs to a dataset. For more genomics resource definitions, see [Fundamentals of Google Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)

Used as response type in: VariantServiceV1.CreateVariantSet, VariantServiceV1.GetVariantSet, VariantServiceV1.UpdateVariantSet

Used as field type in: CreateVariantSetRequest, SearchVariantSetsResponse, UpdateVariantSetRequest

string dataset_id = 1
The dataset to which this variant set belongs.
string id = 2
The server-generated variant set ID, unique across all variant sets.
string reference_set_id = 6
The reference set to which the variant set is mapped. The reference set describes the alignment provenance of the variant set, while the `referenceBounds` describe the shape of the actual variant data. The reference set's reference names are a superset of those found in the `referenceBounds`. For example, given a variant set that is mapped to the GRCh38 reference set and contains a single variant on reference 'X', `referenceBounds` would contain only an entry for 'X', while the associated reference set enumerates all possible references: '1', '2', 'X', 'Y', 'MT', etc.
repeated ReferenceBound reference_bounds = 5
A list of all references used by the variants in a variant set with associated coordinate upper bounds for each one.
repeated VariantSetMetadata metadata = 4
The metadata associated with this variant set.
string name = 7
User-specified, mutable name.
string description = 8
A textual description of this variant set.

Metadata describes a single piece of variant call metadata. These data include a top level key and either a single value string (value) or a list of key-value pairs (info). Value and info are mutually exclusive.

Used in: VariantSet

string key = 1
The top-level key.
string value = 2
The value field for simple metadata.
string id = 4
User-provided ID field, not enforced by this API. Two or more pieces of structured metadata with identical id and key fields are considered equivalent.
VariantSetMetadata.Type type = 5
The type of data. Possible types include: Integer, Float, Flag, Character, and String.
string number = 8
The number of values that can be included in a field described by this metadata.
string description = 7
A textual description of this metadata.
map<string, protobuf.ListValue> info = 3
Remaining structured metadata key-value pairs. This must be of the form map<string, string[]> (string key mapping to a list of string values).

Used in: VariantSetMetadata

TYPE_UNSPECIFIED = 0
INTEGER = 1
FLOAT = 2
FLAG = 3
CHARACTER = 4
STRING = 5

package google.genomics.v1

service AnnotationServiceV1

rpc CreateAnnotationSet (CreateAnnotationSetRequest, AnnotationSet)

message CreateAnnotationSetRequest

optional AnnotationSet annotation_set = 1

rpc GetAnnotationSet (GetAnnotationSetRequest, AnnotationSet)

message GetAnnotationSetRequest

string annotation_set_id = 1

rpc UpdateAnnotationSet (UpdateAnnotationSetRequest, AnnotationSet)

message UpdateAnnotationSetRequest

string annotation_set_id = 1

optional AnnotationSet annotation_set = 2

optional protobuf.FieldMask update_mask = 3

rpc DeleteAnnotationSet (DeleteAnnotationSetRequest, protobuf.Empty)

message DeleteAnnotationSetRequest

string annotation_set_id = 1

rpc SearchAnnotationSets (SearchAnnotationSetsRequest, SearchAnnotationSetsResponse)

message SearchAnnotationSetsRequest

repeated string dataset_ids = 1

string reference_set_id = 2

string name = 3

repeated AnnotationType types = 4

string page_token = 5

int32 page_size = 6

message SearchAnnotationSetsResponse

repeated AnnotationSet annotation_sets = 1

string next_page_token = 2

rpc CreateAnnotation (CreateAnnotationRequest, Annotation)

message CreateAnnotationRequest

optional Annotation annotation = 1

rpc BatchCreateAnnotations (BatchCreateAnnotationsRequest, BatchCreateAnnotationsResponse)

message BatchCreateAnnotationsRequest

repeated Annotation annotations = 1

string request_id = 2

message BatchCreateAnnotationsResponse

repeated BatchCreateAnnotationsResponse.Entry entries = 1

rpc GetAnnotation (GetAnnotationRequest, Annotation)

message GetAnnotationRequest

string annotation_id = 1

rpc UpdateAnnotation (UpdateAnnotationRequest, Annotation)

message UpdateAnnotationRequest

string annotation_id = 1

optional Annotation annotation = 2

optional protobuf.FieldMask update_mask = 3

rpc DeleteAnnotation (DeleteAnnotationRequest, protobuf.Empty)

message DeleteAnnotationRequest

string annotation_id = 1

rpc SearchAnnotations (SearchAnnotationsRequest, SearchAnnotationsResponse)

message SearchAnnotationsRequest

repeated string annotation_set_ids = 1

oneof reference

string reference_id = 2

string reference_name = 3

int64 start = 4

int64 end = 5

string page_token = 6

int32 page_size = 7

message SearchAnnotationsResponse

repeated Annotation annotations = 1

string next_page_token = 2

service DatasetServiceV1

rpc ListDatasets (ListDatasetsRequest, ListDatasetsResponse)

message ListDatasetsRequest

string project_id = 1

int32 page_size = 2

string page_token = 3

message ListDatasetsResponse

repeated Dataset datasets = 1

string next_page_token = 2

rpc CreateDataset (CreateDatasetRequest, Dataset)

message CreateDatasetRequest

optional Dataset dataset = 1

rpc GetDataset (GetDatasetRequest, Dataset)

message GetDatasetRequest

string dataset_id = 1

rpc UpdateDataset (UpdateDatasetRequest, Dataset)

message UpdateDatasetRequest

string dataset_id = 1

optional Dataset dataset = 2

optional protobuf.FieldMask update_mask = 3