An asset represents a cloud resource that is being managed within a lake as a member of a zone.
Used in:
Output only. The relative resource name of the asset, of the form: `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/assets/{asset_id}`.
Optional. User friendly display name.
Output only. System generated globally unique ID for the asset. This ID will be different if the asset is deleted and re-created with the same name.
Output only. The time when the asset was created.
Output only. The time when the asset was last updated.
Optional. User defined labels for the asset.
Optional. Description of the asset.
Output only. Current state of the asset.
Required. Specification of the resource that is referenced by this asset.
Output only. Status of the resource referenced by this asset.
Output only. Status of the security policy applied to resource referenced by this asset.
Optional. Specification of the discovery feature applied to data referenced by this asset. When this spec is left unset, the asset will use the spec set on the parent zone.
Output only. Status of the discovery feature applied to data referenced by this asset.
Settings to manage the metadata discovery and publishing for an asset.
Used in:
Optional. Whether discovery is enabled.
Optional. The list of patterns to apply for selecting data to include during discovery if only a subset of the data should be considered. For Cloud Storage bucket assets, these are interpreted as glob patterns used to match object names. For BigQuery dataset assets, these are interpreted as patterns to match table names.
Optional. The list of patterns to apply for selecting data to exclude during discovery. For Cloud Storage bucket assets, these are interpreted as glob patterns used to match object names. For BigQuery dataset assets, these are interpreted as patterns to match table names.
Optional. Configuration for CSV data.
Optional. Configuration for Json data.
Determines when discovery is triggered.
Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for running discovery periodically. Successive discovery runs must be scheduled at least 60 minutes apart. The default is to run discovery every 60 minutes. To explicitly set a timezone in the cron tab, apply a prefix: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}". The ${IANA_TIME_ZONE} must be a valid string from the IANA time zone database. For example, `CRON_TZ=America/New_York 1 * * * *`, or `TZ=America/New_York 1 * * * *`.
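Taken together, the fields above form a small configuration payload. Below is a minimal sketch of a DiscoverySpec in its JSON form, assuming the usual proto3 JSON field-name mapping of the fields described here (enabled, includePatterns, csvOptions, schedule); the bucket paths and schedule are purely illustrative.

```python
import json

# Hedged sketch of an Asset/Zone DiscoverySpec payload, built as a plain
# dict in the JSON representation of the message described above.
discovery_spec = {
    "enabled": True,
    # Glob patterns for Cloud Storage objects; table-name patterns for BigQuery.
    "includePatterns": ["raw/**/*.csv"],
    "excludePatterns": ["raw/tmp/**"],
    "csvOptions": {"headerRows": 1, "delimiter": ",", "encoding": "UTF-8"},
    # Hourly discovery with an explicit IANA time zone; successive runs must be
    # at least 60 minutes apart.
    "schedule": "CRON_TZ=America/New_York 0 * * * *",
}

print(json.dumps(discovery_spec, indent=2))
```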
Describe CSV and similar semi-structured data formats.
Used in:
Optional. The number of rows to interpret as header rows that should be skipped when reading data rows.
Optional. The delimiter being used to separate values. This defaults to ','.
Optional. The character encoding of the data. The default is UTF-8.
Optional. Whether to disable the inference of data type for CSV data. If true, all columns will be registered as strings.
Describe JSON data format.
Used in:
Optional. The character encoding of the data. The default is UTF-8.
Optional. Whether to disable the inference of data type for Json data. If true, all columns will be registered as their primitive types (string, number, or boolean).
Status of discovery for an asset.
Used in:
The current status of the discovery feature.
Additional information about the current state.
Last update time of the status.
The start time of the last discovery run.
Data Stats of the asset reported by discovery.
The duration of the last discovery run.
Current state of discovery.
Used in:
State is unspecified.
Discovery for the asset is scheduled.
Discovery for the asset is running.
Discovery for the asset is currently paused (e.g. due to a lack of available resources). It will be automatically resumed.
Discovery for the asset is disabled.
The aggregated data statistics for the asset reported by discovery.
Used in:
The count of data items within the referenced resource.
The number of stored data bytes within the referenced resource.
The count of table entities within the referenced resource.
The count of fileset entities within the referenced resource.
Identifies the cloud resource that is referenced by this asset.
Used in:
Immutable. Relative name of the cloud resource that contains the data that is being managed within a lake. For example: `projects/{project_number}/buckets/{bucket_id}` or `projects/{project_number}/datasets/{dataset_id}`.
Required. Immutable. Type of resource.
Optional. Determines how read permissions are handled for each asset and their associated tables. Only available for storage bucket assets.
Access Mode determines how data stored within the resource is read. This is only applicable to storage bucket assets.
Used in:
Access mode unspecified.
Default. Data is accessed directly using storage APIs.
Data is accessed through a managed interface using BigQuery APIs.
Type of resource.
Used in:
Type not specified.
Cloud Storage bucket.
BigQuery dataset.
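For illustration, here is a minimal sketch of ResourceSpec payloads for the two documented resource types. The relative-name formats come from the description above; the project number, bucket, and dataset names are hypothetical, and the enum value strings (STORAGE_BUCKET, BIGQUERY_DATASET, MANAGED) are assumed from the documented type and access-mode descriptions.

```python
# Hedged sketch: ResourceSpec for a Cloud Storage bucket asset read through
# the managed (BigQuery API) interface, and for a BigQuery dataset asset.
bucket_asset_resource = {
    "name": "projects/1234567890/buckets/my-raw-bucket",  # hypothetical bucket
    "type": "STORAGE_BUCKET",
    "readAccessMode": "MANAGED",  # only applicable to storage bucket assets
}

bigquery_asset_resource = {
    "name": "projects/1234567890/datasets/my_dataset",  # hypothetical dataset
    "type": "BIGQUERY_DATASET",
}
```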
Status of the resource referenced by an asset.
Used in:
The current state of the managed resource.
Additional information about the current state.
Last update time of the status.
Output only. Service account associated with the BigQuery Connection.
The state of a resource.
Used in:
State unspecified.
Resource does not have any errors.
Resource has errors.
Security policy status of the asset. Data security policy, i.e., readers, writers & owners, should be specified in the lake/zone/asset IAM policy.
Used in:
The current state of the security policy applied to the attached resource.
Additional information about the current state.
Last update time of the status.
The state of the security policy.
Used in:
State unspecified.
Security policy has been successfully applied to the attached resource.
Security policy is in the process of being applied to the attached resource.
Security policy could not be applied to the attached resource due to errors.
The CloudEvent raised when an Asset is created.
The data associated with the event.
The CloudEvent raised when an Asset is deleted.
The data associated with the event.
The data within all Asset events.
Used in:
Optional. The Asset event payload. Unset for deletion events.
Aggregated status of the underlying assets of a lake or zone.
Used in:
Last update time of the status.
Number of active assets.
Number of assets that are in process of updating the security policy on attached resources.
The CloudEvent raised when an Asset is updated.
The data associated with the event.
DataAccessSpec holds the access control configuration to be enforced on data stored within resources (eg: rows, columns in BigQuery Tables). When associated with data, the data is only accessible to principals explicitly granted access through the DataAccessSpec. Principals with access to the containing resource are not implicitly granted access.
Used in:
Optional. The set of principals to be granted reader role on data stored within resources. Strings follow the IAM binding format: user:{email}, serviceAccount:{email}, group:{email}.
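A minimal sketch of a DataAccessSpec payload using the IAM-style principal strings described above; the accounts are hypothetical.

```python
# Hedged sketch of a DataAccessSpec granting the reader role on data.
data_access_spec = {
    "readers": [
        "user:analyst@example.com",
        "serviceAccount:etl-runner@my-project.iam.gserviceaccount.com",
        "group:data-readers@example.com",
    ]
}
```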
Denotes one dataAttribute in a dataTaxonomy, for example, PII. DataAttribute resources can be defined in a hierarchy. A single dataAttribute resource can contain specs of multiple types:
```
PII
  - ResourceAccessSpec:
      - readers: foo@bar.com
  - DataAccessSpec:
      - readers: bar@foo.com
```
Used in:
Output only. The relative resource name of the dataAttribute, of the form: projects/{project_number}/locations/{location_id}/dataTaxonomies/{dataTaxonomy}/attributes/{data_attribute_id}.
Output only. System generated globally unique ID for the DataAttribute. This ID will be different if the DataAttribute is deleted and re-created with the same name.
Output only. The time when the DataAttribute was created.
Output only. The time when the DataAttribute was last updated.
Optional. Description of the DataAttribute.
Optional. User friendly display name.
Optional. User-defined labels for the DataAttribute.
Optional. The ID of the parent DataAttribute resource, should belong to the same data taxonomy. Circular dependency in parent chain is not valid. Maximum depth of the hierarchy allowed is 4. [a -> b -> c -> d -> e, depth = 4]
Output only. The number of child attributes present for this attribute.
This checksum is computed by the server based on the value of other fields, and may be sent on update and delete requests to ensure the client has an up-to-date value before proceeding.
Optional. Specified when applied to a resource (eg: Cloud Storage bucket, BigQuery dataset, BigQuery table).
Optional. Specified when applied to data stored on the resource (eg: rows, columns in BigQuery Tables).
DataAttributeBinding represents binding of attributes to resources. Eg: Bind 'CustomerInfo' entity with 'PII' attribute.
Used in:
Output only. The relative resource name of the Data Attribute Binding, of the form: projects/{project_number}/locations/{location}/dataAttributeBindings/{data_attribute_binding_id}
Output only. System generated globally unique ID for the DataAttributeBinding. This ID will be different if the DataAttributeBinding is deleted and re-created with the same name.
Output only. The time when the DataAttributeBinding was created.
Output only. The time when the DataAttributeBinding was last updated.
Optional. Description of the DataAttributeBinding.
Optional. User friendly display name.
Optional. User-defined labels for the DataAttributeBinding.
This checksum is computed by the server based on the value of other fields, and may be sent on update and delete requests to ensure the client has an up-to-date value before proceeding. Etags must be used when calling the DeleteDataAttributeBinding and the UpdateDataAttributeBinding method.
The reference to the resource that is associated to attributes.
Optional. Immutable. The resource name of the resource that is associated to attributes. Presently, only entity resource is supported in the form: projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity_id}. Must belong to the same project and region as the attribute binding, and only one active binding can exist for a resource.
Optional. List of attributes to be associated with the resource, provided in the form: projects/{project}/locations/{location}/dataTaxonomies/{dataTaxonomy}/attributes/{data_attribute_id}
Optional. The list of paths for items within the associated resource (eg. columns within a table) along with attribute bindings.
Represents a subresource of a given resource, and associated bindings with it.
Used in:
Required. The name identifier of the path. Nested columns should be of the form: 'country.state.city'.
Optional. List of attributes to be associated with the path of the resource, provided in the form: projects/{project}/locations/{location}/dataTaxonomies/{dataTaxonomy}/attributes/{data_attribute_id}
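A minimal sketch of a DataAttributeBinding that attaches a taxonomy attribute to an entity and to one of its columns, using the resource-name forms quoted above; every name below is hypothetical.

```python
# Hedged sketch of a DataAttributeBinding with a resource-level attribute
# and a path-level (column) attribute.
attribute_binding = {
    "resource": (
        "projects/my-project/locations/us-central1/lakes/my-lake/"
        "zones/raw/entities/customers"
    ),
    "attributes": [
        "projects/my-project/locations/us-central1/dataTaxonomies/"
        "SensitiveDataTaxonomy/attributes/CustomerInfo"
    ],
    "paths": [
        {
            "name": "email",  # nested columns use dotted names, e.g. 'address.city'
            "attributes": [
                "projects/my-project/locations/us-central1/dataTaxonomies/"
                "SensitiveDataTaxonomy/attributes/PII"
            ],
        }
    ],
}
```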
The CloudEvent raised when a DataAttributeBinding is created.
The data associated with the event.
The CloudEvent raised when a DataAttributeBinding is deleted.
The data associated with the event.
The data within all DataAttributeBinding events.
Used in:
Optional. The DataAttributeBinding event payload. Unset for deletion events.
The CloudEvent raised when a DataAttributeBinding is updated.
The data associated with the event.
The CloudEvent raised when a DataAttribute is created.
The data associated with the event.
The CloudEvent raised when a DataAttribute is deleted.
The data associated with the event.
The data within all DataAttribute events.
Used in:
Optional. The DataAttribute event payload. Unset for deletion events.
The CloudEvent raised when a DataAttribute is updated.
The data associated with the event.
DataProfileResult defines the output of DataProfileScan. Each field of the table will have a field-type-specific profile result.
Used in:
The count of rows scanned.
The profile information per field.
The data scanned for this result.
Contains the name, type, mode, and field-type-specific profile information.
Used in:
List of fields with structural and profile information for each field.
A field within a table.
Used in:
The name of the field.
The field data type. Possible values include: STRING, BYTE, INT64, INT32, INT16, DOUBLE, FLOAT, DECIMAL, BOOLEAN, BINARY, TIMESTAMP, DATE, TIME, NULL, and RECORD.
The mode of the field. Possible values include: REQUIRED (a required field), NULLABLE (an optional field), and REPEATED (a repeated field).
Profile information for the corresponding field.
The profile information for each field type.
Used in:
Ratio of rows with null value against total scanned rows.
Ratio of rows with distinct values against total scanned rows. Not available for complex non-groupable field type RECORD and fields with REPEATABLE mode.
The list of top N non-null values and number of times they occur in the scanned data. N is 10 or equal to the number of distinct values in the field, whichever is smaller. Not available for complex non-groupable field type RECORD and fields with REPEATABLE mode.
Structural and profile information for the specific field type. Not available if the mode is REPEATABLE.
String type field information.
Integer type field information.
Double type field information.
The profile information for a double type field.
Used in:
Average of non-null values in the scanned data. NaN, if the field has a NaN.
Standard deviation of non-null values in the scanned data. NaN, if the field has a NaN.
Minimum of non-null values in the scanned data. NaN, if the field has a NaN.
A quartile divides the number of data points into four parts, or quarters, of more-or-less equal size. The three main quartiles are: the first quartile (Q1), which splits off the lowest 25% of data from the highest 75% and is also known as the lower or 25th empirical quartile, as 25% of the data lies below this point; the second quartile (Q2), which is the median of the data set, so 50% of the data lies below this point; and the third quartile (Q3), which splits off the highest 25% of data from the lowest 75% and is known as the upper or 75th empirical quartile, as 75% of the data lies below this point. Here, the quartiles are provided as an ordered list of quartile values for the scanned data, in the order Q1, median, Q3.
Maximum of non-null values in the scanned data. NaN, if the field has a NaN.
The profile information for an integer type field.
Used in:
Average of non-null values in the scanned data. NaN, if the field has a NaN.
Standard deviation of non-null values in the scanned data. NaN, if the field has a NaN.
Minimum of non-null values in the scanned data. NaN, if the field has a NaN.
A quartile divides the number of data points into four parts, or quarters, of more-or-less equal size. The three main quartiles are: the first quartile (Q1), which splits off the lowest 25% of data from the highest 75% and is also known as the lower or 25th empirical quartile, as 25% of the data lies below this point; the second quartile (Q2), which is the median of the data set, so 50% of the data lies below this point; and the third quartile (Q3), which splits off the highest 25% of data from the lowest 75% and is known as the upper or 75th empirical quartile, as 75% of the data lies below this point. Here, the quartiles are provided as an ordered list of quartile values for the scanned data, in the order Q1, median, Q3.
Maximum of non-null values in the scanned data. NaN, if the field has a NaN.
The profile information for a string type field.
Used in:
Minimum length of non-null values in the scanned data.
Maximum length of non-null values in the scanned data.
Average length of non-null values in the scanned data.
Top N non-null values in the scanned data.
Used in:
String value of a top N non-null value.
Count of the corresponding value in the scanned data.
DataProfileScan related setting.
Used in:
(message has no fields)
DataQualityDimensionResult provides a more detailed, per-dimension view of the results.
Used in:
Whether the dimension passed or failed.
The output of a DataQualityScan.
Used in:
Overall data quality result -- `true` if all rules passed.
A list of results at the dimension level.
A list of all the rules in a job, and their results.
The count of rows processed.
The data scanned for this result.
A rule captures data quality intent about a data source.
Used in:
ColumnMap rule which evaluates whether each column value lies between a specified range.
ColumnMap rule which evaluates whether each column value is null.
ColumnMap rule which evaluates whether each column value is contained by a specified set.
ColumnMap rule which evaluates whether each column value matches a specified regex.
ColumnAggregate rule which evaluates whether the column has duplicates.
ColumnAggregate rule which evaluates whether the column aggregate statistic lies between a specified range.
Table rule which evaluates whether each row passes the specified condition.
Table rule which evaluates whether the provided expression is true.
Optional. The unnested column which this rule is evaluated against.
Optional. Rows with `null` values will automatically fail a rule, unless `ignore_null` is `true`. In that case, such `null` rows are trivially considered passing. Only applicable to ColumnMap rules.
Required. The dimension a rule belongs to. Results are also aggregated at the dimension level. Supported dimensions are **["COMPLETENESS", "ACCURACY", "CONSISTENCY", "VALIDITY", "UNIQUENESS", "INTEGRITY"]**
Optional. The minimum ratio of **passing_rows / total_rows** required to pass this rule, with a range of [0.0, 1.0]. 0 indicates default value (i.e. 1.0).
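As a concrete illustration of the rule fields above, here is a minimal sketch of one ColumnMap rule with a range expectation; the column name is hypothetical, and field names assume the proto3 JSON mapping of the documented fields.

```python
# Hedged sketch of a DataQualityRule: a VALIDITY range check on one column.
range_rule = {
    "column": "order_total",
    "ignoreNull": True,   # null rows pass trivially for ColumnMap rules
    "dimension": "VALIDITY",
    "threshold": 0.95,    # at least 95% of rows must pass
    "rangeExpectation": {
        "minValue": "0",
        "maxValue": "10000",
        "strictMinEnabled": False,
        "strictMaxEnabled": False,
    },
}
```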
Evaluates whether each column value is null.
Used in:
(message has no fields)
Evaluates whether each column value lies between a specified range.
Used in:
Optional. The minimum column value allowed for a row to pass this validation. At least one of `min_value` and `max_value` needs to be provided.
Optional. The maximum column value allowed for a row to pass this validation. At least one of `min_value` and `max_value` needs to be provided.
Optional. Whether each value needs to be strictly greater than ('>') the minimum, or if equality is allowed. Only relevant if a `min_value` has been defined. Default = false.
Optional. Whether each value needs to be strictly lesser than ('<') the maximum, or if equality is allowed. Only relevant if a `max_value` has been defined. Default = false.
Evaluates whether each column value matches a specified regex.
Used in:
A regular expression the column value is expected to match.
Evaluates whether each row passes the specified condition. The SQL expression needs to use BigQuery standard SQL syntax and should produce a boolean value per row as the result. Example: col1 >= 0 AND col2 < 10
Used in:
The SQL expression.
Evaluates whether each column value is contained by a specified set.
Used in:
Expected values for the column value.
Evaluates whether the column aggregate statistic lies between a specified range.
Used in:
The minimum column statistic value allowed for a row to pass this validation. At least one of `min_value` and `max_value` needs to be provided.
The maximum column statistic value allowed for a row to pass this validation. At least one of `min_value` and `max_value` needs to be provided.
Whether column statistic needs to be strictly greater than ('>') the minimum, or if equality is allowed. Only relevant if a `min_value` has been defined. Default = false.
Whether column statistic needs to be strictly lesser than ('<') the maximum, or if equality is allowed. Only relevant if a `max_value` has been defined. Default = false.
Used in:
Unspecified statistic type
Evaluate the column mean
Evaluate the column min
Evaluate the column max
Evaluates whether the provided expression is true. The SQL expression needs to use BigQuery standard SQL syntax and should produce a scalar boolean result. Example: MIN(col1) >= 0
Used in:
The SQL expression.
Evaluates whether the column has duplicates.
Used in:
(message has no fields)
DataQualityRuleResult provides a more detailed, per-rule view of the results.
Used in:
The rule specified in the DataQualitySpec, as is.
Whether the rule passed or failed.
The number of rows a rule was evaluated against. This field is only valid for ColumnMap type rules. The evaluated count can be configured to either include all rows (default), with `null` rows automatically failing rule evaluation, or exclude `null` rows from the `evaluated_count` by setting `ignore_nulls = true`.
The number of rows which passed a rule evaluation. This field is only valid for ColumnMap type rules.
The number of rows with null values in the specified column.
The ratio of **passed_count / evaluated_count**. This field is only valid for ColumnMap type rules.
The query to find rows that did not pass this rule. Only applies to ColumnMap and RowCondition rules.
DataQualityScan related setting.
Used in:
The list of rules to evaluate against a data source. At least one rule is required.
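A minimal sketch of a DataQualitySpec that combines a per-row SQL condition with a table-level SQL assertion, following the rule shapes described above; the column names are hypothetical.

```python
# Hedged sketch of a DataQualitySpec with two SQL-based rules.
data_quality_spec = {
    "rules": [
        {
            "dimension": "VALIDITY",
            # BigQuery standard SQL, evaluated per row to a boolean.
            "rowConditionExpectation": {
                "sqlExpression": "discount >= 0 AND discount < 10"
            },
        },
        {
            "dimension": "CONSISTENCY",
            # BigQuery standard SQL, evaluated once to a scalar boolean.
            "tableConditionExpectation": {
                "sqlExpression": "MIN(order_total) >= 0"
            },
        },
    ]
}
```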
Represents a user-visible job which provides the insights for the related data source. For example: * Data Quality: generates queries based on the rules and runs against the data to get data quality check results. * Data Profile: analyzes the data in table(s) and generates insights about the structure, content and relationships (such as null percent, cardinality, min/max/mean, etc).
Used in:
Output only. The relative resource name of the scan, of the form: `projects/{project}/locations/{location_id}/dataScans/{datascan_id}`, where `project` refers to a *project_id* or *project_number* and `location_id` refers to a GCP region.
Output only. System generated globally unique ID for the scan. This ID will be different if the scan is deleted and re-created with the same name.
Optional. Description of the scan. Must be between 1 and 1024 characters.
Optional. User friendly display name. Must be between 1 and 256 characters.
Optional. User-defined labels for the scan.
Output only. Current state of the DataScan.
Output only. The time when the scan was created.
Output only. The time when the scan was last updated.
Required. The data source for DataScan.
Optional. DataScan execution settings. If not specified, the fields in it will use their default values.
Output only. Status of the data scan execution.
Output only. The type of DataScan.
Data scan related setting. It is required and immutable, which means that once data_quality_spec is set, it cannot be changed to data_profile_spec.
DataQualityScan related setting.
DataProfileScan related setting.
The result of the data scan.
Output only. The result of the data quality scan.
Output only. The result of the data profile scan.
DataScan execution settings.
Used in:
Optional. Spec related to how often and when a scan should be triggered. If not specified, the default is `OnDemand`, which means the scan will not run until the user calls `RunDataScan` API.
Spec related to incremental scan of the data. When an option is selected for incremental scan, it cannot be unset or changed. If not specified, a data scan will run for all data in the table.
Immutable. The unnested field (of type *Date* or *Timestamp*) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table.
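Putting the execution settings together, here is a minimal sketch of a DataScan ExecutionSpec with a recurring schedule and an incremental field; the cron expression and column name are illustrative only.

```python
# Hedged sketch of a DataScan ExecutionSpec: scheduled runs that only scan
# rows whose incremental field value is newer than the previous run.
execution_spec = {
    "trigger": {
        "schedule": {"cron": "TZ=America/New_York 0 3 * * *"}  # daily at 03:00
    },
    # Date/Timestamp column whose values monotonically increase (hypothetical).
    "field": "event_timestamp",
}
```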
Status of the data scan execution.
Used in:
The time when the latest DataScanJob started.
The time when the latest DataScanJob ended.
The CloudEvent raised when a DataScan is created.
The data associated with the event.
The CloudEvent raised when a DataScan is deleted.
The data associated with the event.
The data within all DataScan events.
Used in:
Optional. The DataScan event payload. Unset for deletion events.
The type of DataScan.
Used in:
The DataScan type is unspecified.
Data Quality scan.
Data Profile scan.
The CloudEvent raised when a DataScan is updated.
The data associated with the event.
The data source for DataScan.
Used in:
The source is required and immutable. Once it is set, it cannot be changed to another source.
Immutable. The Dataplex entity that represents the data source (e.g. BigQuery table) for DataScan, of the form: `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
DataTaxonomy represents a set of hierarchical DataAttribute resources grouped by a common theme. For example, a 'SensitiveDataTaxonomy' can have attributes to manage PII data. It is defined at the project level.
Used in:
Output only. The relative resource name of the DataTaxonomy, of the form: projects/{project_number}/locations/{location_id}/dataTaxonomies/{data_taxonomy_id}.
Output only. System generated globally unique ID for the dataTaxonomy. This ID will be different if the DataTaxonomy is deleted and re-created with the same name.
Output only. The time when the DataTaxonomy was created.
Output only. The time when the DataTaxonomy was last updated.
Optional. Description of the DataTaxonomy.
Optional. User friendly display name.
Optional. User-defined labels for the DataTaxonomy.
Output only. The number of attributes in the DataTaxonomy.
This checksum is computed by the server based on the value of other fields, and may be sent on update and delete requests to ensure the client has an up-to-date value before proceeding.
The CloudEvent raised when a DataTaxonomy is created.
The data associated with the event.
The CloudEvent raised when a DataTaxonomy is deleted.
The data associated with the event.
The data within all DataTaxonomy events.
Used in:
Optional. The DataTaxonomy event payload. Unset for deletion events.
The CloudEvent raised when a DataTaxonomy is updated.
The data associated with the event.
Environment represents a user-visible compute infrastructure for analytics within a lake.
Used in:
Output only. The relative resource name of the environment, of the form: projects/{project_id}/locations/{location_id}/lakes/{lake_id}/environment/{environment_id}
Optional. User friendly display name.
Output only. System generated globally unique ID for the environment. This ID will be different if the environment is deleted and re-created with the same name.
Output only. Environment creation time.
Output only. The time when the environment was last updated.
Optional. User defined labels for the environment.
Optional. Description of the environment.
Output only. Current state of the environment.
Required. Infrastructure specification for the Environment.
Optional. Configuration for sessions created for this environment.
Output only. Status of sessions created for this environment.
Output only. URI Endpoints to access sessions associated with the Environment.
URI Endpoints to access sessions associated with the Environment.
Used in:
Output only. URI to serve notebook APIs
Output only. URI to serve SQL APIs
Configuration for the underlying infrastructure used to run workloads.
Used in:
Hardware config
Optional. Compute resources needed for interactive analyze workloads.
Software config
Required. Software runtime configuration for interactive analyze workloads.
Compute resources associated with the analyze interactive workloads.
Used in:
Optional. Size in GB of the disk. Default is 100 GB.
Optional. Total number of nodes in the sessions created for this environment.
Optional. Max configurable nodes. If max_node_count > node_count, then auto-scaling is enabled.
Software Runtime Configuration to run Analyze.
Used in:
Required. Dataplex Image version.
Optional. List of Java jars to be included in the runtime environment. Valid input includes Cloud Storage URIs to Jar binaries. For example, gs://bucket-name/my/path/to/file.jar
Optional. A list of python packages to be installed. Valid formats include Cloud Storage URI to a PIP installable library. For example, gs://bucket-name/my/path/to/lib.tar.gz
Optional. Spark properties to provide configuration for use in sessions created for this environment. The properties to set on daemon config files. Property keys are specified in `prefix:property` format. The prefix must be "spark".
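For the runtime configuration above, a minimal sketch showing the `prefix:property` format for Spark properties; the image version, URIs, and property values are hypothetical.

```python
# Hedged sketch of an Environment OS-image runtime config.
os_image_runtime = {
    "imageVersion": "1.0",
    "javaLibraries": ["gs://my-bucket/libs/udfs.jar"],
    "pythonPackages": ["gs://my-bucket/libs/helpers.tar.gz"],
    "properties": {
        # Keys use the prefix:property format; the prefix must be "spark".
        "spark:spark.executor.memory": "4g",
        "spark:spark.sql.shuffle.partitions": "200",
    },
}
```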
Configuration for sessions created for this environment.
Used in:
Optional. The idle time configuration of the session. The session will be auto-terminated at the end of this period.
Optional. If True, this causes sessions to be pre-created and available for faster startup to enable interactive exploration use-cases. This defaults to False to avoid additional billed charges. These can only be set to True for the environment with name set to "default", and with default configuration.
Status of sessions created for this environment.
Used in:
Output only. Whether the environment is currently active, as determined by queries over sessions.
The CloudEvent raised when an Environment is created.
The data associated with the event.
The CloudEvent raised when an Environment is deleted.
The data associated with the event.
The data within all Environment events.
Used in:
Optional. The Environment event payload. Unset for deletion events.
The CloudEvent raised when an Environment is updated.
The data associated with the event.
A job represents an instance of a task.
Used in:
Output only. The relative resource name of the job, of the form: `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}/jobs/{job_id}`.
Output only. System generated globally unique ID for the job.
Output only. The time when the job was started.
Output only. The time when the job ended.
Output only. Execution state for the job.
Output only. The number of times the job has been retried (excluding the initial attempt).
Output only. The underlying service running a job.
Output only. The full resource name for the job run under a particular service.
Output only. Additional information about the current state.
Used in:
Service used to run the job is unspecified.
Dataproc service is used to run this job.
Used in:
The job state is unknown.
The job is running.
The job is cancelling.
The job cancellation was successful.
The job completed successfully.
The job is no longer running due to an error.
The job was cancelled outside of Dataplex.
A lake is a centralized repository for managing enterprise data across the organization, distributed across many cloud projects and stored in a variety of storage services such as Google Cloud Storage and BigQuery. The resources attached to a lake are referred to as managed resources. Data within these managed resources can be structured or unstructured. A lake provides data admins with tools to organize, secure, and manage their data at scale, and provides data scientists and data engineers an integrated experience to easily search, discover, analyze, and transform data and associated metadata.
Used in:
Output only. The relative resource name of the lake, of the form: `projects/{project_number}/locations/{location_id}/lakes/{lake_id}`.
Optional. User friendly display name.
Output only. System generated globally unique ID for the lake. This ID will be different if the lake is deleted and re-created with the same name.
Output only. The time when the lake was created.
Output only. The time when the lake was last updated.
Optional. User-defined labels for the lake.
Optional. Description of the lake.
Output only. Current state of the lake.
Output only. Service account associated with this lake. This service account must be authorized to access or operate on resources managed by the lake.
Optional. Settings to manage lake and Dataproc Metastore service instance association.
Output only. Aggregated status of the underlying assets of the lake.
Output only. Metastore status of the lake.
Settings to manage association of Dataproc Metastore with a lake.
Used in:
Optional. A relative reference to the Dataproc Metastore (https://cloud.google.com/dataproc-metastore/docs) service associated with the lake: `projects/{project_id}/locations/{location_id}/services/{service_id}`
Status of Lake and Dataproc Metastore service instance association.
Used in:
Current state of association.
Additional information about the current status.
Last update time of the metastore status of the lake.
The URI of the endpoint used to access the Metastore service.
Current state of association.
Used in:
Unspecified.
A Metastore service instance is not associated with the lake.
A Metastore service instance is attached to the lake.
Attach/detach is in progress.
Attach/detach could not be done due to errors.
The CloudEvent raised when a Lake is created.
The data associated with the event.
The CloudEvent raised when a Lake is deleted.
The data associated with the event.
The data within all Lake events.
Used in:
Optional. The Lake event payload. Unset for deletion events.
The CloudEvent raised when a Lake is updated.
The data associated with the event.
ResourceAccessSpec holds the access control configuration to be enforced on the resources, for example, Cloud Storage bucket, BigQuery dataset, BigQuery table.
Used in:
Optional. The set of principals to be granted reader role on the resource. Strings follow the IAM binding format: user:{email}, serviceAccount:{email}, group:{email}.
Optional. The set of principals to be granted writer role on the resource.
Optional. The set of principals to be granted owner role on the resource.
The data scanned during processing (e.g. in incremental DataScan)
Used in:
The range of scanned data
The range denoted by values of an incremental field
A data range denoted by a pair of start/end values of a field.
Used in:
The field that contains values which monotonically increase over time (e.g. a timestamp column).
Value that marks the start of the range.
Value that marks the end of the range.
State of a resource.
Used in:
State is not specified.
Resource is active, i.e., ready to use.
Resource is under creation.
Resource is under deletion.
Resource is active but has unresolved actions.
A task represents a user-visible job.
Used in:
Output only. The relative resource name of the task, of the form: projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}.
Output only. System generated globally unique ID for the task. This ID will be different if the task is deleted and re-created with the same name.
Output only. The time when the task was created.
Output only. The time when the task was last updated.
Optional. Description of the task.
Optional. User friendly display name.
Output only. Current state of the task.
Optional. User-defined labels for the task.
Required. Spec related to how often and when a task should be triggered.
Required. Spec related to how a task is executed.
Output only. Status of the latest task executions.
Task template specific user-specified config.
Config related to running custom Spark tasks.
Config related to running scheduled Notebooks.
Execution-related settings, such as retry and service_account.
Used in:
Optional. The arguments to pass to the task. The args can use placeholders of the format ${placeholder} as part of a key/value string. These will be interpolated before passing the args to the driver. Currently supported placeholders: ${task_id}, ${job_time}. To pass positional args, set the key as TASK_ARGS. The value should be a comma-separated string of all the positional arguments. To use a delimiter other than comma, refer to https://cloud.google.com/sdk/gcloud/reference/topic/escaping. If other keys are present in the args, TASK_ARGS will be passed as the last argument.
Required. Service account to use to execute a task. If not provided, the default Compute service account for the project is used.
Optional. The project in which jobs are run. By default, the project containing the Lake is used. If a project is provided, the [ExecutionSpec.service_account][google.cloud.dataplex.v1.Task.ExecutionSpec.service_account] must belong to this project.
Optional. The maximum duration after which the job execution is expired.
Optional. The Cloud KMS key to use for encryption, of the form: `projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}`.
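To make the args and placeholders concrete, here is a minimal sketch of a Task ExecutionSpec; the service account, bucket, and values are hypothetical, and field names assume the proto3 JSON mapping of the fields documented above.

```python
# Hedged sketch of a Task ExecutionSpec. ${task_id} and ${job_time} are
# interpolated before the args reach the driver; TASK_ARGS carries positional
# arguments as a comma-separated string and is passed last.
task_execution_spec = {
    "args": {
        "output_prefix": "gs://my-bucket/runs/${task_id}/${job_time}",
        "TASK_ARGS": "--mode=batch,--retries=3",
    },
    "serviceAccount": "task-runner@my-project.iam.gserviceaccount.com",
    "maxJobExecutionLifetime": "3600s",  # expire the job after one hour
}
```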
Status of the task execution (e.g. Jobs).
Used in:
Output only. Last update time of the status.
Output only. Latest job execution.
Configuration for the underlying infrastructure used to run workloads.
Used in:
Hardware config.
Compute resources needed for a Task when using Dataproc Serverless.
Software config.
Container Image Runtime Configuration.
Networking config.
Vpc network.
Batch compute resources associated with the task.
Used in:
Optional. Total number of job executors. Executor Count should be between 2 and 100. [Default=2]
Optional. Max configurable executors. If max_executors_count > executors_count, then auto-scaling is enabled. Max Executor Count should be between 2 and 1000. [Default=1000]
Container Image Runtime Configuration used with Batch execution.
Used in:
Optional. Container image to use.
Optional. A list of Java JARS to add to the classpath. Valid input includes Cloud Storage URIs to Jar binaries. For example, gs://bucket-name/my/path/to/file.jar
Optional. A list of python packages to be installed. Valid formats include Cloud Storage URI to a PIP installable library. For example, gs://bucket-name/my/path/to/lib.tar.gz
Optional. Override to common configuration of open source components installed on the Dataproc cluster. The properties to set on daemon config files. Property keys are specified in `prefix:property` format, for example `core:hadoop.tmp.dir`. For more information, see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
Cloud VPC Network used to run the infrastructure.
Used in:
The Cloud VPC network identifier.
Optional. The Cloud VPC network in which the job is run. By default, the Cloud VPC network named Default within the project is used.
Optional. The Cloud VPC sub-network in which the job is run.
Optional. List of network tags to apply to the job.
Config for running scheduled notebooks.
Used in:
Required. Path to input notebook. This can be the Cloud Storage URI of the notebook file or the path to a Notebook Content. The execution args are accessible as environment variables (`TASK_key=value`).
Optional. Infrastructure specification for the execution.
Optional. Cloud Storage URIs of files to be placed in the working directory of each executor.
Optional. Cloud Storage URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
User-specified config for running a Spark task.
Used in:
Required. The specification of the main method to call to drive the job. Specify either the jar file that contains the main class or the main class name.
The Cloud Storage URI of the jar file that contains the main class. The execution args are passed in as a sequence of named process arguments (`--key=value`).
The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`. The execution args are passed in as a sequence of named process arguments (`--key=value`).
The Cloud Storage URI of the main Python file to use as the driver. Must be a .py file. The execution args are passed in as a sequence of named process arguments (`--key=value`).
A reference to a query file. This can be the Cloud Storage URI of the query file or it can be the path to a SqlScript Content. The execution args are used to declare a set of script variables (`set key="value";`).
The query text. The execution args are used to declare a set of script variables (`set key="value";`).
Optional. Cloud Storage URIs of files to be placed in the working directory of each executor.
Optional. Cloud Storage URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
Optional. Infrastructure specification for the execution.
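A minimal sketch of a SparkTaskConfig driven by a main Python file, combining the driver, file, archive, and infrastructure fields described above; all URIs and counts are illustrative, and field names assume the proto3 JSON mapping.

```python
# Hedged sketch of a Spark task config for a PySpark driver with
# Dataproc Serverless batch compute and a network tag.
spark_task_config = {
    "pythonScriptFile": "gs://my-bucket/jobs/clean_orders.py",
    "fileUris": ["gs://my-bucket/config/settings.yaml"],
    "archiveUris": ["gs://my-bucket/deps/env.tar.gz"],
    "infrastructureSpec": {
        "batch": {"executorsCount": 2, "maxExecutorsCount": 10},
        "vpcNetwork": {"networkTags": ["dataplex-task"]},
    },
}
```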
Task scheduling and trigger settings.
Used in:
Required. Immutable. Trigger type of the user-specified Task.
Optional. The first run of the task will be after this time. If not specified, the task will run shortly after being submitted if ON_DEMAND and based on the schedule if RECURRING.
Optional. Prevent the task from executing. This does not cancel already running tasks. It is intended to temporarily disable RECURRING tasks.
Optional. Number of retry attempts before aborting. Set to zero to never attempt to retry a failed task.
Trigger only applies for RECURRING tasks.
Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for running tasks periodically. To explicitly set a timezone to the cron tab, apply a prefix in the cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}". The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone database. For example, `CRON_TZ=America/New_York 1 * * * *`, or `TZ=America/New_York 1 * * * *`. This field is required for RECURRING tasks.
Determines how often and when the job will run.
Used in:
Unspecified trigger type.
The task runs one-time shortly after Task Creation.
The task is scheduled to run periodically.
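Finally, a minimal sketch of a recurring TriggerSpec using the cron format quoted above; the schedule and retry count are illustrative.

```python
# Hedged sketch of a recurring Task trigger with retries.
trigger_spec = {
    "type": "RECURRING",
    "schedule": "CRON_TZ=America/New_York 30 2 * * *",  # 02:30 daily, New York time
    "maxRetries": 3,
}
```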
The CloudEvent raised when a Task is created.
The data associated with the event.
The CloudEvent raised when a Task is deleted.
The data associated with the event.
The data within all Task events.
Used in:
Optional. The Task event payload. Unset for deletion events.
The CloudEvent raised when a Task is updated.
The data associated with the event.
DataScan scheduling and trigger settings.
Used in:
DataScan scheduling and trigger settings. If not specified, the default is `onDemand`.
The scan runs once via `RunDataScan` API.
The scan is scheduled to run periodically.
The scan runs once via `RunDataScan` API.
Used in:
(message has no fields)
The scan is scheduled to run periodically.
Used in:
Required. [Cron](https://en.wikipedia.org/wiki/Cron) schedule for running scans periodically. To explicitly set a timezone in the cron tab, apply a prefix in the cron tab: **"CRON_TZ=${IANA_TIME_ZONE}"** or **"TZ=${IANA_TIME_ZONE}"**. The **${IANA_TIME_ZONE}** may only be a valid string from IANA time zone database ([wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones#List)). For example, `CRON_TZ=America/New_York 1 * * * *`, or `TZ=America/New_York 1 * * * *`. This field is required for Schedule scans.
A zone represents a logical group of related assets within a lake. A zone can be used to map to organizational structure or represent stages of data readiness from raw to curated. It provides managing behavior that is shared or inherited by all contained assets.
Used in:
Output only. The relative resource name of the zone, of the form: `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
Optional. User friendly display name.
Output only. System generated globally unique ID for the zone. This ID will be different if the zone is deleted and re-created with the same name.
Output only. The time when the zone was created.
Output only. The time when the zone was last updated.
Optional. User defined labels for the zone.
Optional. Description of the zone.
Output only. Current state of the zone.
Required. Immutable. The type of the zone.
Optional. Specification of the discovery feature applied to data in this zone.
Required. Specification of the resources that are referenced by the assets within this zone.
Output only. Aggregated status of the underlying assets of the zone.
Settings to manage the metadata discovery and publishing in a zone.
Used in:
Required. Whether discovery is enabled.
Optional. The list of patterns to apply for selecting data to include during discovery if only a subset of the data should be considered. For Cloud Storage bucket assets, these are interpreted as glob patterns used to match object names. For BigQuery dataset assets, these are interpreted as patterns to match table names.
Optional. The list of patterns to apply for selecting data to exclude during discovery. For Cloud Storage bucket assets, these are interpreted as glob patterns used to match object names. For BigQuery dataset assets, these are interpreted as patterns to match table names.
Optional. Configuration for CSV data.
Optional. Configuration for Json data.
Determines when discovery is triggered.
Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for running discovery periodically. Successive discovery runs must be scheduled at least 60 minutes apart. The default is to run discovery every 60 minutes. To explicitly set a timezone in the cron tab, apply a prefix: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}". The ${IANA_TIME_ZONE} must be a valid string from the IANA time zone database. For example, `CRON_TZ=America/New_York 1 * * * *`, or `TZ=America/New_York 1 * * * *`.
Describe CSV and similar semi-structured data formats.
Used in:
Optional. The number of rows to interpret as header rows that should be skipped when reading data rows.
Optional. The delimiter being used to separate values. This defaults to ','.
Optional. The character encoding of the data. The default is UTF-8.
Optional. Whether to disable the inference of data type for CSV data. If true, all columns will be registered as strings.
Describe JSON data format.
Used in:
Optional. The character encoding of the data. The default is UTF-8.
Optional. Whether to disable the inference of data type for Json data. If true, all columns will be registered as their primitive types (string, number, or boolean).
Settings for resources attached as assets within a zone.
Used in:
Required. Immutable. The location type of the resources that are allowed to be attached to the assets within this zone.
Location type of the resources attached to a zone.
Used in:
Unspecified location type.
Resources that are associated with a single region.
Resources that are associated with a multi-region location.
Type of zone.
Used in:
Zone type not specified.
A zone that contains data that needs further processing before it is considered generally ready for consumption and analytics workloads.
A zone that contains data that is considered to be ready for broader consumption and analytics workloads. Curated structured data stored in Cloud Storage must conform to certain file formats (Parquet, Avro, and ORC) and be organized in a Hive-compatible directory layout.
The CloudEvent raised when a Zone is created.
The data associated with the event.
The CloudEvent raised when a Zone is deleted.
The data associated with the event.
The data within all Zone events.
Used in:
Optional. The Zone event payload. Unset for deletion events.
The CloudEvent raised when a Zone is updated.
The data associated with the event.