package google.cloud.vision.v1p2beta1

Get desktop application:
View/edit binary Protocol Buffers messages

Service that performs Google Cloud Vision API detection tasks over client images, such as face, landmark, logo, label, and text detection. The ImageAnnotator service returns detected entities from the images.

rpc AsyncBatchAnnotateFiles (AsyncBatchAnnotateFilesRequest, longrunning.Operation)
image_annotator.proto:54
Run async image detection and annotation for a list of generic files (e.g. PDF) which may contain multiple pages and multiple images per page. Progress and results can be retrieved through the `google.longrunning.Operations` interface. `Operation.metadata` contains `OperationMetadata` (metadata). `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
message AsyncBatchAnnotateFilesRequest
image_annotator.proto:648
Multiple async file annotation requests are batched into a single service call.
- repeated AsyncAnnotateFileRequest requests = 1
  Individual async file annotation requests for this batch.
rpc BatchAnnotateImages (BatchAnnotateImagesRequest, BatchAnnotateImagesResponse)
image_annotator.proto:40
Run image detection and annotation for a batch of images.
message BatchAnnotateImagesRequest
image_annotator.proto:614
Multiple image annotation requests are batched into a single service call.
- repeated AnnotateImageRequest requests = 1
  Individual image annotation requests for this batch.
message BatchAnnotateImagesResponse
image_annotator.proto:620
Response to a batch image annotation request.
- repeated AnnotateImageResponse responses = 1
  Individual responses to image annotation requests within the batch.

Response to a single file annotation request. A file may contain one or more images, which individually have their own responses.

optional InputConfig input_config = 1
Information about the file for which this response is generated.
repeated AnnotateImageResponse responses = 2
Individual responses to images found within the file.

Request for performing Google Cloud Vision API tasks over a user-provided image, with user-requested features.

Used in: BatchAnnotateImagesRequest

optional Image image = 1
The image to be processed.
repeated Feature features = 2
Requested features.
optional ImageContext image_context = 3
Additional context that may accompany the image.

Response to an image annotation request.

Used in: AnnotateFileResponse, BatchAnnotateImagesResponse

repeated FaceAnnotation face_annotations = 1
If present, face detection has completed successfully.
repeated EntityAnnotation landmark_annotations = 2
If present, landmark detection has completed successfully.
repeated EntityAnnotation logo_annotations = 3
If present, logo detection has completed successfully.
repeated EntityAnnotation label_annotations = 4
If present, label detection has completed successfully.
repeated EntityAnnotation text_annotations = 5
If present, text (OCR) detection has completed successfully.
optional TextAnnotation full_text_annotation = 12
If present, text (OCR) detection or document (OCR) text detection has completed successfully. This annotation provides the structural hierarchy for the OCR detected text.
optional SafeSearchAnnotation safe_search_annotation = 6
If present, safe-search annotation has completed successfully.
optional ImageProperties image_properties_annotation = 8
If present, image properties were extracted successfully.
optional CropHintsAnnotation crop_hints_annotation = 11
If present, crop hints have completed successfully.
optional WebDetection web_detection = 13
If present, web detection has completed successfully.
optional rpc.Status error = 9
If set, represents the error message for the operation. Note that filled-in image annotations are guaranteed to be correct, even when `error` is set.
optional ImageAnnotationContext context = 21
If present, contextual information is needed to understand where this image comes from.

An offline file annotation request.

Used in: AsyncBatchAnnotateFilesRequest

optional InputConfig input_config = 1
Required. Information about the input file.
repeated Feature features = 2
Required. Requested features.
optional ImageContext image_context = 3
Additional context that may accompany the image(s) in the file.
optional OutputConfig output_config = 4
Required. The desired output location and metadata (e.g. format).

The response for a single offline file annotation request.

Used in: AsyncBatchAnnotateFilesResponse

optional OutputConfig output_config = 1
The output location and metadata from AsyncAnnotateFileRequest.

Response to an async batch file annotation request.

repeated AsyncAnnotateFileResponse responses = 1
The list of file annotation responses, one for each request in AsyncBatchAnnotateFilesRequest.

Logical element on the page.

Used in: Page

optional TextAnnotation.TextProperty property = 1
Additional information detected for the block.
optional BoundingPoly bounding_box = 2
The bounding box for the block. The vertices are in the order of top-left, top-right, bottom-right, bottom-left. When a rotation of the bounding box is detected, the rotation is represented as around the top-left corner as defined when the text is read in the 'natural' orientation. For example: * when the text is horizontal it might look like: 0----1 | | 3----2 * when it's rotated 180 degrees around the top-left corner it becomes: 2----3 | | 1----0 and the vertex order will still be (0, 1, 2, 3).
repeated Paragraph paragraphs = 3
List of paragraphs in this block (if this block is of type text).
Block.BlockType block_type = 4
Detected block type (text, image etc) for this block.
float confidence = 5
Confidence of the OCR results on the block. Range [0, 1].

Type of a block (text, image etc) as identified by OCR.

Used in: Block

UNKNOWN = 0
Unknown block type.
TEXT = 1
Regular text block.
TABLE = 2
Table block.
PICTURE = 3
Image block.
RULER = 4
Horizontal/vertical line box.
BARCODE = 5
Barcode block.

A bounding polygon for the detected image annotation.

Used in: Block, CropHint, EntityAnnotation, FaceAnnotation, Paragraph, Symbol, Word

repeated Vertex vertices = 1
The bounding polygon vertices.
repeated NormalizedVertex normalized_vertices = 2
The bounding polygon normalized vertices.

Color information consists of RGB channels, score, and the fraction of the image that the color occupies in the image.

Used in: DominantColorsAnnotation

optional type.Color color = 1
RGB components of the color.
float score = 2
Image-specific score for this color. Value in range [0, 1].
float pixel_fraction = 3
The fraction of pixels the color occupies in the image. Value in range [0, 1].

Single crop hint that is used to generate a new crop when serving an image.

Used in: CropHintsAnnotation

optional BoundingPoly bounding_poly = 1
The bounding polygon for the crop region. The coordinates of the bounding box are in the original image's scale, as returned in `ImageParams`.
float confidence = 2
Confidence of this being a salient region. Range [0, 1].
float importance_fraction = 3
Fraction of importance of this salient region with respect to the original image.

Set of crop hints that are used to generate new crops when serving images.

Used in: AnnotateImageResponse

repeated CropHint crop_hints = 1
Crop hint results.

Parameters for crop hints annotation request.

Used in: ImageContext

repeated float aspect_ratios = 1
Aspect ratios in floats, representing the ratio of the width to the height of the image. For example, if the desired aspect ratio is 4/3, the corresponding float value should be 1.33333. If not specified, the best possible crop is returned. The number of provided aspect ratios is limited to a maximum of 16; any aspect ratios provided after the 16th are ignored.

Set of dominant colors and their corresponding scores.

Used in: ImageProperties

repeated ColorInfo colors = 1
RGB color values with their score and pixel fraction.

Set of detected entity features.

Used in: AnnotateImageResponse

string mid = 1
Opaque entity ID. Some IDs may be available in [Google Knowledge Graph Search API](https://developers.google.com/knowledge-graph/).
string locale = 2
The language code for the locale in which the entity textual `description` is expressed.
string description = 3
Entity textual description, expressed in its `locale` language.
float score = 4
Overall score of the result. Range [0, 1].
float confidence = 5
**Deprecated. Use `score` instead.** The accuracy of the entity detection in an image. For example, for an image in which the "Eiffel Tower" entity is detected, this field represents the confidence that there is a tower in the query image. Range [0, 1].
float topicality = 6
The relevancy of the ICA (Image Content Annotation) label to the image. For example, the relevancy of "tower" is likely higher to an image containing the detected "Eiffel Tower" than to an image containing a detected distant towering building, even though the confidence that there is a tower in each image may be the same. Range [0, 1].
optional BoundingPoly bounding_poly = 7
Image region to which this entity belongs. Not produced for `LABEL_DETECTION` features.
repeated LocationInfo locations = 8
The location information for the detected entity. Multiple `LocationInfo` elements can be present because one location may indicate the location of the scene in the image, and another location may indicate the location of the place where the image was taken. Location information is usually present for landmarks.
repeated Property properties = 9
Some entities may have optional user-supplied `Property` (name/value) fields, such as a score or string that qualifies the entity.

A face annotation object contains the results of face detection.

Used in: AnnotateImageResponse

optional BoundingPoly bounding_poly = 1
The bounding polygon around the face. The coordinates of the bounding box are in the original image's scale, as returned in `ImageParams`. The bounding box is computed to "frame" the face in accordance with human expectations. It is based on the landmarker results. Note that one or more x and/or y coordinates may not be generated in the `BoundingPoly` (the polygon will be unbounded) if only a partial face appears in the image to be annotated.
optional BoundingPoly fd_bounding_poly = 2
The `fd_bounding_poly` bounding polygon is tighter than the `boundingPoly`, and encloses only the skin part of the face. Typically, it is used to eliminate the face from any image analysis that detects the "amount of skin" visible in an image. It is not based on the landmarker results, only on the initial face detection, hence the `fd` (face detection) prefix.
repeated FaceAnnotation.Landmark landmarks = 3
Detected face landmarks.
float roll_angle = 4
Roll angle, which indicates the amount of clockwise/anti-clockwise rotation of the face relative to the image vertical about the axis perpendicular to the face. Range [-180,180].
float pan_angle = 5
Yaw angle, which indicates the leftward/rightward angle that the face is pointing relative to the vertical plane perpendicular to the image. Range [-180,180].
float tilt_angle = 6
Pitch angle, which indicates the upwards/downwards angle that the face is pointing relative to the image's horizontal plane. Range [-180,180].
float detection_confidence = 7
Detection confidence. Range [0, 1].
float landmarking_confidence = 8
Face landmarking confidence. Range [0, 1].
Likelihood joy_likelihood = 9
Joy likelihood.
Likelihood sorrow_likelihood = 10
Sorrow likelihood.
Likelihood anger_likelihood = 11
Anger likelihood.
Likelihood surprise_likelihood = 12
Surprise likelihood.
Likelihood under_exposed_likelihood = 13
Under-exposed likelihood.
Likelihood blurred_likelihood = 14
Blurred likelihood.
Likelihood headwear_likelihood = 15
Headwear likelihood.

A face-specific landmark (for example, a face feature).

Used in: FaceAnnotation

Landmark.Type type = 3
Face landmark type.
optional Position position = 4
Face landmark position.

Face landmark (feature) type. Left and right are defined from the vantage of the viewer of the image without considering mirror projections typical of photos. So, `LEFT_EYE`, typically, is the person's right eye.

Used in: Landmark

UNKNOWN_LANDMARK = 0
Unknown face landmark detected. Should not be filled.
LEFT_EYE = 1
Left eye.
RIGHT_EYE = 2
Right eye.
LEFT_OF_LEFT_EYEBROW = 3
Left of left eyebrow.
RIGHT_OF_LEFT_EYEBROW = 4
Right of left eyebrow.
LEFT_OF_RIGHT_EYEBROW = 5
Left of right eyebrow.
RIGHT_OF_RIGHT_EYEBROW = 6
Right of right eyebrow.
MIDPOINT_BETWEEN_EYES = 7
Midpoint between eyes.
NOSE_TIP = 8
Nose tip.
UPPER_LIP = 9
Upper lip.
LOWER_LIP = 10
Lower lip.
MOUTH_LEFT = 11
Mouth left.
MOUTH_RIGHT = 12
Mouth right.
MOUTH_CENTER = 13
Mouth center.
NOSE_BOTTOM_RIGHT = 14
Nose, bottom right.
NOSE_BOTTOM_LEFT = 15
Nose, bottom left.
NOSE_BOTTOM_CENTER = 16
Nose, bottom center.
LEFT_EYE_TOP_BOUNDARY = 17
Left eye, top boundary.
LEFT_EYE_RIGHT_CORNER = 18
Left eye, right corner.
LEFT_EYE_BOTTOM_BOUNDARY = 19
Left eye, bottom boundary.
LEFT_EYE_LEFT_CORNER = 20
Left eye, left corner.
RIGHT_EYE_TOP_BOUNDARY = 21
Right eye, top boundary.
RIGHT_EYE_RIGHT_CORNER = 22
Right eye, right corner.
RIGHT_EYE_BOTTOM_BOUNDARY = 23
Right eye, bottom boundary.
RIGHT_EYE_LEFT_CORNER = 24
Right eye, left corner.
LEFT_EYEBROW_UPPER_MIDPOINT = 25
Left eyebrow, upper midpoint.
RIGHT_EYEBROW_UPPER_MIDPOINT = 26
Right eyebrow, upper midpoint.
LEFT_EAR_TRAGION = 27
Left ear tragion.
RIGHT_EAR_TRAGION = 28
Right ear tragion.
LEFT_EYE_PUPIL = 29
Left eye pupil.
RIGHT_EYE_PUPIL = 30
Right eye pupil.
FOREHEAD_GLABELLA = 31
Forehead glabella.
CHIN_GNATHION = 32
Chin gnathion.
CHIN_LEFT_GONION = 33
Chin left gonion.
CHIN_RIGHT_GONION = 34
Chin right gonion.

The type of Google Cloud Vision API detection to perform, and the maximum number of results to return for that type. Multiple `Feature` objects can be specified in the `features` list.

Used in: AnnotateImageRequest, AsyncAnnotateFileRequest

Feature.Type type = 1
The feature type.
int32 max_results = 2
Maximum number of results of this type. Does not apply to `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
string model = 3
Model to use for the feature. Supported values: "builtin/stable" (the default if unset) and "builtin/latest".

Type of Google Cloud Vision API feature to be extracted.

Used in: Feature

TYPE_UNSPECIFIED = 0
Unspecified feature type.
FACE_DETECTION = 1
Run face detection.
LANDMARK_DETECTION = 2
Run landmark detection.
LOGO_DETECTION = 3
Run logo detection.
LABEL_DETECTION = 4
Run label detection.
TEXT_DETECTION = 5
Run text detection / optical character recognition (OCR). Text detection is optimized for areas of text within a larger image; if the image is a document, use `DOCUMENT_TEXT_DETECTION` instead.
DOCUMENT_TEXT_DETECTION = 11
Run dense text document OCR. Takes precedence when both `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
SAFE_SEARCH_DETECTION = 6
Run Safe Search to detect potentially unsafe or undesirable content.
IMAGE_PROPERTIES = 7
Compute a set of image properties, such as the image's dominant colors.
CROP_HINTS = 9
Run crop hints.
WEB_DETECTION = 10
Run web detection.

The Google Cloud Storage location where the output will be written to.

Used in: OutputConfig

string uri = 1
Google Cloud Storage URI where the results will be stored. Results will be in JSON format and preceded by their corresponding input URI. This field can either represent a single file, or a prefix for multiple outputs. Prefixes must end in a `/`. Examples: * File: `gs://bucket-name/filename.json` * Prefix: `gs://bucket-name/prefix/here/` * File: `gs://bucket-name/prefix/here`. If there are multiple outputs, each response is still AnnotateFileResponse, each of which contains some subset of the full list of AnnotateImageResponse. Multiple outputs can happen if, for example, the output JSON is too large and overflows into multiple sharded files.

The Google Cloud Storage location where the input will be read from.

Used in: InputConfig

string uri = 1
Google Cloud Storage URI for the input file. This must only be a GCS object. Wildcards are not currently supported.

Client image to perform Google Cloud Vision API tasks over.

Used in: AnnotateImageRequest

bytes content = 1
Image content, represented as a stream of bytes. Note: As with all `bytes` fields, protobuffers use a pure binary representation, whereas JSON representations use base64.
optional ImageSource source = 2
Google Cloud Storage image location, or publicly-accessible image URL. If both `content` and `source` are provided for an image, `content` takes precedence and is used to perform the image annotation request.

message ImageAnnotationContext

image_annotator.proto:549

If an image was produced from a file (e.g. a PDF), this message gives information about the source of that image.

Used in: AnnotateImageResponse

string uri = 1
The URI of the file used to produce the image.
int32 page_number = 2
If the file was a PDF or TIFF, this field gives the page number within the file used to produce the image.

message ImageContext

image_annotator.proto:513

Image context and/or feature-specific parameters.

Used in: AnnotateImageRequest, AsyncAnnotateFileRequest

optional LatLongRect lat_long_rect = 1
Not used.
repeated string language_hints = 2
List of languages to use for TEXT_DETECTION. In most cases, an empty value yields the best results since it enables automatic language detection. For languages based on the Latin alphabet, setting `language_hints` is not needed. In rare cases, when the language of the text in the image is known, setting a hint will help get better results (although it will be a significant hindrance if the hint is wrong). Text detection returns an error if one or more of the specified languages is not one of the [supported languages](/vision/docs/languages).
optional CropHintsParams crop_hints_params = 4
Parameters for crop hints annotation request.
optional WebDetectionParams web_detection_params = 6
Parameters for web detection.

Stores image properties, such as dominant colors.

Used in: AnnotateImageResponse

optional DominantColorsAnnotation dominant_colors = 1
If present, dominant colors completed successfully.

External image source (Google Cloud Storage or web URL image location).

Used in: Image

string gcs_image_uri = 1
**Use `image_uri` instead.** The Google Cloud Storage URI of the form `gs://bucket_name/object_name`. Object versioning is not supported. See [Google Cloud Storage Request URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
string image_uri = 2
The URI of the source image. Can be either: 1. A Google Cloud Storage URI of the form `gs://bucket_name/object_name`. Object versioning is not supported. See [Google Cloud Storage Request URIs](https://cloud.google.com/storage/docs/reference-uris) for more info. 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from HTTP/HTTPS URLs, Google cannot guarantee that the request will be completed. Your request may fail if the specified host denies the request (e.g. due to request throttling or DOS prevention), or if Google throttles requests to the site for abuse prevention. You should not depend on externally-hosted images for production applications. When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes precedence.

The desired input location and metadata.

Used in: AnnotateFileResponse, AsyncAnnotateFileRequest

optional GcsSource gcs_source = 1
The Google Cloud Storage location to read the input from.
string mime_type = 2
The type of the file. Currently only "application/pdf" and "image/tiff" are supported. Wildcards are not supported.

Rectangle determined by min and max `LatLng` pairs.

Used in: ImageContext

optional type.LatLng min_lat_lng = 1
Min lat/long pair.
optional type.LatLng max_lat_lng = 2
Max lat/long pair.

A bucketized representation of likelihood, which is intended to give clients highly stable results across model upgrades.

Used in: FaceAnnotation, SafeSearchAnnotation

UNKNOWN = 0
Unknown likelihood.
VERY_UNLIKELY = 1
It is very unlikely that the image belongs to the specified vertical.
UNLIKELY = 2
It is unlikely that the image belongs to the specified vertical.
POSSIBLE = 3
It is possible that the image belongs to the specified vertical.
LIKELY = 4
It is likely that the image belongs to the specified vertical.
VERY_LIKELY = 5
It is very likely that the image belongs to the specified vertical.

Detected entity location information.

Used in: EntityAnnotation

optional type.LatLng lat_lng = 1
lat/long location coordinates.

A vertex represents a 2D point in the image. NOTE: the normalized vertex coordinates are relative to the original image and range from 0 to 1.

Used in: BoundingPoly

float x = 1
X coordinate.
float y = 2
Y coordinate.

Contains metadata for the BatchAnnotateImages operation.

OperationMetadata.State state = 1
Current state of the batch operation.
optional protobuf.Timestamp create_time = 5
The time when the batch request was received.
optional protobuf.Timestamp update_time = 6
The time when the operation result was last updated.

Batch operation states.

Used in: OperationMetadata

STATE_UNSPECIFIED = 0
Invalid.
CREATED = 1
Request is received.
RUNNING = 2
Request is actively being processed.
DONE = 3
The batch processing is done.
CANCELLED = 4
The batch processing was cancelled.

The desired output location and metadata.

Used in: AsyncAnnotateFileRequest, AsyncAnnotateFileResponse

optional GcsDestination gcs_destination = 1
The Google Cloud Storage location to write the output(s) to.
int32 batch_size = 2
The max number of response protos to put into each output JSON file on GCS. The valid range is [1, 100]. If not specified, the default value is 20. For example, for one PDF file with 100 pages, 100 response protos will be generated. If `batch_size` = 20, then 5 JSON files each containing 20 response protos will be written under the prefix `gcs_destination`.`uri`. Currently, `batch_size` only applies to GcsDestination, with potential future support for other output configurations.

Detected page from OCR.

Used in: TextAnnotation

optional TextAnnotation.TextProperty property = 1
Additional information detected on the page.
int32 width = 2
Page width. For PDFs the unit is points. For images (including TIFFs) the unit is pixels.
int32 height = 3
Page height. For PDFs the unit is points. For images (including TIFFs) the unit is pixels.
repeated Block blocks = 4
List of blocks of text, images etc on this page.
float confidence = 5
Confidence of the OCR results on the page. Range [0, 1].

Structural unit of text representing a number of words in certain order.

Used in: Block

optional TextAnnotation.TextProperty property = 1
Additional information detected for the paragraph.
optional BoundingPoly bounding_box = 2
The bounding box for the paragraph. The vertices are in the order of top-left, top-right, bottom-right, bottom-left. When a rotation of the bounding box is detected, the rotation is represented as around the top-left corner as defined when the text is read in the 'natural' orientation. For example: * when the text is horizontal it might look like: 0----1 | | 3----2 * when it's rotated 180 degrees around the top-left corner it becomes: 2----3 | | 1----0 and the vertex order will still be (0, 1, 2, 3).
repeated Word words = 3
List of words in this paragraph.
float confidence = 4
Confidence of the OCR results for the paragraph. Range [0, 1].

A 3D position in the image, used primarily for Face detection landmarks. A valid Position must have both x and y coordinates. The position coordinates are in the same scale as the original image.

Used in: FaceAnnotation.Landmark

float x = 1
X coordinate.
float y = 2
Y coordinate.
float z = 3
Z coordinate (or depth).

A `Property` consists of a user-supplied name/value pair.

Used in: EntityAnnotation

string name = 1
Name of the property.
string value = 2
Value of the property.
uint64 uint64_value = 3
Value of numeric properties.

Set of features pertaining to the image, computed by computer vision methods over safe-search verticals (for example, adult, spoof, medical, violence).

Used in: AnnotateImageResponse

Likelihood adult = 1
Represents the adult content likelihood for the image. Adult content may contain elements such as nudity, pornographic images or cartoons, or sexual activities.
Likelihood spoof = 2
Spoof likelihood. The likelihood that a modification was made to the image's canonical version to make it appear funny or offensive.
Likelihood medical = 3
Likelihood that this is a medical image.
Likelihood violence = 4
Likelihood that this image contains violent content.
Likelihood racy = 9
Likelihood that the request image contains racy content. Racy content may include (but is not limited to) skimpy or sheer clothing, strategically covered nudity, lewd or provocative poses, or close-ups of sensitive body areas.

A single symbol representation.

Used in: Word

optional TextAnnotation.TextProperty property = 1
Additional information detected for the symbol.
optional BoundingPoly bounding_box = 2
The bounding box for the symbol. The vertices are in the order of top-left, top-right, bottom-right, bottom-left. When a rotation of the bounding box is detected, the rotation is represented as around the top-left corner as defined when the text is read in the 'natural' orientation. For example: * when the text is horizontal it might look like: 0----1 | | 3----2 * when it's rotated 180 degrees around the top-left corner it becomes: 2----3 | | 1----0 and the vertex order will still be (0, 1, 2, 3).
string text = 3
The actual UTF-8 representation of the symbol.
float confidence = 4
Confidence of the OCR results for the symbol. Range [0, 1].

message TextAnnotation

text_annotation.proto:36

TextAnnotation contains a structured representation of OCR extracted text. The hierarchy of an OCR extracted text structure is like this: TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol. Each structural component, starting from Page, may further have its own properties. Properties describe detected languages, breaks, etc. Please refer to the [TextAnnotation.TextProperty][google.cloud.vision.v1p2beta1.TextAnnotation.TextProperty] message definition below for more detail.

Used in: AnnotateImageResponse

repeated Page pages = 1
List of pages detected by OCR.
string text = 2
UTF-8 text detected on the pages.

message TextAnnotation.DetectedBreak

text_annotation.proto:49

Detected start or end of a structural component.

Used in: TextProperty

DetectedBreak.BreakType type = 1
Detected break type.
bool is_prefix = 2
True if break prepends the element.

enum TextAnnotation.DetectedBreak.BreakType

text_annotation.proto:51

Enum to denote the type of break found. New line, space etc.

Used in: DetectedBreak

UNKNOWN = 0
Unknown break label type.
SPACE = 1
Regular space.
SURE_SPACE = 2
Sure space (very wide).
EOL_SURE_SPACE = 3
Line-wrapping break.
HYPHEN = 4
End-line hyphen that is not present in text; does not co-occur with `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
LINE_BREAK = 5
Line break that ends a paragraph.

message TextAnnotation.DetectedLanguage

text_annotation.proto:38

Detected language for a structural component.

Used in: TextProperty

string language_code = 1
The BCP-47 language code, such as "en-US" or "sr-Latn". For more information, see http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
float confidence = 2
Confidence of detected language. Range [0, 1].

message TextAnnotation.TextProperty

text_annotation.proto:80

Additional information detected on the structural component.

Used in: Block, Page, Paragraph, Symbol, Word

repeated DetectedLanguage detected_languages = 1
A list of detected languages together with confidence.
optional DetectedBreak detected_break = 2
Detected start or end of a text segment.

A vertex represents a 2D point in the image. NOTE: the vertex coordinates are in the same scale as the original image.

Used in: BoundingPoly

int32 x = 1
X coordinate.
int32 y = 2
Y coordinate.

Relevant information for the image from the Internet.

Used in: AnnotateImageResponse

repeated WebDetection.WebEntity web_entities = 1
Deduced entities from similar images on the Internet.
repeated WebDetection.WebImage full_matching_images = 2
Fully matching images from the Internet. Can include resized copies of the query image.
repeated WebDetection.WebImage partial_matching_images = 3
Partial matching images from the Internet. Those images are similar enough to share some key-point features. For example an original image will likely have partial matching for its crops.
repeated WebDetection.WebPage pages_with_matching_images = 4
Web pages containing the matching images from the Internet.
repeated WebDetection.WebImage visually_similar_images = 6
The visually similar image results.
repeated WebDetection.WebLabel best_guess_labels = 8
Best guess text labels for the request image.

Entity deduced from similar images on the Internet.

Used in: WebDetection

string entity_id = 1
Opaque entity ID.
float score = 2
Overall relevancy score for the entity. Not normalized and not comparable across different image queries.
string description = 3
Canonical description of the entity, in English.

Metadata for online images.

Used in: WebDetection, WebPage

string url = 1
The result image URL.
float score = 2
(Deprecated) Overall relevancy score for the image.

Label to provide extra metadata for the web detection.

Used in: WebDetection

string label = 1
Label for extra metadata.
string language_code = 2
The BCP-47 language code for `label`, such as "en-US" or "sr-Latn". For more information, see http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.

Metadata for web pages.

Used in: WebDetection

string url = 1
The result web page URL.
float score = 2
(Deprecated) Overall relevancy score for the web page.
string page_title = 3
Title for the web page; may contain HTML markup.
repeated WebImage full_matching_images = 4
Fully matching images on the page. Can include resized copies of the query image.
repeated WebImage partial_matching_images = 5
Partial matching images on the page. Those images are similar enough to share some key-point features. For example an original image will likely have partial matching for its crops.

Parameters for web detection request.

Used in: ImageContext

bool include_geo_results = 2
Whether to include results derived from the geo information in the image.

A word representation.

Used in: Paragraph

optional TextAnnotation.TextProperty property = 1
Additional information detected for the word.
optional BoundingPoly bounding_box = 2
The bounding box for the word. The vertices are in the order of top-left, top-right, bottom-right, bottom-left. When a rotation of the bounding box is detected, the rotation is represented as around the top-left corner as defined when the text is read in the 'natural' orientation. For example: * when the text is horizontal it might look like: 0----1 | | 3----2 * when it's rotated 180 degrees around the top-left corner it becomes: 2----3 | | 1----0 and the vertex order will still be (0, 1, 2, 3).
repeated Symbol symbols = 3
List of symbols in the word. The order of the symbols follows the natural reading order.
float confidence = 4
Confidence of the OCR results for the word. Range [0, 1].

package google.cloud.vision.v1p2beta1

service ImageAnnotator

rpc AsyncBatchAnnotateFiles (AsyncBatchAnnotateFilesRequest, longrunning.Operation)

message AsyncBatchAnnotateFilesRequest

repeated AsyncAnnotateFileRequest requests = 1

rpc BatchAnnotateImages (BatchAnnotateImagesRequest, BatchAnnotateImagesResponse)

message BatchAnnotateImagesRequest

repeated AnnotateImageRequest requests = 1

message BatchAnnotateImagesResponse

repeated AnnotateImageResponse responses = 1

message AnnotateFileResponse

optional InputConfig input_config = 1

repeated AnnotateImageResponse responses = 2

message AnnotateImageRequest

optional Image image = 1

repeated Feature features = 2

optional ImageContext image_context = 3

message AnnotateImageResponse

repeated FaceAnnotation face_annotations = 1

repeated EntityAnnotation landmark_annotations = 2

repeated EntityAnnotation logo_annotations = 3

repeated EntityAnnotation label_annotations = 4

repeated EntityAnnotation text_annotations = 5

optional TextAnnotation full_text_annotation = 12

optional SafeSearchAnnotation safe_search_annotation = 6

optional ImageProperties image_properties_annotation = 8

optional CropHintsAnnotation crop_hints_annotation = 11

optional WebDetection web_detection = 13

optional rpc.Status error = 9

optional ImageAnnotationContext context = 21

message AsyncAnnotateFileRequest

optional InputConfig input_config = 1

repeated Feature features = 2

optional ImageContext image_context = 3

optional OutputConfig output_config = 4

message AsyncAnnotateFileResponse

optional OutputConfig output_config = 1

message AsyncBatchAnnotateFilesResponse

repeated AsyncAnnotateFileResponse responses = 1

message Block

optional TextAnnotation.TextProperty property = 1

optional BoundingPoly bounding_box = 2

repeated Paragraph paragraphs = 3

Block.BlockType block_type = 4

float confidence = 5

enum Block.BlockType

UNKNOWN = 0

TEXT = 1

TABLE = 2

PICTURE = 3

RULER = 4

BARCODE = 5

message BoundingPoly

repeated Vertex vertices = 1

repeated NormalizedVertex normalized_vertices = 2

message ColorInfo

optional type.Color color = 1

float score = 2

float pixel_fraction = 3

message CropHint

optional BoundingPoly bounding_poly = 1

float confidence = 2

float importance_fraction = 3

message CropHintsAnnotation

repeated CropHint crop_hints = 1

message CropHintsParams

repeated float aspect_ratios = 1

message DominantColorsAnnotation

repeated ColorInfo colors = 1

message EntityAnnotation

string mid = 1

string locale = 2

string description = 3

float score = 4

float confidence = 5

float topicality = 6

optional BoundingPoly bounding_poly = 7

repeated LocationInfo locations = 8

repeated Property properties = 9

message FaceAnnotation