package chrome_lang_id

Get desktop application:
View/edit binary Protocol Buffers messages

Descriptor for feature extractor.

repeated FeatureFunctionDescriptor feature = 1
Top-level feature function for extractor.

Descriptor for feature function.

Used in: FeatureExtractorDescriptor

required string type = 1
Feature function type.
optional string name = 2
Feature function name.
optional int32 argument = 3
Default argument for feature function.
repeated Parameter parameter = 4
Named parameters for feature descriptor.
repeated FeatureFunctionDescriptor feature = 7
Nested sub-feature function descriptors.

Used in: FeatureFunctionDescriptor

optional string name = 1
optional string value = 2

A Sentence contains the raw text contents of a sentence, as well as an analysis.

optional string id = 1
Identifier for sentence.
optional string text = 2
Raw text contents of the sentence.
repeated Token token = 3
Tokenization of the sentence.

Task input descriptor.

Used in: TaskSpec

required string name = 1
Name of input resource.
optional string creator = 2
Name of stage responsible for creating this resource.
repeated string file_format = 3
File format for resource.
repeated string record_format = 4
Record format for resource.
optional bool multi_file = 5
Is this resource multi-file?
repeated group chrome_lang_id.TaskInput.Part = 6

An input can consist of multiple file sets.

Used in: TaskInput

optional string file_pattern = 7
File pattern for file set.
optional string file_format = 8
File format for file set.
optional string record_format = 9
Record format for file set.

Task output descriptor.

Used in: TaskSpec

required string name = 1
Name of output resource.
optional string file_format = 2
File format for output resource.
optional string record_format = 3
Record format for output resource.
optional int32 shards = 4
Number of shards in output. If it is different from zero, this output is sharded. If the number of shards is set to -1, this means that the output is sharded, but the number of shards is unknown. The files are then named 'base-*-of-*'.
optional string file_base = 5
Base file name for output resource. If this is not set by the task component it is set to a default value by the workflow engine.
optional string file_extension = 6
Optional extension added to the file name.

A task specification is used for describing execution parameters.

optional string task_name = 1
Name of task.
optional string task_type = 2
Workflow task type.
repeated group chrome_lang_id.TaskSpec.Parameter = 3
repeated TaskInput input = 6
Task inputs.
repeated TaskOutput output = 7
Task outputs.

Task parameters.

Used in: TaskSpec

required string name = 4
optional string value = 5

A sentence token marks a span of bytes in the sentence text as a token or word.

Used in: Sentence

required string word = 1
Token word form.
required int32 start = 2
Start position of token in text.
required int32 end = 3
End position of token in text. Gives index of last byte, not one past the last byte. If token came from lexer, excludes any trailing HTML tags.
optional int32 head = 4
Head of this token in the dependency tree: the id of the token which has an arc going to this one. If it is the root token of a sentence, then it is set to -1.
optional string tag = 5
Part-of-speech tag for token.
optional string category = 6
Coarse-grained word category for token.
optional string label = 7
Label for dependency relation between this token and its head.
optional Token.BreakLevel break_level = 8

Break level for a token, indicating how it was separated from the previous token in the text.

Used in: Token

NO_BREAK = 0
No separation between tokens.
SPACE_BREAK = 1
Tokens separated by space.
LINE_BREAK = 2
Tokens separated by line break.
SENTENCE_BREAK = 3
Tokens separated by sentence break.

package chrome_lang_id

message FeatureExtractorDescriptor

repeated FeatureFunctionDescriptor feature = 1

message FeatureFunctionDescriptor

required string type = 1

optional string name = 2

optional int32 argument = 3

repeated Parameter parameter = 4

repeated FeatureFunctionDescriptor feature = 7

message Parameter

optional string name = 1

optional string value = 2

message Sentence

optional string id = 1

optional string text = 2

repeated Token token = 3

message TaskInput

required string name = 1

optional string creator = 2

repeated string file_format = 3

repeated string record_format = 4

optional bool multi_file = 5

repeated group chrome_lang_id.TaskInput.Part = 6

message TaskInput.Part

optional string file_pattern = 7

optional string file_format = 8

optional string record_format = 9

message TaskOutput

required string name = 1

optional string file_format = 2

optional string record_format = 3

optional int32 shards = 4

optional string file_base = 5

optional string file_extension = 6

message TaskSpec

optional string task_name = 1

optional string task_type = 2

repeated group chrome_lang_id.TaskSpec.Parameter = 3

repeated TaskInput input = 6

repeated TaskOutput output = 7

message TaskSpec.Parameter

required string name = 4

optional string value = 5

message Token

required string word = 1

required int32 start = 2

required int32 end = 3

optional int32 head = 4

optional string tag = 5

optional string category = 6

optional string label = 7

optional Token.BreakLevel break_level = 8

enum Token.BreakLevel

NO_BREAK = 0

SPACE_BREAK = 1

LINE_BREAK = 2

SENTENCE_BREAK = 3