package edu.stanford.nlp.pipeline

Get desktop application:
View/edit binary Protocol Buffers messages

A coreference chain. These fields are not *really* optional. CoreNLP will crash without them.

Used in: Document

required int32 chainID = 1
repeated CorefChain.CorefMention mention = 2
required uint32 representative = 3

Used in: CorefChain

optional int32 mentionID = 1
optional string mentionType = 2
optional string number = 3
optional string gender = 4
optional string animacy = 5
optional uint32 beginIndex = 6
optional uint32 endIndex = 7
optional uint32 headIndex = 9
optional uint32 sentenceIndex = 10
optional uint32 position = 11
the second element of position

A protobuf which allows passing in a document with basic dependencies to be converted to enhanced dependencies

required Document document = 1
oneof ref
- Language language = 2
- string relativePronouns = 3
  The expected value of this is a regex which matches relative pronouns

A dependency graph representation.

Used in: Quote, RelationTriple, SemgrexRequest.Dependencies, Sentence

repeated DependencyGraph.Node node = 1
repeated DependencyGraph.Edge edge = 2
repeated uint32 root = 3

Used in: DependencyGraph

required uint32 source = 1
required uint32 target = 2
optional string dep = 3
optional bool isExtra = 4
optional uint32 sourceCopy = 5
optional uint32 targetCopy = 6
optional Language language = 7

Used in: DependencyGraph

required uint32 sentenceIndex = 1
required uint32 index = 2
optional uint32 copyAnnotation = 3

A document; that is, the equivalent of an Annotation.

Used in: DependencyEnhancerRequest, TokensRegexRequest

required string text = 1
repeated Sentence sentence = 2
repeated CorefChain corefChain = 3
optional string docID = 4
optional string docDate = 7
optional uint64 calendar = 8
repeated Token sentencelessToken = 5
* A peculiar field, for the corner case when a Document is serialized without any sentences.
repeated Token character = 10
repeated Quote quote = 6
repeated NERMention mentions = 9
* This field is for entity mentions across the document.
optional bool hasEntityMentionsAnnotation = 13
used to differentiate between null and empty list
optional bool xmlDoc = 11
* xml information
repeated Section sections = 12
repeated Mention mentionsForCoref = 14
* coref mentions for entire document
optional bool hasCorefMentionAnnotation = 15
optional bool hasCorefAnnotation = 16
repeated int32 corefMentionToEntityMentionMappings = 17
repeated int32 entityMentionToCorefMentionMappings = 18

A representation of an entity in a relation. This corresponds to the EntityMention, and more broadly the ExtractionObject classes.

Used in: Relation, Sentence

optional uint32 headStart = 6
optional uint32 headEnd = 7
optional string mentionType = 8
optional string normalizedName = 9
optional uint32 headTokenIndex = 10
optional string corefID = 11
optional string objectID = 1
inherited from ExtractionObject
optional uint32 extentStart = 2
optional uint32 extentEnd = 3
optional string type = 4
optional string subtype = 5
Implicit uint32 sentence @see implicit in sentence

A protobuf for calling the java constituency parser evaluator from elsewhere

repeated EvaluateParserRequest.ParseResult treebank = 1

Used in: EvaluateParserRequest

required FlattenedParseTree gold = 1
repeated FlattenedParseTree predicted = 2
Repeated so you can send in k-best parses, if your parser handles that. Note that this already includes a score field.

required double f1 = 1

A version of ParseTree with a flattened structure so that deep trees don't exceed the protobuf stack depth

Used in: EvaluateParserRequest.ParseResult

repeated FlattenedParseTree.Node nodes = 1

Used in: FlattenedParseTree

oneof contents
- bool openNode = 1
- bool closeNode = 2
- string value = 3
optional double score = 4

Used in: Mention

optional int32 sentenceNum = 1
optional int32 tokenIndex = 2
optional int32 docID = 3
optional uint32 copyCount = 4

An enumeration for the valid languages allowed in CoreNLP

Used in: DependencyEnhancerRequest, DependencyGraph.Edge

Unknown = 0
Any = 1
Arabic = 2
Chinese = 3
English = 4
German = 5
French = 6
Hebrew = 7
Spanish = 8
UniversalEnglish = 9
UniversalChinese = 10

A map from integers to strings. Used, minimally, in the CoNLLU featurizer

repeated uint32 key = 1
repeated string value = 2

A map from strings to strings. Used, minimally, in the CoNLLU featurizer

Used in: Token

repeated string key = 1
repeated string value = 2

Used in: Document, Sentence

optional int32 mentionID = 1
optional string mentionType = 2
optional string number = 3
optional string gender = 4
optional string animacy = 5
optional string person = 6
optional uint32 startIndex = 7
optional uint32 endIndex = 9
optional int32 headIndex = 10
optional string headString = 11
optional string nerString = 12
optional int32 originalRef = 13
optional int32 goldCorefClusterID = 14
optional int32 corefClusterID = 15
optional int32 mentionNum = 16
optional int32 sentNum = 17
optional int32 utter = 18
optional int32 paragraph = 19
optional bool isSubject = 20
optional bool isDirectObject = 21
optional bool isIndirectObject = 22
optional bool isPrepositionObject = 23
optional bool hasTwin = 24
optional bool generic = 25
optional bool isSingleton = 26
optional bool hasBasicDependency = 27
optional bool hasEnhancedDepenedncy = 28
optional bool hasContextParseTree = 29
optional IndexedWord headIndexedWord = 30
optional IndexedWord dependingVerb = 31
optional IndexedWord headWord = 32
optional SpeakerInfo speakerInfo = 33
repeated IndexedWord sentenceWords = 50
repeated IndexedWord originalSpan = 51
repeated string dependents = 52
repeated string preprocessedTerms = 53
repeated int32 appositions = 54
repeated int32 predicateNominatives = 55
repeated int32 relativePronouns = 56
repeated int32 listMembers = 57
repeated int32 belongToLists = 58

An NER mention in the text

Used in: Document, Sentence

optional uint32 sentenceIndex = 1
required uint32 tokenStartInSentenceInclusive = 2
required uint32 tokenEndInSentenceExclusive = 3
required string ner = 4
optional string normalizedNER = 5
optional string entityType = 6
optional Timex timex = 7
optional string wikipediaEntity = 8
optional string gender = 9
optional uint32 entityMentionIndex = 10
optional uint32 canonicalEntityMentionIndex = 11
optional string entityMentionText = 12

The seven informative Natural Logic relations

Used in: Polarity

EQUIVALENCE = 0
FORWARD_ENTAILMENT = 1
REVERSE_ENTAILMENT = 2
NEGATION = 3
ALTERNATION = 4
COVER = 5
INDEPENDENCE = 6

A Natural Logic operator

Used in: Token

required string name = 1
required int32 quantifierSpanBegin = 2
required int32 quantifierSpanEnd = 3
required int32 subjectSpanBegin = 4
required int32 subjectSpanEnd = 5
required int32 objectSpanBegin = 6
required int32 objectSpanEnd = 7

A syntactic parse tree, with scores.

Used in: Sentence

repeated ParseTree child = 1
optional string value = 2
optional uint32 yieldBeginIndex = 3
optional uint32 yieldEndIndex = 4
optional double score = 5
optional Sentiment sentiment = 6

The polarity of a word, according to Natural Logic

Used in: Token

required NaturalLogicRelation projectEquivalence = 1
required NaturalLogicRelation projectForwardEntailment = 2
required NaturalLogicRelation projectReverseEntailment = 3
required NaturalLogicRelation projectNegation = 4
required NaturalLogicRelation projectAlternation = 5
required NaturalLogicRelation projectCover = 6
required NaturalLogicRelation projectIndependence = 7

A quotation marker in text

Used in: Document, Section

optional string text = 1
optional uint32 begin = 2
optional uint32 end = 3
optional uint32 sentenceBegin = 5
optional uint32 sentenceEnd = 6
optional uint32 tokenBegin = 7
optional uint32 tokenEnd = 8
optional string docid = 9
optional uint32 index = 10
optional string author = 11
optional string mention = 12
optional uint32 mentionBegin = 13
optional uint32 mentionEnd = 14
optional string mentionType = 15
optional string mentionSieve = 16
optional string speaker = 17
optional string speakerSieve = 18
optional string canonicalMention = 19
optional uint32 canonicalMentionBegin = 20
optional uint32 canonicalMentionEnd = 21
optional DependencyGraph attributionDependencyGraph = 22

A representation of a relation, mirroring RelationMention

Used in: Sentence

repeated string argName = 6
repeated Entity arg = 7
optional string signature = 8
optional string objectID = 1
inherited from ExtractionObject
optional uint32 extentStart = 2
optional uint32 extentEnd = 3
optional string type = 4
optional string subtype = 5
Implicit uint32 sentence @see implicit in sentence

An OpenIE relation triple. Created by the openie annotator.

Used in: Sentence

optional string subject = 1
The surface form of the subject
optional string relation = 2
The surface form of the relation (required)
optional string object = 3
The surface form of the object
optional double confidence = 4
The [optional] confidence of the extraction
repeated TokenLocation subjectTokens = 13
The tokens comprising the subject of the triple
repeated TokenLocation relationTokens = 14
The tokens comprising the relation of the triple
repeated TokenLocation objectTokens = 15
The tokens comprising the object of the triple
optional DependencyGraph tree = 8
The dependency graph fragment for this triple
optional bool istmod = 9
If true, this expresses an implicit tmod relation
optional bool prefixBe = 10
If true, this relation string is missing a 'be' prefix
optional bool suffixBe = 11
If true, this relation string is missing a 'be' suffix
optional bool suffixOf = 12
If true, this relation string is missing an 'of' suffix

Used in: Document

required uint32 charBegin = 1
required uint32 charEnd = 2
optional string author = 3
repeated uint32 sentenceIndexes = 4
optional string datetime = 5
repeated Quote quotes = 6
optional uint32 authorCharBegin = 7
optional uint32 authorCharEnd = 8
required Token xmlTag = 9

A message for requesting a semgrex. Each sentence stores information about the tokens making up the corresponding graph. An alternative would have been to use the existing Document or Sentence classes, but the problem with that is it would be ambiguous which dependency object to use.

repeated string semgrex = 1
repeated SemgrexRequest.Dependencies query = 2

Used in: SemgrexRequest

repeated Token token = 1
required DependencyGraph graph = 2

The response from running a semgrex. If you pass in M semgrex expressions and N dependency graphs, this returns MxN nested results. Each SemgrexResult can match multiple times in one graph.

repeated SemgrexResponse.GraphResult result = 1

Used in: SemgrexResponse

repeated SemgrexResult result = 1

Used in: SemgrexResult

required int32 matchIndex = 1
repeated NamedNode node = 2
repeated NamedRelation reln = 3

Used in: Match

required string name = 1
required int32 matchIndex = 2

Used in: Match

required string name = 1
required string reln = 2

Used in: GraphResult

repeated Match match = 1

The serialized version of a CoreMap representing a sentence.

Used in: Document

repeated Token token = 1
required uint32 tokenOffsetBegin = 2
required uint32 tokenOffsetEnd = 3
optional uint32 sentenceIndex = 4
optional uint32 characterOffsetBegin = 5
optional uint32 characterOffsetEnd = 6
optional ParseTree parseTree = 7
optional ParseTree binarizedParseTree = 31
optional ParseTree annotatedParseTree = 32
optional string sentiment = 33
repeated ParseTree kBestParseTrees = 34
optional DependencyGraph basicDependencies = 8
optional DependencyGraph collapsedDependencies = 9
optional DependencyGraph collapsedCCProcessedDependencies = 10
optional DependencyGraph alternativeDependencies = 13
repeated RelationTriple openieTriple = 14
The OpenIE triples in the sentence
repeated RelationTriple kbpTriple = 16
The KBP triples in this sentence
repeated SentenceFragment entailedSentence = 15
The entailed sentences, by natural logic
repeated SentenceFragment entailedClause = 35
The entailed clauses, by natural logic
optional DependencyGraph enhancedDependencies = 17
optional DependencyGraph enhancedPlusPlusDependencies = 18
repeated Token character = 19
optional uint32 paragraph = 11
optional string text = 12
Only needed if we're only saving the sentence.
optional uint32 lineNumber = 20
optional bool hasRelationAnnotations = 51
Fields set by other annotators in CoreNLP
repeated Entity entity = 52
repeated Relation relation = 53
optional bool hasNumerizedTokensAnnotation = 54
repeated NERMention mentions = 55
repeated Mention mentionsForCoref = 56
optional bool hasCorefMentionsAnnotation = 57
optional string sentenceID = 58
Useful when storing sentences (e.g. ForEach)
optional string sectionDate = 59
date of section
optional uint32 sectionIndex = 60
section index for this sentence's section
optional string sectionName = 61
name of section
optional string sectionAuthor = 62
author of section
optional string docID = 63
doc id
optional bool sectionQuoted = 64
is this sentence in an xml quote in a post
optional bool hasEntityMentionsAnnotation = 65
check if there are entity mentions
optional bool hasKBPTriplesAnnotation = 68
check if there are KBP triples
optional bool hasOpenieTriplesAnnotation = 69
check if there are OpenIE triples
optional uint32 chapterIndex = 66
quote stuff
optional uint32 paragraphIndex = 67
optional Sentence enhancedSentence = 70
the quote annotator can sometimes add merged sentences
optional string speaker = 71
speaker stuff
The speaker speaking this sentence
optional string speakerType = 72
The type of speaker speaking this sentence

An entailed sentence fragment. Created by the openie annotator.

Used in: Sentence

repeated uint32 tokenIndex = 1
optional uint32 root = 2
optional bool assumedTruth = 3
optional double score = 4

An enumeration of valid sentiment values for the sentiment classifier.

Used in: ParseTree

STRONG_NEGATIVE = 0
WEAK_NEGATIVE = 1
NEUTRAL = 2
WEAK_POSITIVE = 3
STRONG_POSITIVE = 4

A Span of text

Used in: Token

required uint32 begin = 1
required uint32 end = 2

Used in: Mention

optional string speakerName = 1
repeated int32 mentions = 2

A Timex object, representing a temporal expression (TIMe EXpression). These fields are not *really* optional. CoreNLP will crash without them.

Used in: NERMention, Token

optional string value = 1
optional string altValue = 2
optional string text = 3
optional string type = 4
optional string tid = 5
optional uint32 beginPoint = 6
optional uint32 endPoint = 7

The serialized version of a Token (a CoreLabel).

Used in: Document, Section, SemgrexRequest.Dependencies, Sentence

optional string word = 1
Fields set by the default annotators [new CoreNLP(new Properties())]
the word's gloss (post-tokenization)
optional string pos = 2
The word's part of speech tag
optional string value = 3
The word's 'value', (e.g., parse tree node)
optional string category = 4
The word's 'category' (e.g., parse tree node)
optional string before = 5
The whitespace/xml before the token
optional string after = 6
The whitespace/xml after the token
optional string originalText = 7
The original text for this token
optional string ner = 8
The word's NER tag
optional string coarseNER = 62
The word's coarse NER tag
optional string fineGrainedNER = 63
The word's fine-grained NER tag
repeated string nerLabelProbs = 66
listing of probs
optional string normalizedNER = 9
The word's normalized NER tag
optional string lemma = 10
The word's lemma
optional uint32 beginChar = 11
The character offset begin, in the document
optional uint32 endChar = 12
The character offset end, in the document
optional uint32 utterance = 13
The utterance tag used in dcoref
optional string speaker = 14
The speaker speaking this word
optional string speakerType = 77
The type of speaker speaking this word
optional uint32 beginIndex = 15
The begin index of, e.g., a span
optional uint32 endIndex = 16
The end index of, e.g., a span
optional uint32 tokenBeginIndex = 17
The begin index of the token
optional uint32 tokenEndIndex = 18
The end index of the token
optional Timex timexValue = 19
The time this word refers to
optional bool hasXmlContext = 21
Used by clean xml annotator
repeated string xmlContext = 22
Used by clean xml annotator
optional uint32 corefClusterID = 23
The [primary] cluster id for this token
optional string answer = 24
A temporary annotation which is occasionally left in
optional uint32 headWordIndex = 26
optional string projectedCategory = 25; // The syntactic category of the maximal constituent headed by the word. Not used anywhere, so deleted.
The index of the head word of this word.
optional Operator operator = 27
If this is an operator, which one is it and what is its scope (as per Natural Logic)?
optional Polarity polarity = 28
The polarity of this word, according to Natural Logic
optional string polarity_dir = 39
The polarity of this word, either "up", "down", or "flat"
optional Span span = 29
The span of a leaf node of a tree
optional string sentiment = 30
The final sentiment of the sentence
optional int32 quotationIndex = 31
The index of the quotation this token refers to
optional MapStringString conllUFeatures = 32
optional string coarseTag = 33
The coarse POS tag (used to store the UPOS tag)
optional Span conllUTokenSpan = 34
optional string conllUMisc = 35
optional MapStringString conllUSecondaryDeps = 36
optional string wikipediaEntity = 37
optional bool isNewline = 38
optional string gender = 51
Fields set by other annotators in CoreNLP
gender annotation (machine reading)
optional string trueCase = 52
true case type of token
optional string trueCaseText = 53
true case gloss of token
optional string chineseChar = 54
Chinese character info
optional string chineseSeg = 55
optional string chineseXMLChar = 60
optional string arabicSeg = 76
Arabic character info
optional string sectionName = 56
Section info
optional string sectionAuthor = 57
optional string sectionDate = 58
optional string sectionEndLabel = 59
optional string parent = 61
French tokens have parents
repeated uint32 corefMentionIndex = 64
mention index info
optional uint32 entityMentionIndex = 65
optional bool isMWT = 67
mwt stuff
optional bool isFirstMWT = 68
optional string mwtText = 69
optional uint64 numericValue = 70
number info
optional string numericType = 71
optional uint64 numericCompositeValue = 72
optional string numericCompositeType = 73
optional uint32 codepointOffsetBegin = 74
optional uint32 codepointOffsetEnd = 75

The index of a token in a document, including the sentence index and the offset.

Used in: RelationTriple

optional uint32 sentenceIndex = 1
optional uint32 tokenIndex = 2

It's possible to send in a whole document, but we only care about the Sentences and Tokens

required Document doc = 1
repeated string pattern = 2

The result will be a nested structure: repeated PatternMatch, one for each pattern. Each PatternMatch has a repeated Match, which tells you which sentence matched and where.

repeated TokensRegexResponse.PatternMatch match = 1

Used in: PatternMatch

required int32 sentence = 1
required MatchLocation match = 2
repeated MatchLocation group = 3

Used in: Match

optional string text = 1
optional int32 begin = 2
optional int32 end = 3

Used in: TokensRegexResponse

repeated Match match = 1

package edu.stanford.nlp.pipeline

message CorefChain

required int32 chainID = 1

repeated CorefChain.CorefMention mention = 2

required uint32 representative = 3

message CorefChain.CorefMention

optional int32 mentionID = 1

optional string mentionType = 2

optional string number = 3

optional string gender = 4

optional string animacy = 5

optional uint32 beginIndex = 6

optional uint32 endIndex = 7

optional uint32 headIndex = 9

optional uint32 sentenceIndex = 10

optional uint32 position = 11

message DependencyEnhancerRequest

required Document document = 1

oneof ref

Language language = 2

string relativePronouns = 3

message DependencyGraph

repeated DependencyGraph.Node node = 1

repeated DependencyGraph.Edge edge = 2

repeated uint32 root = 3

message DependencyGraph.Edge

required uint32 source = 1

required uint32 target = 2

optional string dep = 3

optional bool isExtra = 4

optional uint32 sourceCopy = 5

optional uint32 targetCopy = 6

optional Language language = 7

message DependencyGraph.Node

required uint32 sentenceIndex = 1

required uint32 index = 2

optional uint32 copyAnnotation = 3

message Document

required string text = 1

repeated Sentence sentence = 2

repeated CorefChain corefChain = 3

optional string docID = 4

optional string docDate = 7

optional uint64 calendar = 8

repeated Token sentencelessToken = 5

repeated Token character = 10

repeated Quote quote = 6

repeated NERMention mentions = 9

optional bool hasEntityMentionsAnnotation = 13

optional bool xmlDoc = 11

repeated Section sections = 12

repeated Mention mentionsForCoref = 14

optional bool hasCorefMentionAnnotation = 15

optional bool hasCorefAnnotation = 16

repeated int32 corefMentionToEntityMentionMappings = 17

repeated int32 entityMentionToCorefMentionMappings = 18

message Entity

optional uint32 headStart = 6

optional uint32 headEnd = 7

optional string mentionType = 8

optional string normalizedName = 9

optional uint32 headTokenIndex = 10

optional string corefID = 11

optional string objectID = 1

optional uint32 extentStart = 2

optional uint32 extentEnd = 3

optional string type = 4

optional string subtype = 5

message EvaluateParserRequest

repeated EvaluateParserRequest.ParseResult treebank = 1

message EvaluateParserRequest.ParseResult

required FlattenedParseTree gold = 1

repeated FlattenedParseTree predicted = 2

message EvaluateParserResponse

required double f1 = 1

message FlattenedParseTree

repeated FlattenedParseTree.Node nodes = 1

message FlattenedParseTree.Node

oneof contents

bool openNode = 1