final class Tokeniser
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
(package private) Token.Character |
charPending |
private java.lang.StringBuilder |
charsBuilder |
private java.lang.String |
charsString |
private int |
charStartPos |
private int[] |
codepointHolder |
(package private) Token.Comment |
commentPending |
(package private) java.lang.StringBuilder |
dataBuffer |
(package private) Token.Doctype |
doctypePending |
private Token |
emitPending |
(package private) Token.EndTag |
endPending |
private ParseErrorList |
errors |
private boolean |
isEmitPending |
private java.lang.String |
lastStartCloseSeq |
private java.lang.String |
lastStartTag |
private int |
markupStartPos |
private int[] |
multipointHolder |
private static char[] |
notCharRefCharsSorted |
private CharacterReader |
reader |
(package private) static char |
replacementChar |
(package private) Token.StartTag |
startPending |
private TokeniserState |
state |
(package private) Token.Tag |
tagPending |
private static int |
Unset |
(package private) static int[] |
win1252Extensions |
(package private) static int |
win1252ExtensionsStart |
Constructor and Description |
---|
Tokeniser(CharacterReader reader,
ParseErrorList errors) |
Modifier and Type | Method and Description |
---|---|
(package private) void |
advanceTransition(TokeniserState newState) |
(package private) java.lang.String |
appropriateEndTagName() |
(package private) java.lang.String |
appropriateEndTagSeq()
Returns the closer sequence `</lastStart`. |
private void |
characterReferenceError(java.lang.String message,
java.lang.Object... args) |
(package private) int[] |
consumeCharacterReference(java.lang.Character additionalAllowedCharacter,
boolean inAttribute) |
(package private) void |
createBogusCommentPending() |
(package private) void |
createCommentPending() |
(package private) void |
createDoctypePending() |
(package private) Token.Tag |
createTagPending(boolean start) |
(package private) void |
createTempBuffer() |
(package private) boolean |
currentNodeInHtmlNS() |
(package private) void |
emit(char c) |
(package private) void |
emit(char[] chars) |
(package private) void |
emit(int[] codepoints) |
(package private) void |
emit(java.lang.String str) |
(package private) void |
emit(java.lang.StringBuilder str) |
(package private) void |
emit(Token token) |
(package private) void |
emitCommentPending() |
(package private) void |
emitDoctypePending() |
(package private) void |
emitTagPending() |
(package private) void |
eofError(TokeniserState state) |
(package private) void |
error(java.lang.String errorMsg) |
(package private) void |
error(java.lang.String errorMsg,
java.lang.Object... args) |
(package private) void |
error(TokeniserState state) |
(package private) TokeniserState |
getState() |
(package private) boolean |
isAppropriateEndTagToken() |
(package private) Token |
read() |
(package private) void |
transition(TokeniserState newState) |
(package private) java.lang.String |
unescapeEntities(boolean inAttribute)
Utility method to consume reader and unescape entities found within.
|
static final char replacementChar
private static final char[] notCharRefCharsSorted
static final int win1252ExtensionsStart
static final int[] win1252Extensions
private final CharacterReader reader
private final ParseErrorList errors
private TokeniserState state
@Nullable private Token emitPending
private boolean isEmitPending
@Nullable private java.lang.String charsString
private final java.lang.StringBuilder charsBuilder
java.lang.StringBuilder dataBuffer
Token.StartTag startPending
Token.EndTag endPending
Token.Tag tagPending
Token.Character charPending
Token.Doctype doctypePending
Token.Comment commentPending
@Nullable private java.lang.String lastStartTag
@Nullable private java.lang.String lastStartCloseSeq
private static final int Unset
private int markupStartPos
private int charStartPos
private final int[] codepointHolder
private final int[] multipointHolder
Tokeniser(CharacterReader reader, ParseErrorList errors)
Token read()
void emit(Token token)
void emit(java.lang.String str)
void emit(java.lang.StringBuilder str)
void emit(char c)
void emit(char[] chars)
void emit(int[] codepoints)
TokeniserState getState()
void transition(TokeniserState newState)
void advanceTransition(TokeniserState newState)
@Nullable int[] consumeCharacterReference(@Nullable java.lang.Character additionalAllowedCharacter, boolean inAttribute)
Token.Tag createTagPending(boolean start)
void emitTagPending()
void createCommentPending()
void emitCommentPending()
void createBogusCommentPending()
void createDoctypePending()
void emitDoctypePending()
void createTempBuffer()
boolean isAppropriateEndTagToken()
@Nullable java.lang.String appropriateEndTagName()
java.lang.String appropriateEndTagSeq()
Returns the closer sequence `</lastStart`.
void error(TokeniserState state)
void eofError(TokeniserState state)
private void characterReferenceError(java.lang.String message, java.lang.Object... args)
void error(java.lang.String errorMsg)
void error(java.lang.String errorMsg, java.lang.Object... args)
boolean currentNodeInHtmlNS()
java.lang.String unescapeEntities(boolean inAttribute)
Utility method to consume reader and unescape entities found within.
Parameters:
inAttribute - if the text to be unescaped is in an attribute