/**
 * All the states the tokenizer can be in.
 *
 * Declared as an ambient `const enum`; the numeric values are explicit and
 * run contiguously from 1 (`Text`) to 66 (`InHexEntity`).
 */
declare const enum State {
    Text = 1,
    BeforeTagName = 2,
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12,
    InAttributeValueSq = 13,
    InAttributeValueNq = 14,
    BeforeDeclaration = 15,
    InDeclaration = 16,
    InProcessingInstruction = 17,
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,
    BeforeCdata1 = 23,
    BeforeCdata2 = 24,
    BeforeCdata3 = 25,
    BeforeCdata4 = 26,
    BeforeCdata5 = 27,
    BeforeCdata6 = 28,
    InCdata = 29,
    AfterCdata1 = 30,
    AfterCdata2 = 31,
    BeforeSpecialS = 32,
    BeforeSpecialSEnd = 33,
    BeforeScript1 = 34,
    BeforeScript2 = 35,
    BeforeScript3 = 36,
    BeforeScript4 = 37,
    BeforeScript5 = 38,
    AfterScript1 = 39,
    AfterScript2 = 40,
    AfterScript3 = 41,
    AfterScript4 = 42,
    AfterScript5 = 43,
    BeforeStyle1 = 44,
    BeforeStyle2 = 45,
    BeforeStyle3 = 46,
    BeforeStyle4 = 47,
    AfterStyle1 = 48,
    AfterStyle2 = 49,
    AfterStyle3 = 50,
    AfterStyle4 = 51,
    BeforeSpecialT = 52,
    BeforeSpecialTEnd = 53,
    BeforeTitle1 = 54,
    BeforeTitle2 = 55,
    BeforeTitle3 = 56,
    BeforeTitle4 = 57,
    AfterTitle1 = 58,
    AfterTitle2 = 59,
    AfterTitle3 = 60,
    AfterTitle4 = 61,
    BeforeEntity = 62,
    BeforeNumericEntity = 63,
    InNamedEntity = 64,
    InNumericEntity = 65,
    InHexEntity = 66
}
/**
 * The set of callbacks a `Tokenizer` is constructed with (the `cbs`
 * constructor argument). Every member is required and returns `void`.
 *
 * NOTE(review): this declaration file does not show when each callback is
 * invoked; the per-member notes below describe the signatures only — confirm
 * exact timing against the implementation.
 */
export interface Callbacks {
    /** Receives a string `value`; presumably a piece of attribute value data — confirm. */
    onattribdata(value: string): void;
    /** Receives `quote`, which may be a string, `undefined`, or `null`. */
    onattribend(quote: string | undefined | null): void;
    /** Receives an attribute `name` string. */
    onattribname(name: string): void;
    /** Receives CDATA `data` as a string. */
    oncdata(data: string): void;
    /** Receives the `name` of a closing tag. */
    onclosetag(name: string): void;
    /** Receives comment `data` as a string. */
    oncomment(data: string): void;
    /** Receives declaration `content` as a string. */
    ondeclaration(content: string): void;
    /** Takes no arguments. */
    onend(): void;
    /** Receives an `Error` and, optionally, a tokenizer `State`. */
    onerror(error: Error, state?: State): void;
    /** Takes no arguments. */
    onopentagend(): void;
    /** Receives the `name` of an opening tag. */
    onopentagname(name: string): void;
    /** Receives a processing `instruction` as a string. */
    onprocessinginstruction(instruction: string): void;
    /** Takes no arguments. */
    onselfclosingtag(): void;
    /** Receives text `value` as a string. */
    ontext(value: string): void;
}
/**
 * Type declaration for the tokenizer class (default export).
 *
 * Private members appear without type annotations in this declaration file.
 * `_state`, `sectionStart` and `_index` carry no access modifier and are
 * therefore public despite the underscore prefix on two of them.
 */
export default class Tokenizer {
    private readonly cbs;
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    private bufferOffset;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /**
     * For special parsing behavior inside of script and style tags.
     *
     * NOTE(review): `State` also declares title-related members
     * (`BeforeTitle1` … `AfterTitle4`); confirm whether title tags are
     * covered by this field as well.
     */
    private special;
    /**
     * Indicates whether the tokenizer has been paused.
     *
     * NOTE(review): the field name suggests `true` means "not paused" —
     * confirm the polarity against the implementation.
     */
    private running;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    private ended;
    private readonly xmlMode;
    private readonly decodeEntities;
    private readonly entityTrie;
    /**
     * @param options Object with two optional boolean flags, `xmlMode` and
     *     `decodeEntities`. Defaults are not visible in this declaration.
     * @param cbs Callbacks to be used by the tokenizer.
     */
    constructor({ xmlMode, decodeEntities, }: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    }, cbs: Callbacks);
    /** Takes no arguments; presumably restores the initial tokenizer state — confirm. */
    reset(): void;
    /** @param chunk A string of input to hand to the tokenizer. */
    write(chunk: string): void;
    /** @param chunk Optional final string of input. */
    end(chunk?: string): void;
    /** Takes no arguments; see `running` for the paused flag. */
    pause(): void;
    /** Takes no arguments; counterpart to `pause()`. */
    resume(): void;
    /**
     * The start of the current section.
     */
    getAbsoluteSectionStart(): number;
    /**
     * The current index within all of the written data.
     */
    getAbsoluteIndex(): number;
    private stateText;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInComment;
    private stateInSpecialComment;
    private stateAfterComment1;
    private stateAfterComment2;
    private stateBeforeCdata6;
    private stateInCdata;
    private stateAfterCdata1;
    private stateAfterCdata2;
    private stateBeforeSpecialS;
    private stateBeforeSpecialSEnd;
    private stateBeforeSpecialLast;
    private stateAfterSpecialLast;
    private trieIndex;
    private trieCurrent;
    private trieResult;
    private trieExcess;
    private stateBeforeEntity;
    private stateInNamedEntity;
    private emitNamedEntity;
    private decodeNumericEntity;
    private stateInNumericEntity;
    private stateInHexEntity;
    private allowLegacyEntity;
    /**
     * Remove data that has already been consumed from the buffer.
     */
    private cleanup;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    /** Handle any trailing data. */
    private handleTrailingData;
    private getSection;
    private emitPartial;
}
export {};
//# sourceMappingURL=Tokenizer.d.ts.map