From 377c17768d194186599249129fd59a26db65cae9 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Sat, 2 Dec 2023 18:06:56 +0900 Subject: [PATCH] chore: refactor parseAttributes --- src/parser/html.ts | 255 +++++++++++++++++++-------------------------- 1 file changed, 110 insertions(+), 145 deletions(-) diff --git a/src/parser/html.ts b/src/parser/html.ts index 09330f29..25438573 100644 --- a/src/parser/html.ts +++ b/src/parser/html.ts @@ -14,7 +14,52 @@ export type AttributeValueToken = { end: number; }; -const spacePattern = /\s/; +const RE_IS_SPACE = /^\s$/u; + +class State { + public readonly code: string; + + public index: number; + + public curr: string | null = null; + + public constructor(code: string, index: number) { + this.code = code; + this.index = index; + this.curr = code[index] || null; + } + + public skipSpaces() { + while (this.currIsSpace()) { + this.index++; + if (this.eof()) break; + } + } + + public currIsSpace() { + return RE_IS_SPACE.test(this.curr || ""); + } + + public currIs(expect: string): any { + return this.code.startsWith(expect, this.index); + } + + public eof(): boolean { + return this.index >= this.code.length; + } + + public eat(expect: E) { + if (!this.currIs(expect)) { + return null; + } + this.index += expect.length; + return expect; + } + + public advance() { + return (this.curr = this.code[++this.index] || null); + } +} /** Parse HTML attributes */ export function parseAttributes( @@ -23,182 +68,102 @@ export function parseAttributes( ): { attributes: AttributeToken[]; index: number } { const attributes: AttributeToken[] = []; - let index = startIndex; - while (index < code.length) { - const char = code[index]; - if (spacePattern.test(char)) { - index++; - continue; - } - if (char === ">" || (char === "/" && code[index + 1] === ">")) break; - const attrData = parseAttribute(code, index); - attributes.push(attrData.attribute); - index = attrData.index; + const state = new State(code, startIndex); + + while (!state.eof()) { + state.skipSpaces(); + if (state.currIs(">") || state.currIs("/>") || state.eof()) break; + attributes.push(parseAttribute(state)); } - return { attributes, index }; + return { attributes, index: state.index }; } /** Parse HTML attribute */ -function parseAttribute( - code: string, - startIndex: number, -): { attribute: AttributeToken; index: number } { +function parseAttribute(state: State): AttributeToken { // parse key - const keyData = parseAttributeKey(code, startIndex); - const key = keyData.key; - let index = keyData.index; - if (code[index] !== "=") { + const key = parseAttributeKey(state); + state.skipSpaces(); + if (!state.eat("=")) { return { - attribute: { - key, - value: null, - }, - index, + key, + value: null, }; } - - index++; - - // skip spaces - while (index < code.length) { - const char = code[index]; - if (spacePattern.test(char)) { - index++; - continue; - } - break; + state.skipSpaces(); + if (state.eof()) { + return { + key, + value: null, + }; } - // parse value - const valueData = parseAttributeValue(code, index); - + const value = parseAttributeValue(state); return { - attribute: { - key, - value: valueData.value, - }, - index: valueData.index, + key, + value, }; } /** Parse HTML attribute key */ -function parseAttributeKey( - code: string, - startIndex: number, -): { key: AttributeKeyToken; index: number } { - const key: AttributeKeyToken = { - name: code[startIndex], - start: startIndex, - end: startIndex + 1, - }; - let index = key.end; - while (index < code.length) { - const char = code[index]; +function parseAttributeKey(state: State): AttributeKeyToken { + const start = state.index; + while (state.advance()) { if ( - char === "=" || - char === ">" || - (char === "/" && code[index + 1] === ">") + state.currIs("=") || + state.currIs(">") || + state.currIs("/>") || + state.currIsSpace() ) { break; } - if (spacePattern.test(char)) { - for (let i = index; i < code.length; i++) { - const c = code[i]; - if (c === "=") { - return { - key, - index: i, - }; - } - if (spacePattern.test(c)) { - continue; - } - return { - key, - index, - }; - } - break; - } - key.name += char; - index++; - key.end = index; } + const end = state.index; return { - key, - index, + name: state.code.slice(start, end), + start, + end, }; } /** Parse HTML attribute value */ -function parseAttributeValue( - code: string, - startIndex: number, -): { value: AttributeValueToken | null; index: number } { - let index = startIndex; - const maybeQuote = code[index]; - if (maybeQuote == null) { - return { - value: null, - index, - }; - } - const quote = maybeQuote === '"' || maybeQuote === "'" ? maybeQuote : null; +function parseAttributeValue(state: State): AttributeValueToken { + const start = state.index; + const quote = state.eat('"') || state.eat("'"); if (quote) { - index++; - } - const valueFirstChar = code[index]; - if (valueFirstChar == null) { - return { - value: { - value: maybeQuote, + if (state.eof()) { + return { + value: quote, quote: null, - start: startIndex, - end: index, - }, - index, - }; - } - if (valueFirstChar === quote) { - return { - value: { - value: "", - quote, - start: startIndex, - end: index + 1, - }, - index: index + 1, - }; - } - const value: AttributeValueToken = { - value: valueFirstChar, - quote, - start: startIndex, - end: index + 1, - }; - index = value.end; - while (index < code.length) { - const char = code[index]; - if (quote) { - if (quote === char) { - index++; - value.end = index; + start, + end: state.index, + }; + } + let c: string | null; + while ((c = state.curr)) { + state.advance(); + if (c === quote) { + const end = state.index; + return { + value: state.code.slice(start + 1, end - 1), + quote, + start, + end, + }; + } + } + } else { + while (state.advance()) { + if (state.currIsSpace() || state.currIs(">") || state.currIs("/>")) { break; } - } else if ( - spacePattern.test(char) || - char === ">" || - (char === "/" && code[index + 1] === ">") - ) { - break; } - value.value += char; - index++; - value.end = index; } + const end = state.index; return { - value, - index, + value: state.code.slice(start, end), + quote: null, + start, + end, }; } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy