add initial marp implementation with sample content and build configuration
This commit is contained in:
929
node_modules/@xmldom/xmldom/lib/sax.js
generated
vendored
Normal file
929
node_modules/@xmldom/xmldom/lib/sax.js
generated
vendored
Normal file
@@ -0,0 +1,929 @@
|
||||
'use strict';
|
||||
|
||||
var conventions = require('./conventions');
|
||||
var g = require('./grammar');
|
||||
var errors = require('./errors');
|
||||
|
||||
var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
|
||||
var isHTMLMimeType = conventions.isHTMLMimeType;
|
||||
var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
|
||||
var hasOwn = conventions.hasOwn;
|
||||
var NAMESPACE = conventions.NAMESPACE;
|
||||
var ParseError = errors.ParseError;
|
||||
var DOMException = errors.DOMException;
|
||||
|
||||
//var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
|
||||
|
||||
//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
|
||||
//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
|
||||
var S_TAG = 0; //tag name offerring
|
||||
var S_ATTR = 1; //attr name offerring
|
||||
var S_ATTR_SPACE = 2; //attr name end and space offer
|
||||
var S_EQ = 3; //=space?
|
||||
var S_ATTR_NOQUOT_VALUE = 4; //attr value(no quot value only)
|
||||
var S_ATTR_END = 5; //attr value end and no space(quot end)
|
||||
var S_TAG_SPACE = 6; //(attr value end || tag end ) && (space offer)
|
||||
var S_TAG_CLOSE = 7; //closed el<el />
|
||||
|
||||
function XMLReader() {}
|
||||
|
||||
XMLReader.prototype = {
|
||||
parse: function (source, defaultNSMap, entityMap) {
|
||||
var domBuilder = this.domBuilder;
|
||||
domBuilder.startDocument();
|
||||
_copy(defaultNSMap, (defaultNSMap = Object.create(null)));
|
||||
parse(source, defaultNSMap, entityMap, domBuilder, this.errorHandler);
|
||||
domBuilder.endDocument();
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Detecting everything that might be a reference,
|
||||
* including those without ending `;`, since those are allowed in HTML.
|
||||
* The entityReplacer takes care of verifying and transforming each occurrence,
|
||||
* and reports to the errorHandler on those that are not OK,
|
||||
* depending on the context.
|
||||
*/
|
||||
var ENTITY_REG = /&#?\w+;?/g;
|
||||
|
||||
function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
||||
var isHTML = isHTMLMimeType(domBuilder.mimeType);
|
||||
if (source.indexOf(g.UNICODE_REPLACEMENT_CHARACTER) >= 0) {
|
||||
errorHandler.warning('Unicode replacement character detected, source encoding issues?');
|
||||
}
|
||||
|
||||
function fixedFromCharCode(code) {
|
||||
// String.prototype.fromCharCode does not supports
|
||||
// > 2 bytes unicode chars directly
|
||||
if (code > 0xffff) {
|
||||
code -= 0x10000;
|
||||
var surrogate1 = 0xd800 + (code >> 10),
|
||||
surrogate2 = 0xdc00 + (code & 0x3ff);
|
||||
|
||||
return String.fromCharCode(surrogate1, surrogate2);
|
||||
} else {
|
||||
return String.fromCharCode(code);
|
||||
}
|
||||
}
|
||||
|
||||
function entityReplacer(a) {
|
||||
var complete = a[a.length - 1] === ';' ? a : a + ';';
|
||||
if (!isHTML && complete !== a) {
|
||||
errorHandler.error('EntityRef: expecting ;');
|
||||
return a;
|
||||
}
|
||||
var match = g.Reference.exec(complete);
|
||||
if (!match || match[0].length !== complete.length) {
|
||||
errorHandler.error('entity not matching Reference production: ' + a);
|
||||
return a;
|
||||
}
|
||||
var k = complete.slice(1, -1);
|
||||
if (hasOwn(entityMap, k)) {
|
||||
return entityMap[k];
|
||||
} else if (k.charAt(0) === '#') {
|
||||
return fixedFromCharCode(parseInt(k.substring(1).replace('x', '0x')));
|
||||
} else {
|
||||
errorHandler.error('entity not found:' + a);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
function appendText(end) {
|
||||
//has some bugs
|
||||
if (end > start) {
|
||||
var xt = source.substring(start, end).replace(ENTITY_REG, entityReplacer);
|
||||
locator && position(start);
|
||||
domBuilder.characters(xt, 0, end - start);
|
||||
start = end;
|
||||
}
|
||||
}
|
||||
|
||||
var lineStart = 0;
|
||||
var lineEnd = 0;
|
||||
var linePattern = /\r\n?|\n|$/g;
|
||||
var locator = domBuilder.locator;
|
||||
|
||||
function position(p, m) {
|
||||
while (p >= lineEnd && (m = linePattern.exec(source))) {
|
||||
lineStart = lineEnd;
|
||||
lineEnd = m.index + m[0].length;
|
||||
locator.lineNumber++;
|
||||
}
|
||||
locator.columnNumber = p - lineStart + 1;
|
||||
}
|
||||
|
||||
var parseStack = [{ currentNSMap: defaultNSMapCopy }];
|
||||
var unclosedTags = [];
|
||||
var start = 0;
|
||||
while (true) {
|
||||
try {
|
||||
var tagStart = source.indexOf('<', start);
|
||||
if (tagStart < 0) {
|
||||
if (!isHTML && unclosedTags.length > 0) {
|
||||
return errorHandler.fatalError('unclosed xml tag(s): ' + unclosedTags.join(', '));
|
||||
}
|
||||
if (!source.substring(start).match(/^\s*$/)) {
|
||||
var doc = domBuilder.doc;
|
||||
var text = doc.createTextNode(source.substring(start));
|
||||
if (doc.documentElement) {
|
||||
return errorHandler.error('Extra content at the end of the document');
|
||||
}
|
||||
doc.appendChild(text);
|
||||
domBuilder.currentElement = text;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (tagStart > start) {
|
||||
var fromSource = source.substring(start, tagStart);
|
||||
if (!isHTML && unclosedTags.length === 0) {
|
||||
fromSource = fromSource.replace(new RegExp(g.S_OPT.source, 'g'), '');
|
||||
fromSource && errorHandler.error("Unexpected content outside root element: '" + fromSource + "'");
|
||||
}
|
||||
appendText(tagStart);
|
||||
}
|
||||
switch (source.charAt(tagStart + 1)) {
|
||||
case '/':
|
||||
var end = source.indexOf('>', tagStart + 2);
|
||||
var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
|
||||
if (!tagNameRaw) {
|
||||
return errorHandler.fatalError('end tag name missing');
|
||||
}
|
||||
var tagNameMatch = end > 0 && g.reg('^', g.QName_group, g.S_OPT, '$').exec(tagNameRaw);
|
||||
if (!tagNameMatch) {
|
||||
return errorHandler.fatalError('end tag name contains invalid characters: "' + tagNameRaw + '"');
|
||||
}
|
||||
if (!domBuilder.currentElement && !domBuilder.doc.documentElement) {
|
||||
// not enough information to provide a helpful error message,
|
||||
// but parsing will throw since there is no root element
|
||||
return;
|
||||
}
|
||||
var currentTagName =
|
||||
unclosedTags[unclosedTags.length - 1] ||
|
||||
domBuilder.currentElement.tagName ||
|
||||
domBuilder.doc.documentElement.tagName ||
|
||||
'';
|
||||
if (currentTagName !== tagNameMatch[1]) {
|
||||
var tagNameLower = tagNameMatch[1].toLowerCase();
|
||||
if (!isHTML || currentTagName.toLowerCase() !== tagNameLower) {
|
||||
return errorHandler.fatalError('Opening and ending tag mismatch: "' + currentTagName + '" != "' + tagNameRaw + '"');
|
||||
}
|
||||
}
|
||||
var config = parseStack.pop();
|
||||
unclosedTags.pop();
|
||||
var localNSMap = config.localNSMap;
|
||||
domBuilder.endElement(config.uri, config.localName, currentTagName);
|
||||
if (localNSMap) {
|
||||
for (var prefix in localNSMap) {
|
||||
if (hasOwn(localNSMap, prefix)) {
|
||||
domBuilder.endPrefixMapping(prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
end++;
|
||||
break;
|
||||
// end element
|
||||
case '?': // <?...?>
|
||||
locator && position(tagStart);
|
||||
end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
|
||||
break;
|
||||
case '!': // <!doctype,<![CDATA,<!--
|
||||
locator && position(tagStart);
|
||||
end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler, isHTML);
|
||||
break;
|
||||
default:
|
||||
locator && position(tagStart);
|
||||
var el = new ElementAttributes();
|
||||
var currentNSMap = parseStack[parseStack.length - 1].currentNSMap;
|
||||
//elStartEnd
|
||||
var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
|
||||
var len = el.length;
|
||||
|
||||
if (!el.closed) {
|
||||
if (isHTML && conventions.isHTMLVoidElement(el.tagName)) {
|
||||
el.closed = true;
|
||||
} else {
|
||||
unclosedTags.push(el.tagName);
|
||||
}
|
||||
}
|
||||
if (locator && len) {
|
||||
var locator2 = copyLocator(locator, {});
|
||||
//try{//attribute position fixed
|
||||
for (var i = 0; i < len; i++) {
|
||||
var a = el[i];
|
||||
position(a.offset);
|
||||
a.locator = copyLocator(locator, {});
|
||||
}
|
||||
domBuilder.locator = locator2;
|
||||
if (appendElement(el, domBuilder, currentNSMap)) {
|
||||
parseStack.push(el);
|
||||
}
|
||||
domBuilder.locator = locator;
|
||||
} else {
|
||||
if (appendElement(el, domBuilder, currentNSMap)) {
|
||||
parseStack.push(el);
|
||||
}
|
||||
}
|
||||
|
||||
if (isHTML && !el.closed) {
|
||||
end = parseHtmlSpecialContent(source, end, el.tagName, entityReplacer, domBuilder);
|
||||
} else {
|
||||
end++;
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
if (e instanceof ParseError) {
|
||||
throw e;
|
||||
} else if (e instanceof DOMException) {
|
||||
throw new ParseError(e.name + ': ' + e.message, domBuilder.locator, e);
|
||||
}
|
||||
errorHandler.error('element parse error: ' + e);
|
||||
end = -1;
|
||||
}
|
||||
if (end > start) {
|
||||
start = end;
|
||||
} else {
|
||||
//Possible sax fallback here, risk of positional error
|
||||
appendText(Math.max(tagStart, start) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function copyLocator(f, t) {
|
||||
t.lineNumber = f.lineNumber;
|
||||
t.columnNumber = f.columnNumber;
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns
|
||||
* end of the elementStartPart(end of elementEndPart for selfClosed el)
|
||||
* @see {@link #appendElement}
|
||||
*/
|
||||
function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler, isHTML) {
|
||||
/**
|
||||
* @param {string} qname
|
||||
* @param {string} value
|
||||
* @param {number} startIndex
|
||||
*/
|
||||
function addAttribute(qname, value, startIndex) {
|
||||
if (hasOwn(el.attributeNames, qname)) {
|
||||
return errorHandler.fatalError('Attribute ' + qname + ' redefined');
|
||||
}
|
||||
if (!isHTML && value.indexOf('<') >= 0) {
|
||||
return errorHandler.fatalError("Unescaped '<' not allowed in attributes values");
|
||||
}
|
||||
el.addValue(
|
||||
qname,
|
||||
// @see https://www.w3.org/TR/xml/#AVNormalize
|
||||
// since the xmldom sax parser does not "interpret" DTD the following is not implemented:
|
||||
// - recursive replacement of (DTD) entity references
|
||||
// - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
|
||||
value.replace(/[\t\n\r]/g, ' ').replace(ENTITY_REG, entityReplacer),
|
||||
startIndex
|
||||
);
|
||||
}
|
||||
|
||||
var attrName;
|
||||
var value;
|
||||
var p = ++start;
|
||||
var s = S_TAG; //status
|
||||
while (true) {
|
||||
var c = source.charAt(p);
|
||||
switch (c) {
|
||||
case '=':
|
||||
if (s === S_ATTR) {
|
||||
//attrName
|
||||
attrName = source.slice(start, p);
|
||||
s = S_EQ;
|
||||
} else if (s === S_ATTR_SPACE) {
|
||||
s = S_EQ;
|
||||
} else {
|
||||
//fatalError: equal must after attrName or space after attrName
|
||||
throw new Error('attribute equal must after attrName'); // No known test case
|
||||
}
|
||||
break;
|
||||
case "'":
|
||||
case '"':
|
||||
if (
|
||||
s === S_EQ ||
|
||||
s === S_ATTR //|| s == S_ATTR_SPACE
|
||||
) {
|
||||
//equal
|
||||
if (s === S_ATTR) {
|
||||
errorHandler.warning('attribute value must after "="');
|
||||
attrName = source.slice(start, p);
|
||||
}
|
||||
start = p + 1;
|
||||
p = source.indexOf(c, start);
|
||||
if (p > 0) {
|
||||
value = source.slice(start, p);
|
||||
addAttribute(attrName, value, start - 1);
|
||||
s = S_ATTR_END;
|
||||
} else {
|
||||
//fatalError: no end quot match
|
||||
throw new Error("attribute value no end '" + c + "' match");
|
||||
}
|
||||
} else if (s == S_ATTR_NOQUOT_VALUE) {
|
||||
value = source.slice(start, p);
|
||||
addAttribute(attrName, value, start);
|
||||
errorHandler.warning('attribute "' + attrName + '" missed start quot(' + c + ')!!');
|
||||
start = p + 1;
|
||||
s = S_ATTR_END;
|
||||
} else {
|
||||
//fatalError: no equal before
|
||||
throw new Error('attribute value must after "="'); // No known test case
|
||||
}
|
||||
break;
|
||||
case '/':
|
||||
switch (s) {
|
||||
case S_TAG:
|
||||
el.setTagName(source.slice(start, p));
|
||||
case S_ATTR_END:
|
||||
case S_TAG_SPACE:
|
||||
case S_TAG_CLOSE:
|
||||
s = S_TAG_CLOSE;
|
||||
el.closed = true;
|
||||
case S_ATTR_NOQUOT_VALUE:
|
||||
case S_ATTR:
|
||||
break;
|
||||
case S_ATTR_SPACE:
|
||||
el.closed = true;
|
||||
break;
|
||||
//case S_EQ:
|
||||
default:
|
||||
throw new Error("attribute invalid close char('/')"); // No known test case
|
||||
}
|
||||
break;
|
||||
case '': //end document
|
||||
errorHandler.error('unexpected end of input');
|
||||
if (s == S_TAG) {
|
||||
el.setTagName(source.slice(start, p));
|
||||
}
|
||||
return p;
|
||||
case '>':
|
||||
switch (s) {
|
||||
case S_TAG:
|
||||
el.setTagName(source.slice(start, p));
|
||||
case S_ATTR_END:
|
||||
case S_TAG_SPACE:
|
||||
case S_TAG_CLOSE:
|
||||
break; //normal
|
||||
case S_ATTR_NOQUOT_VALUE: //Compatible state
|
||||
case S_ATTR:
|
||||
value = source.slice(start, p);
|
||||
if (value.slice(-1) === '/') {
|
||||
el.closed = true;
|
||||
value = value.slice(0, -1);
|
||||
}
|
||||
case S_ATTR_SPACE:
|
||||
if (s === S_ATTR_SPACE) {
|
||||
value = attrName;
|
||||
}
|
||||
if (s == S_ATTR_NOQUOT_VALUE) {
|
||||
errorHandler.warning('attribute "' + value + '" missed quot(")!');
|
||||
addAttribute(attrName, value, start);
|
||||
} else {
|
||||
if (!isHTML) {
|
||||
errorHandler.warning('attribute "' + value + '" missed value!! "' + value + '" instead!!');
|
||||
}
|
||||
addAttribute(value, value, start);
|
||||
}
|
||||
break;
|
||||
case S_EQ:
|
||||
if (!isHTML) {
|
||||
return errorHandler.fatalError('AttValue: \' or " expected');
|
||||
}
|
||||
}
|
||||
return p;
|
||||
/*xml space '\x20' | #x9 | #xD | #xA; */
|
||||
case '\u0080':
|
||||
c = ' ';
|
||||
default:
|
||||
if (c <= ' ') {
|
||||
//space
|
||||
switch (s) {
|
||||
case S_TAG:
|
||||
el.setTagName(source.slice(start, p)); //tagName
|
||||
s = S_TAG_SPACE;
|
||||
break;
|
||||
case S_ATTR:
|
||||
attrName = source.slice(start, p);
|
||||
s = S_ATTR_SPACE;
|
||||
break;
|
||||
case S_ATTR_NOQUOT_VALUE:
|
||||
var value = source.slice(start, p);
|
||||
errorHandler.warning('attribute "' + value + '" missed quot(")!!');
|
||||
addAttribute(attrName, value, start);
|
||||
case S_ATTR_END:
|
||||
s = S_TAG_SPACE;
|
||||
break;
|
||||
//case S_TAG_SPACE:
|
||||
//case S_EQ:
|
||||
//case S_ATTR_SPACE:
|
||||
// void();break;
|
||||
//case S_TAG_CLOSE:
|
||||
//ignore warning
|
||||
}
|
||||
} else {
|
||||
//not space
|
||||
//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
|
||||
//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
|
||||
switch (s) {
|
||||
//case S_TAG:void();break;
|
||||
//case S_ATTR:void();break;
|
||||
//case S_ATTR_NOQUOT_VALUE:void();break;
|
||||
case S_ATTR_SPACE:
|
||||
if (!isHTML) {
|
||||
errorHandler.warning('attribute "' + attrName + '" missed value!! "' + attrName + '" instead2!!');
|
||||
}
|
||||
addAttribute(attrName, attrName, start);
|
||||
start = p;
|
||||
s = S_ATTR;
|
||||
break;
|
||||
case S_ATTR_END:
|
||||
errorHandler.warning('attribute space is required"' + attrName + '"!!');
|
||||
case S_TAG_SPACE:
|
||||
s = S_ATTR;
|
||||
start = p;
|
||||
break;
|
||||
case S_EQ:
|
||||
s = S_ATTR_NOQUOT_VALUE;
|
||||
start = p;
|
||||
break;
|
||||
case S_TAG_CLOSE:
|
||||
throw new Error("elements closed character '/' and '>' must be connected to");
|
||||
}
|
||||
}
|
||||
} //end outer switch
|
||||
p++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns
|
||||
* `true` if a new namespace has been defined.
|
||||
*/
|
||||
function appendElement(el, domBuilder, currentNSMap) {
|
||||
var tagName = el.tagName;
|
||||
var localNSMap = null;
|
||||
var i = el.length;
|
||||
while (i--) {
|
||||
var a = el[i];
|
||||
var qName = a.qName;
|
||||
var value = a.value;
|
||||
var nsp = qName.indexOf(':');
|
||||
if (nsp > 0) {
|
||||
var prefix = (a.prefix = qName.slice(0, nsp));
|
||||
var localName = qName.slice(nsp + 1);
|
||||
var nsPrefix = prefix === 'xmlns' && localName;
|
||||
} else {
|
||||
localName = qName;
|
||||
prefix = null;
|
||||
nsPrefix = qName === 'xmlns' && '';
|
||||
}
|
||||
//can not set prefix,because prefix !== ''
|
||||
a.localName = localName;
|
||||
//prefix == null for no ns prefix attribute
|
||||
if (nsPrefix !== false) {
|
||||
//hack!!
|
||||
if (localNSMap == null) {
|
||||
localNSMap = Object.create(null);
|
||||
_copy(currentNSMap, (currentNSMap = Object.create(null)));
|
||||
}
|
||||
currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
|
||||
a.uri = NAMESPACE.XMLNS;
|
||||
domBuilder.startPrefixMapping(nsPrefix, value);
|
||||
}
|
||||
}
|
||||
var i = el.length;
|
||||
while (i--) {
|
||||
a = el[i];
|
||||
if (a.prefix) {
|
||||
//no prefix attribute has no namespace
|
||||
if (a.prefix === 'xml') {
|
||||
a.uri = NAMESPACE.XML;
|
||||
}
|
||||
if (a.prefix !== 'xmlns') {
|
||||
a.uri = currentNSMap[a.prefix];
|
||||
}
|
||||
}
|
||||
}
|
||||
var nsp = tagName.indexOf(':');
|
||||
if (nsp > 0) {
|
||||
prefix = el.prefix = tagName.slice(0, nsp);
|
||||
localName = el.localName = tagName.slice(nsp + 1);
|
||||
} else {
|
||||
prefix = null; //important!!
|
||||
localName = el.localName = tagName;
|
||||
}
|
||||
//no prefix element has default namespace
|
||||
var ns = (el.uri = currentNSMap[prefix || '']);
|
||||
domBuilder.startElement(ns, localName, tagName, el);
|
||||
//endPrefixMapping and startPrefixMapping have not any help for dom builder
|
||||
//localNSMap = null
|
||||
if (el.closed) {
|
||||
domBuilder.endElement(ns, localName, tagName);
|
||||
if (localNSMap) {
|
||||
for (prefix in localNSMap) {
|
||||
if (hasOwn(localNSMap, prefix)) {
|
||||
domBuilder.endPrefixMapping(prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
el.currentNSMap = currentNSMap;
|
||||
el.localNSMap = localNSMap;
|
||||
//parseStack.push(el);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
|
||||
// https://html.spec.whatwg.org/#raw-text-elements
|
||||
// https://html.spec.whatwg.org/#escapable-raw-text-elements
|
||||
// https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements
|
||||
// TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions
|
||||
var isEscapableRaw = isHTMLEscapableRawTextElement(tagName);
|
||||
if (isEscapableRaw || isHTMLRawTextElement(tagName)) {
|
||||
var elEndStart = source.indexOf('</' + tagName + '>', elStartEnd);
|
||||
var text = source.substring(elStartEnd + 1, elEndStart);
|
||||
|
||||
if (isEscapableRaw) {
|
||||
text = text.replace(ENTITY_REG, entityReplacer);
|
||||
}
|
||||
domBuilder.characters(text, 0, text.length);
|
||||
return elEndStart;
|
||||
}
|
||||
return elStartEnd + 1;
|
||||
}
|
||||
|
||||
function _copy(source, target) {
|
||||
for (var n in source) {
|
||||
if (hasOwn(source, n)) {
|
||||
target[n] = source[n];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @typedef ParseUtils
|
||||
* @property {function(relativeIndex: number?): string | undefined} char
|
||||
* Provides look ahead access to a singe character relative to the current index.
|
||||
* @property {function(): number} getIndex
|
||||
* Provides read-only access to the current index.
|
||||
* @property {function(reg: RegExp): string | null} getMatch
|
||||
* Applies the provided regular expression enforcing that it starts at the current index and
|
||||
* returns the complete matching string,
|
||||
* and moves the current index by the length of the matching string.
|
||||
* @property {function(): string} getSource
|
||||
* Provides read-only access to the complete source.
|
||||
* @property {function(places: number?): void} skip
|
||||
* moves the current index by places (defaults to 1)
|
||||
* @property {function(): number} skipBlanks
|
||||
* Moves the current index by the amount of white space that directly follows the current index
|
||||
* and returns the amount of whitespace chars skipped (0..n),
|
||||
* or -1 if the end of the source was reached.
|
||||
* @property {function(): string} substringFromIndex
|
||||
* creates a substring from the current index to the end of `source`
|
||||
* @property {function(compareWith: string): boolean} substringStartsWith
|
||||
* Checks if `source` contains `compareWith`, starting from the current index.
|
||||
* @property {function(compareWith: string): boolean} substringStartsWithCaseInsensitive
|
||||
* Checks if `source` contains `compareWith`, starting from the current index,
|
||||
* comparing the upper case of both sides.
|
||||
* @see {@link parseUtils}
|
||||
*/
|
||||
|
||||
/**
|
||||
* A temporary scope for parsing and look ahead operations in `source`,
|
||||
* starting from index `start`.
|
||||
*
|
||||
* Some operations move the current index by a number of positions,
|
||||
* after which `getIndex` returns the new index.
|
||||
*
|
||||
* @param {string} source
|
||||
* @param {number} start
|
||||
* @returns {ParseUtils}
|
||||
*/
|
||||
function parseUtils(source, start) {
|
||||
var index = start;
|
||||
|
||||
function char(n) {
|
||||
n = n || 0;
|
||||
return source.charAt(index + n);
|
||||
}
|
||||
|
||||
function skip(n) {
|
||||
n = n || 1;
|
||||
index += n;
|
||||
}
|
||||
|
||||
function skipBlanks() {
|
||||
var blanks = 0;
|
||||
while (index < source.length) {
|
||||
var c = char();
|
||||
if (c !== ' ' && c !== '\n' && c !== '\t' && c !== '\r') {
|
||||
return blanks;
|
||||
}
|
||||
blanks++;
|
||||
skip();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
function substringFromIndex() {
|
||||
return source.substring(index);
|
||||
}
|
||||
function substringStartsWith(text) {
|
||||
return source.substring(index, index + text.length) === text;
|
||||
}
|
||||
function substringStartsWithCaseInsensitive(text) {
|
||||
return source.substring(index, index + text.length).toUpperCase() === text.toUpperCase();
|
||||
}
|
||||
|
||||
function getMatch(args) {
|
||||
var expr = g.reg('^', args);
|
||||
var match = expr.exec(substringFromIndex());
|
||||
if (match) {
|
||||
skip(match[0].length);
|
||||
return match[0];
|
||||
}
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
char: char,
|
||||
getIndex: function () {
|
||||
return index;
|
||||
},
|
||||
getMatch: getMatch,
|
||||
getSource: function () {
|
||||
return source;
|
||||
},
|
||||
skip: skip,
|
||||
skipBlanks: skipBlanks,
|
||||
substringFromIndex: substringFromIndex,
|
||||
substringStartsWith: substringStartsWith,
|
||||
substringStartsWithCaseInsensitive: substringStartsWithCaseInsensitive,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ParseUtils} p
|
||||
* @param {DOMHandler} errorHandler
|
||||
* @returns {string}
|
||||
*/
|
||||
function parseDoctypeInternalSubset(p, errorHandler) {
|
||||
/**
|
||||
* @param {ParseUtils} p
|
||||
* @param {DOMHandler} errorHandler
|
||||
* @returns {string}
|
||||
*/
|
||||
function parsePI(p, errorHandler) {
|
||||
var match = g.PI.exec(p.substringFromIndex());
|
||||
if (!match) {
|
||||
return errorHandler.fatalError('processing instruction is not well-formed at position ' + p.getIndex());
|
||||
}
|
||||
if (match[1].toLowerCase() === 'xml') {
|
||||
return errorHandler.fatalError(
|
||||
'xml declaration is only allowed at the start of the document, but found at position ' + p.getIndex()
|
||||
);
|
||||
}
|
||||
p.skip(match[0].length);
|
||||
return match[0];
|
||||
}
|
||||
// Parse internal subset
|
||||
var source = p.getSource();
|
||||
if (p.char() === '[') {
|
||||
p.skip(1);
|
||||
var intSubsetStart = p.getIndex();
|
||||
while (p.getIndex() < source.length) {
|
||||
p.skipBlanks();
|
||||
if (p.char() === ']') {
|
||||
var internalSubset = source.substring(intSubsetStart, p.getIndex());
|
||||
p.skip(1);
|
||||
return internalSubset;
|
||||
}
|
||||
var current = null;
|
||||
// Only in external subset
|
||||
// if (char() === '<' && char(1) === '!' && char(2) === '[') {
|
||||
// parseConditionalSections(p, errorHandler);
|
||||
// } else
|
||||
if (p.char() === '<' && p.char(1) === '!') {
|
||||
switch (p.char(2)) {
|
||||
case 'E': // ELEMENT | ENTITY
|
||||
if (p.char(3) === 'L') {
|
||||
current = p.getMatch(g.elementdecl);
|
||||
} else if (p.char(3) === 'N') {
|
||||
current = p.getMatch(g.EntityDecl);
|
||||
}
|
||||
break;
|
||||
case 'A': // ATTRIBUTE
|
||||
current = p.getMatch(g.AttlistDecl);
|
||||
break;
|
||||
case 'N': // NOTATION
|
||||
current = p.getMatch(g.NotationDecl);
|
||||
break;
|
||||
case '-': // COMMENT
|
||||
current = p.getMatch(g.Comment);
|
||||
break;
|
||||
}
|
||||
} else if (p.char() === '<' && p.char(1) === '?') {
|
||||
current = parsePI(p, errorHandler);
|
||||
} else if (p.char() === '%') {
|
||||
current = p.getMatch(g.PEReference);
|
||||
} else {
|
||||
return errorHandler.fatalError('Error detected in Markup declaration');
|
||||
}
|
||||
if (!current) {
|
||||
return errorHandler.fatalError('Error in internal subset at position ' + p.getIndex());
|
||||
}
|
||||
}
|
||||
return errorHandler.fatalError('doctype internal subset is not well-formed, missing ]');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when the parser encounters an element starting with '<!'.
|
||||
*
|
||||
* @param {string} source
|
||||
* The xml.
|
||||
* @param {number} start
|
||||
* the start index of the '<!'
|
||||
* @param {DOMHandler} domBuilder
|
||||
* @param {DOMHandler} errorHandler
|
||||
* @param {boolean} isHTML
|
||||
* @returns {number | never}
|
||||
* The end index of the element.
|
||||
* @throws {ParseError}
|
||||
* In case the element is not well-formed.
|
||||
*/
|
||||
function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler, isHTML) {
|
||||
var p = parseUtils(source, start);
|
||||
|
||||
switch (isHTML ? p.char(2).toUpperCase() : p.char(2)) {
|
||||
case '-':
|
||||
// should be a comment
|
||||
var comment = p.getMatch(g.Comment);
|
||||
if (comment) {
|
||||
domBuilder.comment(comment, g.COMMENT_START.length, comment.length - g.COMMENT_START.length - g.COMMENT_END.length);
|
||||
return p.getIndex();
|
||||
} else {
|
||||
return errorHandler.fatalError('comment is not well-formed at position ' + p.getIndex());
|
||||
}
|
||||
case '[':
|
||||
// should be CDATA
|
||||
var cdata = p.getMatch(g.CDSect);
|
||||
if (cdata) {
|
||||
if (!isHTML && !domBuilder.currentElement) {
|
||||
return errorHandler.fatalError('CDATA outside of element');
|
||||
}
|
||||
domBuilder.startCDATA();
|
||||
domBuilder.characters(cdata, g.CDATA_START.length, cdata.length - g.CDATA_START.length - g.CDATA_END.length);
|
||||
domBuilder.endCDATA();
|
||||
return p.getIndex();
|
||||
} else {
|
||||
return errorHandler.fatalError('Invalid CDATA starting at position ' + start);
|
||||
}
|
||||
case 'D': {
|
||||
// should be DOCTYPE
|
||||
if (domBuilder.doc && domBuilder.doc.documentElement) {
|
||||
return errorHandler.fatalError('Doctype not allowed inside or after documentElement at position ' + p.getIndex());
|
||||
}
|
||||
if (isHTML ? !p.substringStartsWithCaseInsensitive(g.DOCTYPE_DECL_START) : !p.substringStartsWith(g.DOCTYPE_DECL_START)) {
|
||||
return errorHandler.fatalError('Expected ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
|
||||
}
|
||||
p.skip(g.DOCTYPE_DECL_START.length);
|
||||
if (p.skipBlanks() < 1) {
|
||||
return errorHandler.fatalError('Expected whitespace after ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
|
||||
}
|
||||
|
||||
var doctype = {
|
||||
name: undefined,
|
||||
publicId: undefined,
|
||||
systemId: undefined,
|
||||
internalSubset: undefined,
|
||||
};
|
||||
// Parse the DOCTYPE name
|
||||
doctype.name = p.getMatch(g.Name);
|
||||
if (!doctype.name)
|
||||
return errorHandler.fatalError('doctype name missing or contains unexpected characters at position ' + p.getIndex());
|
||||
|
||||
if (isHTML && doctype.name.toLowerCase() !== 'html') {
|
||||
errorHandler.warning('Unexpected DOCTYPE in HTML document at position ' + p.getIndex());
|
||||
}
|
||||
p.skipBlanks();
|
||||
|
||||
// Check for ExternalID
|
||||
if (p.substringStartsWith(g.PUBLIC) || p.substringStartsWith(g.SYSTEM)) {
|
||||
var match = g.ExternalID_match.exec(p.substringFromIndex());
|
||||
if (!match) {
|
||||
return errorHandler.fatalError('doctype external id is not well-formed at position ' + p.getIndex());
|
||||
}
|
||||
if (match.groups.SystemLiteralOnly !== undefined) {
|
||||
doctype.systemId = match.groups.SystemLiteralOnly;
|
||||
} else {
|
||||
doctype.systemId = match.groups.SystemLiteral;
|
||||
doctype.publicId = match.groups.PubidLiteral;
|
||||
}
|
||||
p.skip(match[0].length);
|
||||
} else if (isHTML && p.substringStartsWithCaseInsensitive(g.SYSTEM)) {
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#doctype-legacy-string
|
||||
p.skip(g.SYSTEM.length);
|
||||
if (p.skipBlanks() < 1) {
|
||||
return errorHandler.fatalError('Expected whitespace after ' + g.SYSTEM + ' at position ' + p.getIndex());
|
||||
}
|
||||
doctype.systemId = p.getMatch(g.ABOUT_LEGACY_COMPAT_SystemLiteral);
|
||||
if (!doctype.systemId) {
|
||||
return errorHandler.fatalError(
|
||||
'Expected ' + g.ABOUT_LEGACY_COMPAT + ' in single or double quotes after ' + g.SYSTEM + ' at position ' + p.getIndex()
|
||||
);
|
||||
}
|
||||
}
|
||||
if (isHTML && doctype.systemId && !g.ABOUT_LEGACY_COMPAT_SystemLiteral.test(doctype.systemId)) {
|
||||
errorHandler.warning('Unexpected doctype.systemId in HTML document at position ' + p.getIndex());
|
||||
}
|
||||
if (!isHTML) {
|
||||
p.skipBlanks();
|
||||
doctype.internalSubset = parseDoctypeInternalSubset(p, errorHandler);
|
||||
}
|
||||
p.skipBlanks();
|
||||
if (p.char() !== '>') {
|
||||
return errorHandler.fatalError('doctype not terminated with > at position ' + p.getIndex());
|
||||
}
|
||||
p.skip(1);
|
||||
domBuilder.startDTD(doctype.name, doctype.publicId, doctype.systemId, doctype.internalSubset);
|
||||
domBuilder.endDTD();
|
||||
return p.getIndex();
|
||||
}
|
||||
default:
|
||||
return errorHandler.fatalError('Not well-formed XML starting with "<!" at position ' + start);
|
||||
}
|
||||
}
|
||||
|
||||
function parseProcessingInstruction(source, start, domBuilder, errorHandler) {
|
||||
var match = source.substring(start).match(g.PI);
|
||||
if (!match) {
|
||||
return errorHandler.fatalError('Invalid processing instruction starting at position ' + start);
|
||||
}
|
||||
if (match[1].toLowerCase() === 'xml') {
|
||||
if (start > 0) {
|
||||
return errorHandler.fatalError(
|
||||
'processing instruction at position ' + start + ' is an xml declaration which is only at the start of the document'
|
||||
);
|
||||
}
|
||||
if (!g.XMLDecl.test(source.substring(start))) {
|
||||
return errorHandler.fatalError('xml declaration is not well-formed');
|
||||
}
|
||||
}
|
||||
domBuilder.processingInstruction(match[1], match[2]);
|
||||
return start + match[0].length;
|
||||
}
|
||||
|
||||
function ElementAttributes() {
|
||||
this.attributeNames = Object.create(null);
|
||||
}
|
||||
|
||||
ElementAttributes.prototype = {
|
||||
setTagName: function (tagName) {
|
||||
if (!g.QName_exact.test(tagName)) {
|
||||
throw new Error('invalid tagName:' + tagName);
|
||||
}
|
||||
this.tagName = tagName;
|
||||
},
|
||||
addValue: function (qName, value, offset) {
|
||||
if (!g.QName_exact.test(qName)) {
|
||||
throw new Error('invalid attribute:' + qName);
|
||||
}
|
||||
this.attributeNames[qName] = this.length;
|
||||
this[this.length++] = { qName: qName, value: value, offset: offset };
|
||||
},
|
||||
length: 0,
|
||||
getLocalName: function (i) {
|
||||
return this[i].localName;
|
||||
},
|
||||
getLocator: function (i) {
|
||||
return this[i].locator;
|
||||
},
|
||||
getQName: function (i) {
|
||||
return this[i].qName;
|
||||
},
|
||||
getURI: function (i) {
|
||||
return this[i].uri;
|
||||
},
|
||||
getValue: function (i) {
|
||||
return this[i].value;
|
||||
},
|
||||
// ,getIndex:function(uri, localName)){
|
||||
// if(localName){
|
||||
//
|
||||
// }else{
|
||||
// var qName = uri
|
||||
// }
|
||||
// },
|
||||
// getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))},
|
||||
// getType:function(uri,localName){}
|
||||
// getType:function(i){},
|
||||
};
|
||||
|
||||
exports.XMLReader = XMLReader;
|
||||
exports.parseUtils = parseUtils;
|
||||
exports.parseDoctypeCommentOrCData = parseDoctypeCommentOrCData;
|
||||
Reference in New Issue
Block a user