XML/Input: Difference between revisions

m
→‎PEG-based Parsing: allow </foo >; def q(_):
m (→‎PEG-based Parsing: allow </foo >; def q(_):)
Line 2,024:
| .result = .result + [.match] ;
 
# Consume the literal string $s from the front of .remainder.
# This is a plain prefix test (startswith), not a regex match, so $s needs
# no regex escaping. Emits nothing (backtracks) when .remainder does not
# begin with $s; .result is left untouched.
def q($s):
  select(.remainder | startswith($s))
  | .remainder |= .[$s | length :] ;

# Earlier name for q/1, kept so that any remaining callers still resolve.
def consumeliteral($s): q($s);
 
# Consume the literal string $s (via q/1) and, on success, append $s
# to .result. Emits nothing when .remainder does not start with $s.
def literal($s):
  q($s)
  | .result += [$s];
 
Line 2,066 ⟶ 2,067:
(consume("'") | parse("[^']*") | consume("'")));
 
# CDATA section: the literal opener "<![CDATA[", then text up to "]]>", then
# the literal closer, all boxed under "@CDATA", followed by ws.
# q/1 matches its argument literally, so the opener carries no regex escapes.
def CDataSec : box("@CDATA"; q("<![CDATA[") | string_except("]]>") | q("]]>") ) | ws;
# XML prolog: literal "<?xml", then text handled by string_except, then the
# literal "?>", boxed under "@PROLOG".
# The delimiters passed to q/1 are plain strings (q/1 uses startswith); the
# string_except argument is left exactly as it was.
def PROLOG : box("@PROLOG"; q("<?xml") | string_except("\\?>") | q("?>"));
# DTD-style declaration: literal "<!", a parse of "[^>]", then literal ">",
# boxed under "@DTD".
# NOTE(review): "[^>]" has no repetition operator; if parse/1 applies the
# regex once, only a single character is accepted before ">" -- confirm
# whether "[^>]*" was intended.
def DTD : box("@DTD"; q("<!") | parse("[^>]") | q(">"));
# Comment: literal "<!--", then text up to "-->", then literal "-->",
# boxed under "@COMMENT".
# The XML spec specifically disallows double-hyphen within comments.
def COMMENT : box("@COMMENT"; q("<!--") | string_except("-->") | q("-->"));
 
# Character data: one or more characters, none of which is `<`
# (the only character excluded by the pattern).
def CharData:
  parse("[^<]+");
 
# Name: the first character must be an ASCII letter, ":" or "_"; any later
# character is accepted unless it is "/", "=", "<", ">", newline, carriage
# return, tab or space. This accepts more than the XML grammar requires.
def Name:
  parse("[A-Za-z:_][^/=<>\n\r\t ]*");
 
# One attribute: Name, optional whitespace, a literal "=", optional
# whitespace, a String, then trailing whitespace -- combined by keyvalue.
def Attribute : keyvalue(Name | ws | q("=") | ws | String | ws);
# One or more Attribute items collected inside a box; the last entry of
# .result is then replaced by an object whose "@attributes" key holds the
# `add` of that entry.
def Attributes:
  box(plus(Attribute))
  | .result[-1] |= {"@attributes": add};
 
Line 2,081 ⟶ 2,084:
# Element: either a self-closing tag `<name attrs/>` or
# `<name attrs> Content </name>`, passed through objectify.
# The name parsed for the start tag is bound to $name and must reappear
# literally in the end tag.
def Element :
  # Content is local to Element so that it can recurse into nested elements.
  def Content : star(Element // CDataSec // CharData // COMMENT);
  objectify( q("<")
    | Name
    | .result[-1] as $name      # the Name just parsed
    | ws
    | (Attributes // ws)
    | ( (q("/>")
         # ws before the closing ">" allows end tags such as `</foo >`
         // (q(">") | Content | q("</") | q($name) | ws | q(">")))
        | ws) ) ;
 
2,484

edits