XML/Input: Difference between revisions

Content added Content deleted
m (→‎PEG-based Parsing: allow </foo >; def q(_):)
Line 2,024: Line 2,024:
| .result = .result + [.match] ;
| .result = .result + [.match] ;


# consume the literal string $s
# Emits its input only when .remainder begins with $s (otherwise emits nothing),
# then drops the leading ($s | length) characters from .remainder.
# $s is compared with startswith, i.e. as plain text.
# In this revision diff the definition is renamed from `consumeliteral` to `q`;
# the body lines are unchanged and therefore appear twice.
def consumeliteral($s):
def q($s):
select(.remainder | startswith($s))
select(.remainder | startswith($s))
| .remainder |= .[$s | length :] ;
| .remainder |= .[$s | length :] ;


# literal($s): run the literal-string matcher on $s and, when it succeeds,
# append $s itself to the .result array.
# In this revision diff only the helper call changes:
# consumeliteral($s) in the prior text, q($s) in the revised text.
def literal($s):
def literal($s):
consumeliteral($s)
q($s)
| .result += [$s];
| .result += [$s];


Line 2,066: Line 2,067:
(consume("'") | parse("[^']*") | consume("'")));
(consume("'") | parse("[^']*") | consume("'")));


# CDataSec: the opening CDATA marker, then string_except(...), then the closing "]]>",
# all passed to box with the tag "@CDATA", followed by ws.
# Prior text: consume() with a backslash-escaped marker and string_except("]]").
# Revised text: q() with the plain marker "<![CDATA[" and string_except("]]>").
# NOTE(review): box, consume, string_except and ws are defined outside this view;
# presumably string_except collects text up to its argument - confirm.
def CDataSec : box("@CDATA"; consume("<!\\[CDATA\\[") | string_except("]]") | consume("]]>") ) | ws;
def CDataSec : box("@CDATA"; q("<![CDATA[") | string_except("]]>") | q("]]>") ) | ws;
# PROLOG: "<?xml", then string_except("\\?>"), then "?>", passed to box with the tag "@PROLOG".
# The revised text replaces consume() and its backslash-escaped arguments with q() and plain strings.
# The argument of string_except keeps its backslash escape in both versions.
# NOTE(review): string_except is defined outside this view; the retained escape
# suggests it takes a regex - confirm.
def PROLOG : box("@PROLOG"; consume("<\\?xml") | string_except("\\?>") | consume("\\?>"));
def PROLOG : box("@PROLOG"; q("<?xml") | string_except("\\?>") | q("?>"));
# DTD: "<!", then parse("[^>]"), then ">", passed to box with the tag "@DTD".
# The revised text only swaps consume() for q() on the two delimiters.
# NOTE(review): the pattern "[^>]" has no quantifier. If parse (defined outside this view)
# matches the regex once at the start of .remainder, only a single character can sit
# between "<!" and ">" - confirm whether "[^>]*" was intended.
def DTD : box("@DTD"; consume("<!") | parse("[^>]") | consume(">"));
def DTD : box("@DTD"; q("<!") | parse("[^>]") | q(">"));
# The XML spec specifically disallows double-hyphen within comments
# COMMENT: "<!--", then string_except(...), then "-->", passed to box with the tag "@COMMENT".
# Prior text: string_except("-->"). Revised text: string_except("--"), in line with the
# rule stated above; the delimiters switch from consume() to q().
# NOTE(review): string_except and box are defined outside this view.
def COMMENT : box("@COMMENT"; consume("<!--") | string_except("-->") | consume("-->"));
def COMMENT : box("@COMMENT"; q("<!--") | string_except("--") | q("-->"));


# CharData: hands the pattern "[^<]+" (one or more characters other than "<") to parse.
# The line is unchanged between revisions and therefore appears twice.
def CharData : parse("[^<]+"); # only `<` is disallowed
def CharData : parse("[^<]+"); # only `<` is disallowed


# This is more permissive than required:
# The pattern requires a first character that is an ASCII letter, ":" or "_",
# followed by any run of characters other than "/", "=", "<", ">", newline,
# carriage return, tab and space.
# The definition is unchanged between revisions and therefore appears twice.
def Name : parse("[A-Za-z:_][^/=<>\n\r\t ]*");
def Name : parse("[A-Za-z:_][^/=<>\n\r\t ]*");


# Attribute: the sequence Name, ws, "=", ws, String, ws, passed to keyvalue.
# The revised text matches "=" with q() instead of consume().
# NOTE(review): keyvalue, ws and String are defined outside this view; presumably
# keyvalue turns the parsed name and string into a key/value object - confirm.
def Attribute : keyvalue(Name | ws | consume("=") | ws | String | ws);
def Attribute : keyvalue(Name | ws | q("=") | ws | String | ws);
# Attributes: passes plus(Attribute) to box, then replaces the last element of .result
# with an object whose "@attributes" key holds the `add` of that element.
# The line is unchanged between revisions and therefore appears twice.
# NOTE(review): box and plus are defined outside this view; presumably plus means
# "one or more" - confirm.
def Attributes: box( plus(Attribute) ) | .result[-1] |= {"@attributes": add} ;
def Attributes: box( plus(Attribute) ) | .result[-1] |= {"@attributes": add} ;


Line 2,081: Line 2,084:
# Element: "<", a Name, ws, then (Attributes // ws), then one of
#   - "/>", or
#   - ">", Content, "</", the same name again, and ">",
# followed by ws; the whole sequence is passed to objectify.
# This is a revision diff: each line appears as prior text followed by revised text.
# The revised text uses q() for every delimiter and adds `ws` before the final ">"
# of the closing tag, so that a closing tag written as "</foo >" is accepted.
# NOTE(review): objectify, star and ws are defined outside this view.
def Element :
def Element :
# Content refers back to Element, so elements may nest.
def Content : star(Element // CDataSec // CharData // COMMENT);
def Content : star(Element // CDataSec // CharData // COMMENT);
objectify( consume("<")
objectify( q("<")
| Name
| Name
# Bind the last entry of .result as $name, for matching the closing tag below.
| .result[-1] as $name
| .result[-1] as $name
| ws
| ws
| (Attributes // ws)
| (Attributes // ws)
| ( (consume("/>")
| ( (q("/>")
# The closing tag must repeat $name as a literal string.
// (consume(">") | Content | consume("</") | consumeliteral($name) | consume(">")))
// (q(">") | Content | q("</") | q($name) | ws | q(">")))
| ws) ) ;
| ws) ) ;