XML/Input: Difference between revisions
Legend: content added / content deleted
m (→PEG-based Parsing: allow </foo >; def q(_):) |
|||
Line 2,024: | Line 2,024: | ||
| .result = .result + [.match] ; |
| .result = .result + [.match] ; |
||
# consume the literal string $s |
|||
def consumeliteral($s): |
|||
def q($s): |
|||
select(.remainder | startswith($s)) |
select(.remainder | startswith($s)) |
||
| .remainder |= .[$s | length :] ; |
| .remainder |= .[$s | length :] ; |
||
def literal($s): |
def literal($s): |
||
q($s) |
|||
| .result += [$s]; |
| .result += [$s]; |
||
Line 2,066: | Line 2,067: | ||
(consume("'") | parse("[^']*") | consume("'"))); |
(consume("'") | parse("[^']*") | consume("'"))); |
||
def CDataSec : box("@CDATA"; |
def CDataSec : box("@CDATA"; q("<![CDATA[") | string_except("]]>") | q("]]>") ) | ws; |
||
def PROLOG : box("@PROLOG"; |
def PROLOG : box("@PROLOG"; q("<?xml") | string_except("\\?>") | q("?>")); |
||
def DTD : box("@DTD"; |
def DTD : box("@DTD"; q("<!") | parse("[^>]") | q(">")); |
||
# The XML spec specifically disallows double-hyphen within comments |
|||
def COMMENT : box("@COMMENT"; |
def COMMENT : box("@COMMENT"; q("<!--") | string_except("--") | q("-->")); |
||
def CharData : parse("[^<]+"); # only `<` is disallowed |
def CharData : parse("[^<]+"); # only `<` is disallowed |
||
# This is more permissive than required: |
|||
def Name : parse("[A-Za-z:_][^/=<>\n\r\t ]*"); |
def Name : parse("[A-Za-z:_][^/=<>\n\r\t ]*"); |
||
def Attribute : keyvalue(Name | ws | |
def Attribute : keyvalue(Name | ws | q("=") | ws | String | ws); |
||
def Attributes: box( plus(Attribute) ) | .result[-1] |= {"@attributes": add} ; |
def Attributes: box( plus(Attribute) ) | .result[-1] |= {"@attributes": add} ; |
||
Line 2,081: | Line 2,084: | ||
def Element : |
def Element : |
||
def Content : star(Element // CDataSec // CharData // COMMENT); |
def Content : star(Element // CDataSec // CharData // COMMENT); |
||
objectify( |
objectify( q("<") |
||
| Name |
| Name |
||
| .result[-1] as $name |
| .result[-1] as $name |
||
| ws |
| ws |
||
| (Attributes // ws) |
| (Attributes // ws) |
||
| ( ( |
| ( (q("/>") |
||
// ( |
// (q(">") | Content | q("</") | q($name) | ws | q(">"))) |
||
| ws) ) ; |
| ws) ) ; |
||