XML/Input: Difference between revisions

10,210 bytes added ,  3 months ago
m
m (→‎{{header|Wren}}: Minor tidy)
 
(12 intermediate revisions by 5 users not shown)
Line 1,056:
=={{header|C}}==
 
==={{libheader|LibXML}}===
{{uses from|Library|libxml|component1=xmlDoc|component2=xmlNode|component3=xmlReadMemory|component4=xmlDocGetRootElement|component5=xmlFreeDoc|component6=xmlCleanupParser|component7=xmlNode|component8=XML_ELEMENT_NODE|component9=xmlAttr|component10=xmlHasProp}}
{{uses from|Library|C Runtime|component1=printf}}
Line 1,107:
return 0;
}</syntaxhighlight>
 
==={{libheader|Gadget}}===
<p>Gadget is a string-handling library, not an XML parser, but it is sufficient for this simple task.</p>
<syntaxhighlight lang="c">
#include <gadget/gadget.h>
 
LIB_GADGET_START
 
/* Program entry (Gadget's Main ... End macros).
   Loads the XML file named by the first command-line argument and prints the
   "Name" attribute of every "Student" element found inside "Students".
   NOTE(review): Assert(cond, label) presumably transfers control to the
   matching Exception(label) block when cond is false -- confirm against the
   Gadget documentation. */
Main
/* Exactly one user argument (the XML file name) is expected. */
Assert( Arg_count == 2, end_input );
Get_arg_str( xml_file, 1 );
Assert( Exist_file(xml_file), file_not_exist );

/* Read the whole file into memory and extract the "Students" element. */
char* xml = Load_string(xml_file);
ST_GETTAG field = Unparser( &xml, "Students");
Assert ( field.content, fail_content );

/* Extract one "Student" tag per iteration while the text still occurs in
   the remaining content. */
while ( Occurs ("Student",field.content ) )
{
ST_GETTAG sub_field = Unparser( &field.content, "Student");

if(sub_field.attrib)
{
int i=0;
/* Walk the attribute list (indices 0 .. sub_field.len) looking for "Name". */
Iterator up i [ 0: 1: sub_field.len ]
{
if ( strcmp(sub_field.name[i], "Name" )==0 )
{
/* Only the single entity &#x00C9; is translated here. */
Get_fn_let( sub_field.attrib[i], Str_tran( sub_field.attrib[i], "&#x00C9;","É" ) );
/* TODO: write a general function that converts such character entities. */
Print "%s\n",sub_field.attrib[i];
break;
}
}
}
Free tag sub_field;
}
Free tag field;
/* Exception areas.
   NOTE(review): the Free statements are interleaved with the handlers,
   presumably so that each failure path releases only what was already
   allocated -- confirm. */
Exception( fail_content ){
Msg_red("Not content for \"Students\" field\n");
}
Free secure xml;
Exception( file_not_exist ){
Msg_redf("File \"%s\" not found\n", xml_file);
}
Free secure xml_file;
Exception( end_input ){
Msg_yellow("Use:\n RC_xml <xml_file.xml>");
}
End
</syntaxhighlight>
{{out}}
<pre>
$ ./tests/RC_xml xml_data.xml
April
Bob
Chad
Dave
Émily
 
$ ./tests/RC_xml somefile.xml
File "somefile.xml" not found
 
</pre>
<p>File: xml_data.xml:</p>
<syntaxhighlight lang="xml"><Students>
<Student Name="April" Gender="F" DateOfBirth="1989-01-02" />
<Student Name="Bob" Gender="M" DateOfBirth="1990-03-04" />
<Student Name="Chad" Gender="M" DateOfBirth="1991-05-06" />
<Student Name="Dave" Gender="M" DateOfBirth="1992-07-08">
<Pet Type="dog" Name="Rover" />
</Student>
<Student DateOfBirth="1993-09-10" Gender="F" Name="&#x00C9;mily" />
</Students></syntaxhighlight>
 
=={{header|C sharp}}==
Line 1,598 ⟶ 1,680:
&mily
</pre>
 
=={{header|FreeBASIC}}==
{{trans|Yabasic}}
<syntaxhighlight lang="vb">Data 32, 173, 189, 156, 207, 190, 221, 245, 249, 184, 166, 174, 170, 32, 169, 238
Data 248, 241, 253, 252, 239, 230, 244, 250, 247, 251, 167, 175, 172, 171, 243, 168
Data 183, 181, 182, 199, 142, 143, 146, 128, 212, 144, 210, 211, 222, 214, 215, 216
Data 209, 165, 227, 224, 226, 229, 153, 158, 157, 235, 233, 234, 154, 237, 232, 225
Data 133, 160, 131, 198, 132, 134, 145, 135, 138, 130, 136, 137, 141, 161, 140, 139
Data 208, 164, 149, 162, 147, 228, 148, 246, 155, 151, 163, 150, 129, 236, 231, 152
 
' Lookup table for character codes 160..255: codes(i) holds the replacement
' for code initCode + i, read in order from the Data statements above.
' NOTE(review): the values look like an ISO-8859-1 -> code page 850 mapping
' (e.g. 201 -> 144) -- confirm.
Dim Shared As Integer numCodes, initCode
initCode = 160
numCodes = 255 - initCode + 1

' The upper bound numCodes leaves one spare element; only 0..numCodes-1 are filled.
Dim Shared As Integer codes(numCodes)
For i As Integer = 0 To numCodes - 1 : Read codes(i)
Next i
 
' Translates a character code through the shared codes() table.
'   charcode - the code to translate
'   tocode   - zero (default): forward lookup, returns codes(charcode - initCode)
'              non-zero: reverse lookup, returns the source code whose table
'              entry equals charcode, or 0 when no entry matches
Function codeConversion(charcode As Integer, tocode As Integer = False) As Integer
    ' Forward direction: a plain table read.
    If tocode = 0 Then Return codes(charcode - initCode)

    ' Reverse direction: linear scan for the first matching entry.
    Dim As Integer idx = 0
    While idx < numCodes
        If codes(idx) = charcode Then Return idx + initCode
        idx += 1
    Wend
    Return 0
End Function
 
' Replaces each hexadecimal character reference in nombre with one character.
'   nombre - text that may contain references such as "&#x00C9;"
'   mark   - the prefix that introduces a reference (the caller passes "&#x")
' A reference is taken to be mark + four hex digits + one terminator character.
' Codes inside the codes() table range are translated through codeConversion();
' lower codes are emitted unchanged; codes above 255 cannot be represented by
' Chr() in a single byte, so those references are left as they are.
' Returns the converted text.
Function convASCII(nombre As String, mark As String) As String
    ' InStr positions are 1-based: a start of 0 makes InStr return 0 at once,
    ' which previously ended the loop before any reference was converted.
    Dim As Integer p = 1, c, lm = Len(mark)
    Do
        p = Instr(p, nombre, mark)
        If p = 0 Then Exit Do
        ' The digits are hexadecimal; ValInt only reads them as such with "&H".
        c = Valint("&H" + Mid(nombre, p + lm, 4))
        ' Translate only codes covered by the table to avoid an out-of-range index.
        If c >= initCode And c < initCode + numCodes Then c = codeConversion(c)
        If c <= 255 Then
            nombre = Left(nombre, p-1) + Chr(c) + Right(nombre, Len(nombre) - (p + lm + 4))
        End If
        ' Step past the character just written (or past the untouched "&").
        p += 1
    Loop
    Return nombre
End Function
 
' The XML document, built as one long string.
' NOTE(review): in a plain (non-escaped) FreeBASIC string literal \' is
' presumably two literal characters; the search strings below use the same
' spelling, so the matching stays consistent -- confirm.
Dim As String strXml = "<Students>"
strXml += " <Student Name=\'April\' Gender=\'F\' DateOfBirth=\'1989-01-02\' />"
strXml += " <Student Name=\'Bob\' Gender=\'M\' DateOfBirth=\'1990-03-04\' />"
strXml += " <Student Name=\'Chad\' Gender=\'M\' DateOfBirth=\'1991-05-06\' />"
strXml += " <Student Name=\'Dave\' Gender=\'M\' DateOfBirth=\'1992-07-08\'>"
strXml += " <Pet Type=\'dog\' Name=\'Rover\' />"
strXml += " </Student>"
strXml += " <Student DateOfBirth=\'1993-09-10\' Gender=\'F\' Name=\'&#x00C9;mily\' />"
strXml += "</Students>"

' tag1 locates the next student element, tag2 the start of its Name value.
Dim As String tag1 = "<Student"
Dim As String tag2 = "Name=\'", nombre
Dim As Integer ltag = Len(tag2), p = 1, p2

' p is a cursor into strXml that only moves forward.
Do
' The first match is the opening "<Students>" tag, which shares the prefix;
' the Name search that follows then lands on the first student's attribute.
p = Instr(p, strXml, tag1)
If p = 0 Then Exit Do
p = Instr(p, strXml, tag2)
' Skip over the tag2 text so that p points at the first character of the value.
p += ltag
' p2 is the position of the closing quote sequence after the value.
p2 = Instr(p, strXml, "\'")
nombre = convASCII(Mid(strXml, p, p2 - p), "&#x")
Print nombre
Loop
 
Sleep</syntaxhighlight>
{{out}}
<pre>April
Bob
Chad
Dave
&#x00C9;mily</pre>
 
=={{header|FutureBasic}}==
Line 1,933 ⟶ 2,088:
alert(output);
</syntaxhighlight>
 
=={{header|jq}}==
Neither the C nor the Go implementations of jq natively support XML,
so in this entry we present three solutions:
 
* the first uses `xq`, a jq "wrapper";
* the second uses a third-party XML-to-JSON translator, `knead`;
* the third is a "pure jq" solution based on a Parsing Expression Grammar for XML.
 
===xq===
xq is part of the python-yq package [https://github.com/kislyuk/yq].
<syntaxhighlight lang=jq>
xq -r '.Students.Student[]."@Name"' students.xml
</syntaxhighlight>
{{output}}
<pre>
April
Bob
Chad
Dave
Émily
</pre>
 
===knead | jq===
`knead` is part of the `dataknead` package at https://hay.github.io/dataknead/
<pre>
knead students.xml | jq -r '.Students.Student[]["@Name"]'
</pre>
{{Output}}
As above.
 
===PEG-based Parsing===
In this section, a PEG-based XML parser is presented. Its main goal is
to translate valid XML documents into valid JSON losslessly, rather
than to check for validity.
 
In particular, the relative ordering of embedded tags and "text"
fragments is preserved, as is "white space" when significant in
accordance with the XML specification.
 
Being PEG-based, however, the parser should be quite easy to adapt for other purposes.
 
A jq filter, `jsonify`, is also provided for converting hex character codes
of the form `&#x....;` to the corresponding character, e.g. "&#x00C9;mily" -> "Émily".
It also removes strings of the form '^\n *$' in the "text" portions of the XML document.
 
Some other noteworthy points:
 
* since "duplicate attribute names within a tag are not permitted with XML", we can group the attributes within a tag as a JSON object, as jq respects key ordering.
 
* since XML tags cannot begin with `@`, the "PROLOG" is rendered as a JSON object with key "@PROLOG" and likewise for "COMMENT", "DTD" and "CDATA".
 
* consecutive attribute-value pairs are grouped together under the key named "@attributes".
 
The grammar is primarily adapted from:
* (1) https://peerj.com/preprints/1503/
* (2) https://cs.lmu.edu/~ray/notes/xmlgrammar/
====PEG Infrastructure====
The jq module at [[:Category:Jq/peg.jq]] can be included by copying it to a file,
and adding an `include` statement to the top of the main program, e.g. as follows:
<syntaxhighlight lang=jq>
include "peg" {search: "."};
</syntaxhighlight>
 
====XML Grammar====
<syntaxhighlight lang=jq>
# XML: parses the input string as one XML document and emits the accumulated
# `.result` of the parse.
# The combinators used here (consume, parse, q, box, ws, star, plus, optional,
# keyvalue, objectify, string_except) are not defined in this snippet;
# presumably they come from the included "peg" module.
def XML:
# A quoted attribute value, delimited by either double or single quotes.
def String : ((consume("\"") | parse("[^\"]*") | consume("\"")) //
(consume("'") | parse("[^']*") | consume("'")));
 
def CDataSec : box("@CDATA"; q("<![CDATA[") | string_except("]]>") | q("]]>") ) | ws;
def PROLOG : box("@PROLOG"; q("<?xml") | string_except("\\?>") | q("?>"));
# NOTE(review): parse("[^>]") matches a single character, so a declaration
# such as <!DOCTYPE ...> presumably does not match -- confirm whether a
# repetition was intended before changing it.
def DTD : box("@DTD"; q("<!") | parse("[^>]") | q(">"));
# The XML spec specifically disallows double-hyphen within comments
def COMMENT : box("@COMMENT"; q("<!--") | string_except("--") | q("-->"));
 
def CharData : parse("[^<]+"); # only `<` is disallowed
 
# This is more permissive than required:
def Name : parse("[A-Za-z:_][^/=<>\n\r\t ]*");
 
def Attribute : keyvalue(Name | ws | q("=") | ws | String | ws);
# One or more attributes, merged with `add` under the key "@attributes".
def Attributes: box( plus(Attribute) ) | .result[-1] |= {"@attributes": add} ;
 
# <foo> must be matched with </foo>
def Element :
def Content : star(Element // CDataSec // CharData // COMMENT);
# $name captures the opening tag's name so that the closing tag must repeat it;
# the element is either self-closing ("/>") or has content and a closing tag.
objectify( q("<")
| Name
| .result[-1] as $name
| ws
| (Attributes // ws)
| ( (q("/>")
// (q(">") | Content | q("</") | q($name) | ws | q(">")))
| ws) ) ;
 
# Document level: optional prolog and DTD, comments, exactly one root element,
# then trailing comments.
{remainder: . }
| ws
| optional(PROLOG) | ws
| optional(DTD) | ws
| star(COMMENT | ws)
| Element | ws # for HTML, one would use star(Element) here
| star(COMMENT | ws)
| .result;
</syntaxhighlight>
====The Task====
<syntaxhighlight lang=jq>
# For handling hex character codes &#x
# hex2i: converts a string of hexadecimal digits (either case) to an integer.
# The empty string yields 0.
def hex2i:
  # Codepoint of a lowercase hex digit -> its value: "a".."f" are 97..102, "0".."9" are 48..57.
  def digit: if . >= 87 then . - 87 else . - 48 end;
  (ascii_downcase | explode | map(digit)) as $digits
  # Fold left to right, most significant digit first.
  | reduce $digits[] as $d (0; . * 16 + $d);
 
# hexcode2json: replaces hexadecimal character references of the form
# "&#x<hex>;" in the input string with the character they denote,
# e.g. "&#x00C9;mily" -> "Émily".
# Only genuine hex digits are accepted (1 to 6 of them), so "&#xC9;" is
# handled as well as "&#x00C9;", and text such as "&#xzzzz;" is left alone.
# References above U+10FFFF (1114111) are not valid code points and are
# emitted unchanged rather than passed to `implode`.
def hexcode2json:
  gsub("&#x(?<x>[0-9A-Fa-f]{1,6});";
       (.x | hex2i) as $cp
       | if $cp <= 1114111 then [$cp] | implode else "&#x\(.x);" end) ;
 
# jsonify: tidies the parser's JSON output.
#  - every string has its hexadecimal character references decoded;
#  - every array loses the string items that consist only of a newline
#    followed by spaces.
def jsonify:
  def is_blank_text: type == "string" and test("^\n *$");
  walk( if type == "string" then hexcode2json
        elif type == "array" then map(select(is_blank_text | not))
        else . end );
 
# First convert to JSON ...
XML | jsonify
# ... and then extract Student Names
# Each item of the parse result is examined in turn; `// empty` drops the
# null produced where "@attributes" is missing, so only entries carrying
# attributes contribute a Name.
| .[]
| (.Students[].Student[]["@attributes"] // empty).Name
</syntaxhighlight>
'''Invocation''': jq -Rrs -f xml.jq students.xml
{{output}}
As above.
 
=={{header|Julia}}==
Line 2,186 ⟶ 2,474:
beautify=-4
Report 3, doc$
With Databank, "Attr" as Attr$()
Method databank, "GetListByTag", "Student", -1 as Result
c=1 // Result is type of M2000 stack.
Line 2,201 ⟶ 2,488:
Read fieldsNo
}
// this place hexadecimal value for char É
// this object offer by default 5 escaped characters: quot, amp, apos, lt, gt
// inner local function conv$() can be used to escape characters above 127.
fieldsno.tag$("Name")=@conv$("Émily")
Report 3, doc$
end if
 
// this place hexadecimal value for char É
// this object offer by default 5 escaped characters: quot, amp, apos, lt, gt
// inner local function conv$() can be used to escape characters above 127.
fieldsno.tag$("Name")=@conv$("Émily")
declare databank Nothing
Function Conv$(a$)
Line 3,352 ⟶ 3,641:
&#x00C9;mily
Nil</syntaxhighlight>
 
=={{header|Swift}}==
<syntaxhighlight lang="swift">
import Foundation
 
let xmlString = """
<Students>
<Student Name="April" Gender="F" DateOfBirth="1989-01-02" />
<Student Name="Bob" Gender="M" DateOfBirth="1990-03-04" />
<Student Name="Chad" Gender="M" DateOfBirth="1991-05-06" />
<Student Name="Dave" Gender="M" DateOfBirth="1992-07-08">
<Pet Type="dog" Name="Rover" />
</Student>
<Student DateOfBirth="1993-09-10" Gender="F" Name="&#x00C9;mily" />
</Students>
"""
// Parse the document once, then list every student's name in two ways:
// with an XPath query and by walking the root element's children.
let xmlData = Data(xmlString.utf8)
do {
    let document = try XMLDocument(data: xmlData)

    print("Using XPath:")
    let nameNodes = try document.nodes(forXPath: "/Students/Student/@Name")
    // Attribute nodes without a string value are skipped.
    for name in nameNodes.compactMap(\.stringValue) {
        print(name)
    }

    print("Using node walk")
    // A missing root element simply yields no students.
    let students = document.rootElement()?.elements(forName: "Student") ?? []
    for name in students.compactMap({ $0.attribute(forName: "Name")?.stringValue }) {
        print(name)
    }
} catch {
    debugPrint(error)
}
</syntaxhighlight>
Output:
<syntaxhighlight lang="shell">
~ % ./XMLInput
Using XPath:
April
Bob
Chad
Dave
Émily
Using node walk
April
Bob
Chad
Dave
Émily
</syntaxhighlight>
 
=={{header|Tcl}}==
Line 3,580 ⟶ 3,921:
{{libheader|Wren-fmt}}
Wren doesn't currently have an XML parser though we don't really need one for this task as string pattern matching is sufficient to extract the student names.
<syntaxhighlight lang="ecmascriptwren">import "./pattern" for Pattern
import "./fmt" for Conv
 
var xml =
Line 3,626 ⟶ 3,967:
{{libheader|Wren-xsequence}}
Since the first version was written, the above XML parser has appeared and support for 'raw' strings has also been added to the language. Consequently, the solution can now be rewritten as follows, the output being the same as before.
<syntaxhighlight lang="ecmascriptwren">import "./xsequence" for XDocument
 
var xml = """
9,485

edits