XML Validation

From Rosetta Code
XML Validation is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Given an XML document and an XSD schema definition validate that the document follows the schema described.

Sample XML and XSD for tests can be found on this W3 Schools page.

C[edit]

Library: LibXML

At the time of writing, the XML and XSD files at the URLs used in the other examples were inaccessible. The files from the W3 Schools page were used for tests.

 
#include <libxml/xmlschemastypes.h>
 
/*
 * Validates an XML document against an XSD schema with libxml2.
 *
 * Command line: <program> <XML Document Name> <XSD Document Name>
 *
 * Returns 0 when the document validates (and when only the usage text is
 * printed); returns 1 when the schema cannot be loaded, the document cannot
 * be parsed, the validation context cannot be created, or validation fails.
 */
int main(int argC, char** argV)
{
    xmlDocPtr doc;
    xmlSchemaPtr schema = NULL;
    xmlSchemaParserCtxtPtr parserCtxt;
    xmlSchemaValidCtxtPtr validCtxt;
    char *XMLFileName;
    char *XSDFileName;
    int ret;
    int status = 1;

    if (argC <= 2) {
        printf("Usage: %s <XML Document Name> <XSD Document Name>\n", argV[0]);
        return 0;
    }

    XMLFileName = argV[1];
    XSDFileName = argV[2];

    /* Keep line numbers on parsed nodes so diagnostics can point at them. */
    xmlLineNumbersDefault(1);

    /* Compile the schema; parser diagnostics are written to stderr. */
    parserCtxt = xmlSchemaNewParserCtxt(XSDFileName);
    xmlSchemaSetParserErrors(parserCtxt, (xmlSchemaValidityErrorFunc) fprintf, (xmlSchemaValidityWarningFunc) fprintf, stderr);
    schema = xmlSchemaParse(parserCtxt);
    xmlSchemaFreeParserCtxt(parserCtxt);

    if (schema == NULL) {
        /*
         * Without a compiled schema there is nothing meaningful to validate
         * against, so stop here instead of running the validator with a
         * NULL schema.
         */
        fprintf(stderr, "Could not load schema %s\n", XSDFileName);
    }
    else {
        doc = xmlReadFile(XMLFileName, NULL, 0);

        if (doc == NULL) {
            fprintf(stderr, "Could not parse %s\n", XMLFileName);
        }
        else {
            validCtxt = xmlSchemaNewValidCtxt(schema);

            if (validCtxt == NULL) {
                fprintf(stderr, "Could not create a validation context for %s\n", XSDFileName);
            }
            else {
                xmlSchemaSetValidErrors(validCtxt, (xmlSchemaValidityErrorFunc) fprintf, (xmlSchemaValidityWarningFunc) fprintf, stderr);
                ret = xmlSchemaValidateDoc(validCtxt, doc);

                /* 0: valid, positive: validity error, negative: internal/API error. */
                if (ret == 0) {
                    printf("%s validates\n", XMLFileName);
                    status = 0;
                }
                else if (ret > 0) {
                    printf("%s fails to validate\n", XMLFileName);
                }
                else {
                    printf("%s validation generated an internal error\n", XMLFileName);
                }
                xmlSchemaFreeValidCtxt(validCtxt);
            }
            xmlFreeDoc(doc);
        }
        xmlSchemaFree(schema);
    }

    xmlSchemaCleanupTypes();
    xmlCleanupParser();
    xmlMemoryDump();

    return status;
}
 

Output, using the files from the W3 Schools page:

C:\rosettaCode>xmlValidator.exe shiporder.xml shiporder.xsd
shiporder.xml validates

C#[edit]

 
using System;
using System.Xml;
using System.Xml.Schema;
using System.IO;
 
/// <summary>
/// Validates a remote XML document against a remote XSD schema and reports
/// the outcome on the console.
/// </summary>
public class Test
{
    // Set by ValidationCallBack as soon as the validating reader reports an error,
    // so Main can tell a clean read from one that produced diagnostics.
    private static bool s_hasErrors;

    /// <summary>
    /// Loads the schema, reads the whole document through a validating
    /// <see cref="XmlReader"/>, and prints whether any validation error occurred.
    /// </summary>
    public static void Main()
    {
        XmlSchemaSet sc = new XmlSchemaSet();
        sc.Add(null, "http://venus.eas.asu.edu/WSRepository/xml/Courses.xsd");

        XmlReaderSettings settings = new XmlReaderSettings();
        settings.ValidationType = ValidationType.Schema;
        settings.Schemas = sc;
        settings.ValidationEventHandler += new ValidationEventHandler(ValidationCallBack);

        // Create the XmlReader object; dispose it once the document has been read.
        using (XmlReader reader = XmlReader.Create("http://venus.eas.asu.edu/WSRepository/xml/Courses.xml", settings))
        {
            // Parse the file. Validation happens as a side effect of reading,
            // and the event handler is called for every problem found.
            while (reader.Read())
            {
            }
        }

        if (s_hasErrors)
        {
            Console.WriteLine("The XML file is NOT valid for the given xsd file");
        }
        else
        {
            Console.WriteLine("The XML file is valid for the given xsd file");
        }
    }

    /// <summary>
    /// Displays a validation message and records that the document is invalid
    /// when the event has error severity.
    /// </summary>
    /// <param name="sender">The object raising the validation event.</param>
    /// <param name="e">Details of the validation problem.</param>
    private static void ValidationCallBack(object sender, ValidationEventArgs e)
    {
        if (e.Severity == XmlSeverityType.Error)
        {
            s_hasErrors = true;
        }

        Console.WriteLine("Validation Error: {0}", e.Message);
    }
}
 

F#[edit]

Using an inline schema:

open System.Xml
open System.Xml.Schema
open System.IO
 
let xml = @"<root>
<!--Start of schema-->
<xs:schema id='an-element' targetNamespace='example' xmlns:mstns='example' xmlns='example' xmlns:xs='http://www.w3.org/2001/XMLSchema' attributeFormDefault='unqualified' elementFormDefault='qualified'>
<xs:element name='an-element'>
<xs:complexType>
<xs:sequence minOccurs='0' maxOccurs='unbounded'>
<xs:element name='another-element' nillable='true'>
<xs:complexType>
<xs:simpleContent>
<xs:extension base='xs:string'>
<xs:attribute name='an-attribute' form='unqualified' type='xs:boolean' />
</xs:extension>
</xs:simpleContent>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
<!--End of schema-->
<an-element xmlns='example'>
<another-element an-attribute='false'>...</another-element>
<another-element an-attribute='wrong'>123</another-element>
</an-element>
</root>"

 
/// Creates a stateful reporter for validation events.
/// Calling the returned function with Some(message, severity) prints the
/// message and bumps the matching counter (warnings are only printed and
/// counted when withWarnings is true) and yields None.
/// Calling it with None yields Some dictionary of the counts gathered so far.
let validationData withWarnings =
    let errorCount = ref 0
    let warningCount = ref 0
    function
    | Some(msg, severity) ->
        if severity = XmlSeverityType.Error then
            errorCount.Value <- errorCount.Value + 1
            printfn "Validation error: %s" msg
        elif withWarnings then
            warningCount.Value <- warningCount.Value + 1
            printfn "Validation warning: %s" msg
        None
    | None ->
        // The warning entry only appears in the summary when warnings were requested.
        let counts =
            if withWarnings then
                [ XmlSeverityType.Error, errorCount.Value
                  XmlSeverityType.Warning, warningCount.Value ]
            else
                [ XmlSeverityType.Error, errorCount.Value ]
        Some(dict counts)
 
/// Entry point: validates the embedded document against its inline schema
/// and prints the collected counts. Passing "-w" as the first argument also
/// reports warnings.
[<EntryPoint>]
let main argv =
    let withWarnings = Array.tryHead argv = Some "-w"
    let report = validationData withWarnings

    let settings = new XmlReaderSettings()
    settings.ValidationType <- ValidationType.Schema
    settings.ValidationFlags <-
        settings.ValidationFlags
        ||| XmlSchemaValidationFlags.ProcessInlineSchema
        ||| XmlSchemaValidationFlags.ReportValidationWarnings
    // Route every validation event into the reporter; its None result is discarded.
    let onValidation =
        new ValidationEventHandler(fun _ e -> report (Some(e.Message, e.Severity)) |> ignore)
    settings.ValidationEventHandler.AddHandler(onValidation)

    let reader = XmlReader.Create(new StringReader(xml), settings)
    // Reading to the end is what drives validation.
    while reader.Read() do
        ()

    let summary = (report None).Value
    printfn "%A" (Seq.toList summary)
    0
 
Output:
>RosettaCode
Validation error: The 'an-attribute' element is invalid - The value 'wrong' is invalid according to its datatype 'http://www.w3.org/2001/XMLSchema:boolean' - The string 'wrong' is not a valid boolean value.
[[Error, 1]]

>RosettaCode -w
Validation warning: Could not find schema information for the element 'root'.
Validation error: The 'an-attribute' element is invalid - The value 'wrong' is invalid according to its datatype 'http://www.w3.org/2001/XMLSchema:boolean' - The string 'wrong' is not a valid boolean value.
[[Error, 1]; [Warning, 1]]

After changing wrong to a valid boolean, e.g. true, the result (without -w) is:

[[Error, 0]]

Java[edit]

import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
 
import java.net.MalformedURLException;
import java.net.URL;
 
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import javax.xml.ws.Holder;
 
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
 
public class XmlValidation {
public static void main(String... args) throws MalformedURLException {
URL schemaLocation = new URL("http://venus.eas.asu.edu/WSRepository/xml/Courses.xsd");
URL documentLocation = new URL("http://venus.eas.asu.edu/WSRepository/xml/Courses.xml");
if (validate(schemaLocation, documentLocation)) {
System.out.println("document is valid");
} else {
System.out.println("document is invalid");
}
}
 
// The least code you need for validation
public static boolean minimalValidate(URL schemaLocation, URL documentLocation) {
SchemaFactory factory = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI);
try {
Validator validator = factory.newSchema(schemaLocation).newValidator();
validator.validate(new StreamSource(documentLocation.toString()));
return true;
} catch (Exception e) {
return false;
}
}
 
// A more complete validator
public static boolean validate(URL schemaLocation, URL documentLocation) {
SchemaFactory factory = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI);
final Holder<Boolean> valid = new Holder<>(true);
try {
Validator validator = factory.newSchema(schemaLocation).newValidator();
// Get some better diagnostics out
validator.setErrorHandler(new ErrorHandler(){
@Override
public void warning(SAXParseException exception) {
System.out.println("warning: " + exception.getMessage());
}
 
@Override
public void error(SAXParseException exception) {
System.out.println("error: " + exception.getMessage());
valid.value = false;
}
 
@Override
public void fatalError(SAXParseException exception) throws SAXException {
System.out.println("fatal error: " + exception.getMessage());
throw exception;
}});
validator.validate(new StreamSource(documentLocation.toString()));
return valid.value;
} catch (SAXException e) {
// Already reported above
return false;
} catch (Exception e) {
// If this is the only thing that throws, it's a gross error
System.err.println(e);
return false;
}
}
}

Kotlin[edit]

Translation of: C
Library: libxml2
Works with: Ubuntu 14.04

Assuming that libxml2 is already installed on your system in the default location(s), you first need to build libxml_schemas.klib using the following .def file and the cinterop tool:

// libxml_schemas.def
headers = /usr/include/libxml2/libxml/xmlschemastypes.h
compilerOpts = -I/usr/include/libxml2
linkerOpts = -L/usr/lib/x86_64-linux-gnu -lxml2

Next, you need to compile the following Kotlin program, linking against libxml_schemas.klib.

// Kotlin Native v0.6
 
import kotlinx.cinterop.*
import platform.posix.*
import libxml_schemas.*
 
/**
 * Error callback handed to libxml2: treats [ctx] as a C FILE stream and
 * writes [msg] to it with fprintf, passing [extra] as the single argument.
 */
fun err(ctx: COpaquePointer?, msg: CPointer<ByteVar>?, extra: CPointer<ByteVar>?) {
    val format = msg?.toKString()
    val argument = extra?.toKString()
    fprintf(ctx?.reinterpret<FILE>(), format, argument)
}
 
/** Warning callback handed to libxml2; warnings are written exactly like errors. */
fun warn(ctx: COpaquePointer?, msg: CPointer<ByteVar>?, extra: CPointer<ByteVar>?) =
    err(ctx, msg, extra)
 
/**
 * Validates the XML file named by the first argument against the XSD file
 * named by the second argument and prints the outcome.
 */
fun main(args: Array<String>) {
    if (args.size != 2) {
        println("You need to pass exactly 2 command line arguments, namely:")
        println(" <XML Document Name> <XSD Document Name>")
        return
    }

    val (xmlFileName, xsdFileName) = args
    xmlLineNumbersDefault(1)

    // Both the schema parser and the validator report through the same callbacks.
    val onError = staticCFunction(::err) as xmlSchemaValidityErrorFunc?
    val onWarning = staticCFunction(::warn) as xmlSchemaValidityWarningFunc?

    // Compile the schema, then release the parser context.
    val parserCtxt = xmlSchemaNewParserCtxt(xsdFileName)
    xmlSchemaSetParserErrors(parserCtxt, onError, onWarning, stderr)
    val schema = xmlSchemaParse(parserCtxt)
    xmlSchemaFreeParserCtxt(parserCtxt)

    val doc = xmlReadFile(xmlFileName, null, 0)
    if (doc != null) {
        val validCtxt = xmlSchemaNewValidCtxt(schema)
        xmlSchemaSetValidErrors(validCtxt, onError, onWarning, stderr)

        val ret = xmlSchemaValidateDoc(validCtxt, doc)
        when {
            ret == 0 -> println("$xmlFileName validates")
            ret > 0 -> println("$xmlFileName fails to validate")
            else -> println("$xmlFileName generated an internal error")
        }

        xmlSchemaFreeValidCtxt(validCtxt)
        xmlFreeDoc(doc)
    } else {
        println("Could not parse $xmlFileName")
    }

    if (schema != null) {
        xmlSchemaFree(schema)
    }
    xmlSchemaCleanupTypes()
    xmlCleanupParser()
    xmlMemoryDump()
}

Finally, the resulting .kexe file should be executed passing it similar command line arguments to the C entry to produce the following output.

$ ./xmlval.kexe shiporder.xml shiporder.xsd
shiporder.xml validates

Perl[edit]

#!/usr/bin/env perl -T
use 5.018_002;
use warnings;
use Try::Tiny;
use XML::LibXML;
 
our $VERSION = 1.000_000;

# Schema under test: a single <a> element declared as xsd:integer.
my $schema_source = <<'END';
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="a" type="xsd:integer"/>
</xsd:schema>
END

my $xmlschema = XML::LibXML::Schema->new( string => $schema_source );
my $parser    = XML::LibXML->new();

# First document is expected to pass, the second to be rejected.
my @candidates = ( '<a>5</a>', '<a>5<b>foobar</b></a>' );

foreach my $markup (@candidates) {
    my $doc = $parser->parse_string($markup);
    try {
        $xmlschema->validate($doc);
    }
    finally {
        # Try::Tiny passes the error to the finally block in @_ when the
        # try block died; @_ is empty on success.
        say( @_ ? "Not valid: @_" : 'Valid' );
    };
}
Output:
Valid
Not valid: unknown-7fe99976a9a0:0: Schemas validity error :
  Element 'a': Element content is not allowed, because the type definition is simple.

Python[edit]

#!/bin/python
from __future__ import print_function
import lxml
from lxml import etree
 
if __name__=="__main__":

    # Build the XSD used for both checks: a single <a> element declared as
    # xsd:integer. (The original also created an unused DTD-validating
    # parser here; it was overwritten before use and has been dropped.)
    schema_root = etree.XML('''\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="a" type="xsd:integer"/>
</xsd:schema>
'''
    )
    schema = etree.XMLSchema(schema_root)

    #Good xml
    # A parser created with schema=... validates while parsing and raises
    # XMLSyntaxError when the document does not conform.
    parser = etree.XMLParser(schema = schema)
    try:
        root = etree.fromstring("<a>5</a>", parser)
        print ("Finished validating good xml")
    except lxml.etree.XMLSyntaxError as err:
        print (err)

    #Bad xml
    # <a> is declared as a simple integer type, so the nested <b> element
    # is expected to be rejected and the error message printed.
    parser = etree.XMLParser(schema = schema)
    try:
        root = etree.fromstring("<a>5<b>foobar</b></a>", parser)
    except lxml.etree.XMLSyntaxError as err:
        print (err)
Output:
Finished validating good xml
Element 'a': Element content is not allowed, because the type definition is simple.

Scala[edit]

import java.net.URL
 
import javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI
import javax.xml.transform.stream.StreamSource
import javax.xml.validation.SchemaFactory
import javax.xml.ws.Holder
import org.xml.sax.{ErrorHandler, SAXException, SAXParseException}
 
/** Validates a sample XML document against a sample XSD schema and prints the verdict. */
object XmlValidation extends App {
  val (schemaLocation, documentLocation) = (new URL("http://venus.eas.asu.edu/WSRepository/xml/Courses.xsd")
    , new URL("http://venus.eas.asu.edu/WSRepository/xml/Courses.xml"))

  println(s"Document is ${if (validate(schemaLocation, documentLocation)) "valid" else "invalid"}.")

  /** A more complete validator that prints every warning, error and fatal error.
    *
    * @param schemaLocation   location of the XSD schema
    * @param documentLocation location of the XML document
    * @return true only if the schema compiled and the document produced no
    *         error or fatal error
    */
  def validate(schemaLocation: URL, documentLocation: URL): Boolean = {
    val factory = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI)
    // A captured local var is enough for the handler to flip the flag; it
    // avoids javax.xml.ws.Holder, which JDK 11+ no longer ships.
    var valid = true

    val validator =
      try factory.newSchema(schemaLocation).newValidator
      catch {
        case e: SAXException =>
          // The error handler below is not installed yet, so a schema that
          // fails to compile must be reported here or it would go unnoticed.
          println("schema error: " + e.getMessage)
          return false
        case e: Exception =>
          println(e)
          return false
      }

    try {
      // Get some better diagnostics out
      validator.setErrorHandler(new ErrorHandler() {
        override def warning(exception: SAXParseException): Unit =
          println("warning: " + exception.getMessage)

        override def error(exception: SAXParseException): Unit = {
          // Recoverable: note the failure and keep validating.
          println("error: " + exception.getMessage)
          valid = false
        }

        override def fatalError(exception: SAXParseException): Unit = {
          println("fatal error: " + exception.getMessage)
          throw exception
        }
      })
      validator.validate(new StreamSource(documentLocation.toString))
      valid
    } catch {
      case _: SAXException =>
        // Already reported by fatalError above
        false
      case e: Exception =>
        // If this is the only thing that throws, it's a gross error
        println(e)
        false
    }
  }
}

Sidef[edit]

Translation of: Perl
require('XML::LibXML')
 
# Checks the XML text `str` against the XSD text `schema` using the Perl
# module XML::LibXML. Yields true when parsing and validation complete
# inside the try block, and false when the catch block is entered.
func is_valid_xml(str, schema) {
 
# A parser and a compiled schema object are created on every call.
var parser = %O<XML::LibXML>.new
var xmlschema = %O<XML::LibXML::Schema>.new(string => schema)
 
try {
xmlschema.validate(parser.parse_string(str))
true
} catch {
false
}
}
 
# Two sample documents; per the Output shown for this entry, the first is
# reported valid and the second (with a nested <b> element) is not.
var good_xml = '<a>5</a>'
var bad_xml = '<a>5<b>foobar</b></a>'
 
# Schema declaring a single <a> element of type xsd:integer.
var xmlschema_markup = <<'END'
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="a" type="xsd:integer"/>
</xsd:schema>
END
 
# Print the validation verdict for each sample document.
[good_xml, bad_xml].each { |xml|
say "is_valid_xml(#{xml.dump}) : #{is_valid_xml(xml, xmlschema_markup)}"
}
Output:
is_valid_xml("<a>5</a>") : true
is_valid_xml("<a>5<b>foobar</b></a>") : false