XML Validation

From Rosetta Code
XML Validation is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Given an XML document and an XSD schema definition, validate that the document follows the schema described.

C#[edit]

 
using System;
using System.Xml;
using System.Xml.Schema;
using System.IO;
 
/// <summary>
/// Validates a remote XML document against a remote XSD schema and prints the outcome.
/// </summary>
public class Test
{
    private const string SchemaUri = "http://venus.eas.asu.edu/WSRepository/xml/Courses.xsd";
    private const string DocumentUri = "http://venus.eas.asu.edu/WSRepository/xml/Courses.xml";

    // Number of error-severity events raised by the validating reader during the current run.
    private static int errorCount;

    /// <summary>
    /// Loads the schema, reads the whole document through a validating reader and
    /// prints the success message only when no validation error was reported.
    /// </summary>
    public static void Main()
    {
        XmlSchemaSet schemas = new XmlSchemaSet();
        // A null namespace tells the schema set to use the targetNamespace declared in the XSD.
        schemas.Add(null, SchemaUri);

        XmlReaderSettings settings = new XmlReaderSettings();
        settings.ValidationType = ValidationType.Schema;
        settings.Schemas = schemas;
        settings.ValidationEventHandler += new ValidationEventHandler(ValidationCallBack);

        errorCount = 0;

        // Validation happens as a side effect of reading; the handler is invoked for each problem.
        using (XmlReader reader = XmlReader.Create(DocumentUri, settings))
        {
            while (reader.Read())
            {
            }
        }

        if (errorCount == 0)
        {
            Console.WriteLine("The XML file is valid for the given xsd file");
        }
        else
        {
            Console.WriteLine("The XML file is NOT valid for the given xsd file ({0} error(s))", errorCount);
        }
    }

    /// <summary>
    /// Displays a validation event and records it when its severity is Error.
    /// </summary>
    /// <param name="sender">The object that raised the event.</param>
    /// <param name="e">Event data carrying the message and severity.</param>
    private static void ValidationCallBack(object sender, ValidationEventArgs e)
    {
        if (e.Severity == XmlSeverityType.Error)
        {
            errorCount++;
        }

        Console.WriteLine("Validation Error: {0}", e.Message);
    }
}
 

F#[edit]

Using an inline schema:

open System.Xml
open System.Xml.Schema
open System.IO
 
// Sample input: a <root> wrapper that carries its own XSD schema inline (between the
// "Start of schema" / "End of schema" comments) followed by an instance document in the
// 'example' namespace. The schema declares 'an-attribute' as xs:boolean, so the second
// <another-element>, whose attribute value is 'wrong', does not conform to it.
let xml = @"<root>
<!--Start of schema-->
<xs:schema id='an-element' targetNamespace='example' xmlns:mstns='example' xmlns='example' xmlns:xs='http://www.w3.org/2001/XMLSchema' attributeFormDefault='unqualified' elementFormDefault='qualified'>
<xs:element name='an-element'>
<xs:complexType>
<xs:sequence minOccurs='0' maxOccurs='unbounded'>
<xs:element name='another-element' nillable='true'>
<xs:complexType>
<xs:simpleContent>
<xs:extension base='xs:string'>
<xs:attribute name='an-attribute' form='unqualified' type='xs:boolean' />
</xs:extension>
</xs:simpleContent>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
<!--End of schema-->
<an-element xmlns='example'>
<another-element an-attribute='false'>...</another-element>
<another-element an-attribute='wrong'>123</another-element>
</an-element>
</root>"

 
/// Creates a stateful collector for validation events.
///
/// The returned function has two modes:
///   * Some (message, severity) - prints the message, bumps the matching counter and
///     returns None. Warnings are ignored entirely unless withWarnings is true.
///   * None - returns Some dictionary with the counts gathered so far, keyed by severity
///     (the Warning entry is present only when withWarnings is true).
let validationData withWarnings =
    let errorCount = ref 0
    let warningCount = ref 0
    fun input ->
        match input with
        | None ->
            // Query mode: hand back a snapshot of the counters.
            let counts =
                if withWarnings then
                    dict [ XmlSeverityType.Error, errorCount.Value; XmlSeverityType.Warning, warningCount.Value ]
                else
                    dict [ XmlSeverityType.Error, errorCount.Value ]
            Some counts
        | Some (msg, severity) ->
            // Record mode: report the event and count it.
            if severity = XmlSeverityType.Error then
                errorCount.Value <- errorCount.Value + 1
                printfn "Validation error: %s" msg
            elif withWarnings then
                warningCount.Value <- warningCount.Value + 1
                printfn "Validation warning: %s" msg
            None
 
/// Entry point: validates the embedded document against its inline schema and prints
/// the per-severity counts. Pass "-w" as the first argument to also report warnings.
/// Always returns exit code 0.
[<EntryPoint>]
let main argv =
    let withWarnings = argv.Length > 0 && argv.[0] = "-w"
    let vData = validationData withWarnings
    // Forward every validation event to the collector; its return value is irrelevant here.
    let validationEvent =
        new ValidationEventHandler(fun _ e -> vData (Some(e.Message, e.Severity)) |> ignore)
    let settings = new XmlReaderSettings()
    settings.ValidationType <- ValidationType.Schema
    settings.ValidationEventHandler.AddHandler(validationEvent)
    // ProcessInlineSchema makes the reader pick up the <xs:schema> embedded in the document;
    // ReportValidationWarnings makes warnings reach the handler as well as errors.
    settings.ValidationFlags <-
        settings.ValidationFlags
        ||| XmlSchemaValidationFlags.ProcessInlineSchema
        ||| XmlSchemaValidationFlags.ReportValidationWarnings
    // 'use' disposes both readers when main returns.
    use input = new StringReader(xml)
    use reader = XmlReader.Create(input, settings)
    // Validation is performed as a side effect of reading the document to the end.
    while reader.Read() do ()
    printfn "%A" (Seq.toList (vData None).Value)
    0
 
Output:
>RosettaCode
Validation error: The 'an-attribute' element is invalid - The value 'wrong' is invalid according to its datatype 'http://www.w3.org/2001/XMLSchema:boolean' - The string 'wrong' is not a valid boolean value.
[[Error, 1]]

>RosettaCode -w
Validation warning: Could not find schema information for the element 'root'.
Validation error: The 'an-attribute' element is invalid - The value 'wrong' is invalid according to its datatype 'http://www.w3.org/2001/XMLSchema:boolean' - The string 'wrong' is not a valid boolean value.
[[Error, 1]; [Warning, 1]]

After changing 'wrong' to a valid boolean (e.g. true), the result (without -w) is

[[Error, 0]]

Java[edit]

import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
 
import java.net.MalformedURLException;
import java.net.URL;
 
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import javax.xml.ws.Holder;
 
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
 
public class XmlValidation {
public static void main(String... args) throws MalformedURLException {
URL schemaLocation = new URL("http://venus.eas.asu.edu/WSRepository/xml/Courses.xsd");
URL documentLocation = new URL("http://venus.eas.asu.edu/WSRepository/xml/Courses.xml");
if (validate(schemaLocation, documentLocation)) {
System.out.println("document is valid");
} else {
System.out.println("document is invalid");
}
}
 
// The least code you need for validation
public static boolean minimalValidate(URL schemaLocation, URL documentLocation) {
SchemaFactory factory = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI);
try {
Validator validator = factory.newSchema(schemaLocation).newValidator();
validator.validate(new StreamSource(documentLocation.toString()));
return true;
} catch (Exception e) {
return false;
}
}
 
// A more complete validator
public static boolean validate(URL schemaLocation, URL documentLocation) {
SchemaFactory factory = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI);
final Holder<Boolean> valid = new Holder<>(true);
try {
Validator validator = factory.newSchema(schemaLocation).newValidator();
// Get some better diagnostics out
validator.setErrorHandler(new ErrorHandler(){
@Override
public void warning(SAXParseException exception) {
System.out.println("warning: " + exception.getMessage());
}
 
@Override
public void error(SAXParseException exception) {
System.out.println("error: " + exception.getMessage());
valid.value = false;
}
 
@Override
public void fatalError(SAXParseException exception) throws SAXException {
System.out.println("fatal error: " + exception.getMessage());
throw exception;
}});
validator.validate(new StreamSource(documentLocation.toString()));
return valid.value;
} catch (SAXException e) {
// Already reported above
return false;
} catch (Exception e) {
// If this is the only thing that throws, it's a gross error
System.err.println(e);
return false;
}
}
}

Python[edit]

#!/bin/python
from __future__ import print_function
import lxml
from lxml import etree
 
if __name__=="__main__":
    # Build the XSD used for both checks: a single root element <a> of type xsd:integer.
    schema_root = etree.XML('''\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="a" type="xsd:integer"/>
</xsd:schema>
'''
    )
    schema = etree.XMLSchema(schema_root)

    # A parser constructed with schema=... validates while parsing; a document that
    # violates the schema surfaces as XMLSyntaxError, which is caught and printed below.

    #Good xml
    parser = etree.XMLParser(schema = schema)
    try:
        etree.fromstring("<a>5</a>", parser)
        print ("Finished validating good xml")
    except lxml.etree.XMLSyntaxError as err:
        print (err)

    #Bad xml
    parser = etree.XMLParser(schema = schema)
    try:
        etree.fromstring("<a>5<b>foobar</b></a>", parser)
    except lxml.etree.XMLSyntaxError as err:
        print (err)
Output:
Finished validating good xml
Element 'a': Element content is not allowed, because the type definition is simple.