WiktionaryDumps to words: Difference between revisions

Content added / Content deleted
(Work in progress Java example)
(→ {{header|Java}}: Fixed regex using Pattern.DOTALL)
Line 14: Line 14:
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.ParserConfigurationException;

import java.util.regex.Pattern;
import java.util.regex.Matcher;


class MyHandler extends DefaultHandler {
class MyHandler extends DefaultHandler {
Line 24: Line 27:
@Override
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
public void characters(char[] ch, int start, int length) throws SAXException {
String regex = ".*==French==.*";
Pattern pat = Pattern.compile(regex, Pattern.DOTALL);

switch (lastTag) {
switch (lastTag) {
case TITLE:
case TITLE:
Line 30: Line 36:
case TEXT:
case TEXT:
String text = new String(ch, start, length);
String text = new String(ch, start, length);
if (text.matches("(.*)\n==French==\n(.*)")) {
Matcher mat = pat.matcher(text);
if (mat.matches()) {
System.out.println(title);
System.out.println(title);
}
}
Line 39: Line 46:
@Override
@Override
public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
lastTag = qName;
lastTag = qName;
}
}


@Override
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
public void endElement(String uri, String localName, String qName) throws SAXException {
lastTag = "";
lastTag = "";
}
}
}
}