Separate the house number from the street name
In Germany and the Netherlands postal addresses have the form: street name, followed by the house number, in accordance with the national standards DIN 5008 and NEN 5825 respectively. The problem is that some street names contain numbers (e.g. special years) and some house numbers have letters as an extension. It is a real-life problem, and a difficult one, because in the Netherlands some street names are a tribute to our liberators: these street names contain the numbers 40 and 45, indicating the years of war between 1940 and 1945.
- Task
Write code that correctly separates the house number from the street name and presents them both. Do not display static (hard-coded) results; show only data produced by processing the input.
The suggested approach is to either use the regular expression in the Scala entry or to devise an equivalent algorithm.
The test-set:
Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 – 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-’45 66 Laan ’40-’45 Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
- Reference The Netherlands - Addresses
11l
// Splits a postal address of the form "street name, house number" into
// a (street, house) tuple.
//
// The house number is taken from the last one or two whitespace-separated
// words of `address`:
//  * last word starts with a digit: it is the house number; the word before
//    it is included as well when it also starts with a digit, does not start
//    with "194" (a 1940s year that belongs to the street name) and is not the
//    first word of the address.
//  * last word does not start with a digit: the last two words form the
//    house number, provided the address has more than two words.
// `house` is an empty string when no house number is recognised.
F separateHouseNumber(address)
   V fields = address.split_py()
   // With fewer than two words there is no penultimate word to inspect and
   // nothing to split off; return early instead of indexing out of range.
   I fields.len < 2
      R (address.rtrim(‘ ’), ‘’)
   V last = fields.last
   V penult = fields[(len)-2]
   V house = ‘’
   I last[0].is_digit()
      V isdig = penult[0].is_digit()
      I fields.len > 2 & isdig & !penult.starts_with(‘194’)
         house = penult‘ ’last
      E
         house = last
   E I fields.len > 2
      house = penult‘ ’last
   // The street is whatever precedes the house number, minus trailing blanks.
   R (address[0 .< address.len - house.len].rtrim(‘ ’), house)
// Test set of Dutch and German addresses.
V Addresses = [
   ‘Plataanstraat 5’, ‘Straat 12’, ‘Straat 12 II’,
   ‘Dr. J. Straat 12’, ‘Dr. J. Straat 12 a’, ‘Dr. J. Straat 12-14’,
   ‘Laan 1940 - 1945 37’, ‘Plein 1940 2’, ‘1213-laan 11’,
   ‘16 april 1944 Pad 1’, ‘1e Kruisweg 36’, ‘Laan 1940-'45 66’,
   ‘Laan '40-'45’, ‘Langeloerduinen 3 46’, ‘Marienwaerdt 2e Dreef 2’,
   ‘Provincialeweg N205 1’, ‘Rivium 2e Straat 59.’, ‘Nieuwe gracht 20rd’,
   ‘Nieuwe gracht 20rd 2’, ‘Nieuwe gracht 20zw /2’, ‘Nieuwe gracht 20zw/3’,
   ‘Nieuwe gracht 20 zw/4’, ‘Bahnhofstr. 4’, ‘Wertstr. 10’,
   ‘Lindenhof 1’, ‘Nordesch 20’, ‘Weilstr. 6’,
   ‘Harthauer Weg 2’, ‘Mainaustr. 49’, ‘August-Horch-Str. 3’,
   ‘Marktplatz 31’, ‘Schmidener Weg 3’, ‘Karl-Weysser-Str. 6’]

// Print a two-column table: right-justified street, then the house number
// (or a placeholder when none was found).
print(‘ Street House Number’)
print(‘--------------------- ------------’)
L(addr) Addresses
   V (streetName, houseNo) = separateHouseNumber(addr)
   V shown = I houseNo.empty {‘(none)’} E houseNo
   print(streetName.rjust(22)‘ ’shown)
- Output:
Street House Number --------------------- ------------ Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) Langeloerduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
ALGOL 68
Although Algol 68G has a grep in string procedure which does regular expression matching, this is non-standard, so this sample parses the address line without regular expressions.
BEGIN # separate house numbers from street names in German/Netherlands #
# addresses #
# returns the position of the start of the house number in a #
# Scans the address a and yields the index at which the house number     #
# starts. The result is greater than UPB a when no house number is found #
# (the caller tests for exactly that).                                   #
# The scan first walks backwards from the end of the line over everything #
# that can belong to a house number, then walks forwards again to the    #
# first digit that does not begin a 194x year.                           #
PROC house position = ( STRING a )INT:
BEGIN
# sentinel character returned for any position outside the string #
CHAR eol ch = REPR 0;
# cursor into a, starting at the last character #
INT a pos := UPB a;
# TRUE when the cursor has left the string at either end #
PROC eol = BOOL: a pos < LWB a OR a pos > UPB a;
# character at pos, or the sentinel when pos is out of bounds #
PROC ch = ( INT pos )CHAR:
IF pos < LWB a OR pos > UPB a THEN eol ch ELSE a[ pos ] FI;
# character under the cursor #
PROC curr = CHAR: ch( a pos );
# move the cursor one place back / forward and yield the character there #
PROC prev = CHAR: IF a pos >= LWB a THEN a pos -:= 1; curr ELSE eol ch FI;
PROC next = CHAR: IF a pos <= UPB a THEN a pos +:= 1; curr ELSE eol ch FI;
# TRUE if the character under the cursor is c #
PROC have = ( CHAR c )BOOL: curr = c;
# TRUE if the character at pos lies in a..z; note that the parameter a  #
# is a CHAR here and hides the address string inside this procedure     #
PROC range = ( CHAR a, z, INT pos )BOOL: ch( pos ) >= a AND ch( pos ) <= z;
PROC digit = ( INT pos )BOOL: range( "0", "9", pos );
# backward pass: skip trailing spaces, one optional trailing "." (and    #
# the spaces before it), then a trailing run of "I" characters such as   #
# the "II" suffix in the test data, then more spaces                     #
WHILE have( " " ) DO prev OD;
IF have( "." ) THEN prev; WHILE have( " " ) DO prev OD FI;
WHILE have( "I" ) DO prev OD;
WHILE have( " " ) DO prev OD;
# keep going back over lower case letters, digits, spaces, ".", "/", "-" #
WHILE range( "a", "z", a pos ) OR digit( a pos )
OR have( " " ) OR have( "." ) OR have( "/" ) OR have( "-" )
DO
prev
OD;
# stopped on an apostrophe: step forward again past the digits after it #
IF have( "'" ) THEN # abbreviated year #
WHILE next;
digit( a pos )
DO SKIP OD
FI;
# ran off the front of the line: step forward to the first space or end #
IF eol THEN # must not be the whole line #
WHILE next;
NOT have( " " ) AND NOT eol
DO SKIP OD
FI;
# forward pass: on each round skip spaces, skip the rest of a word that  #
# does not start with a digit, then advance to the next digit; while     #
# that digit begins "194" followed by another digit, skip the number and #
# go round again                                                         #
# must start with a number that doesn't look like a 1940s year #
WHILE WHILE have( " " ) DO next OD;
IF NOT digit( a pos ) THEN
WHILE NOT have( " " ) AND NOT eol DO next OD
FI;
WHILE NOT digit( a pos ) AND NOT eol DO next OD;
ch( a pos ) = "1" AND ch( a pos + 1 ) = "9"
AND ch( a pos + 2 ) = "4" AND digit( a pos + 3 )
DO
WHILE digit( a pos ) DO next OD
OD;
# the cursor is now on the first character of the house number, or past #
# the end of the line when there is none                                #
a pos
END # house position # ;
# the addresses to split #
[]STRING test cases
    = ( "Plataanstraat 5", "Straat 12", "Straat 12 II", "Dr. J. Straat 12"
      , "Dr. J. Straat 12 a", "Dr. J. Straat 12-14", "Laan 1940 - 1945 37"
      , "Plein 1940 2", "1213-laan 11", "16 april 1944 Pad 1", "1e Kruisweg 36"
      , "Laan 1940-'45 66", "Laan '40-'45", "Langeloërduinen 3 46"
      , "Marienwaerdt 2e Dreef 2", "Provincialeweg N205 1", "Rivium 2e Straat 59."
      , "Nieuwe gracht 20rd", "Nieuwe gracht 20rd 2", "Nieuwe gracht 20zw /2"
      , "Nieuwe gracht 20zw/3", "Nieuwe gracht 20 zw/4", "Bahnhofstr. 4"
      , "Wertstr. 10", "Lindenhof 1", "Nordesch 20", "Weilstr. 6"
      , "Harthauer Weg 2", "Mainaustr. 49", "August-Horch-Str. 3"
      , "Marktplatz 31", "Schmidener Weg 3", "Karl-Weysser-Str. 6"
      );
# split each address and print the street (left padded to 40) and number #
FOR i FROM LWB test cases TO UPB test cases DO
    # yields s without its trailing spaces #
    PROC rtrim = ( STRING s )STRING:
         BEGIN
            INT last := UPB s;
            WHILE IF last >= LWB s THEN s[ last ] = " " ELSE FALSE FI DO
                last -:= 1
            OD;
            s[ LWB s : last ]
         END # rtrim # ;
    # yields s preceded by enough spaces to make it len characters long #
    PROC lpad = ( STRING s, INT len )STRING:
         BEGIN
            INT missing = len - ( ( UPB s + 1 ) - LWB s );
            IF missing <= 0 THEN s ELSE " " * missing + s FI
         END # lpad # ;
    STRING address   = rtrim( test cases[ i ] );
    INT    split at  = house position( address );
    BOOL   no number = split at > UPB address;
    STRING street    = IF no number THEN address  ELSE address[ LWB address : split at - 1 ] FI;
    STRING house     = IF no number THEN "(none)" ELSE address[ split at : ] FI;
    print( ( lpad( rtrim( street ), 40 ), " ", rtrim( house ), newline ) )
OD
END
- Output:
Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
EchoLisp
(lib 'struct)
(lib 'sql)
;; Result table: one (name number) record per parsed address.
(define adresses (make-table (struct adresse (name number))))
;; parse street<space>number| II...
;; Group 1 is the (lazy) street part; group 2 must start right after a
;; whitespace character and is either digits up to the end, digits followed
;; by anything up to the end, or a run of "I" characters up to the end.
(define extractor (make-regexp
"^(.*?)\\s(\\d+$|\\d+.*$|II*$)"))
;; making our best with the special names
;; These year fragments belong to street names, not to house numbers.
(define specials '("1940" "1945" "'45" "'40"))
;; Prefixes every occurrence of each special with "@". Because the extractor
;; needs a digit (or "I") directly after the whitespace, a marked year can
;; no longer be picked up as the start of the house number.
(define (rep-special str specials)
(for/fold (ostr str) ((special specials))
(string-replace ostr special (string-append "@" special))))
;; Strips the "@" markers again, from the first element of the record only.
;; NOTE(review): "/@/g" looks like EchoLisp's regexp-string form for a
;; global replace, and the value of set-car! is what gets inserted into the
;; table - confirm both against the EchoLisp reference.
(define (un-rep-special record)
(set-car! record (string-replace (car record) "/@/g" "" )))
;; Parses every address in adressen and inserts the result into the table;
;; when the extractor does not match, the whole address is stored together
;; with a ❓❓❓ marker in place of the number.
(define (task)
(for-each (lambda (x)
(table-insert adresses
(un-rep-special
(or* (regexp-exec extractor (rep-special x specials))
(list x '❓❓❓))))) adressen))
;; Input data: the task's test set plus two extra cases
;; ("Straat 1940 II" and "Dr. J. Straat 40").
(define adressen '(
"Plataanstraat 5"
"Straat 12"
"Straat 12 II"
"Straat 1940 II"
"Dr. J. Straat 40"
"Dr. J. Straat 12 a"
"Dr. J. Straat 12-14"
"Laan 1940 – 1945 37"
"Plein 1940 2"
"1213-laan 11"
"16 april 1944 Pad 1"
"1e Kruisweg 36"
"Laan 1940-'45 66"
"Laan '40-'45"
"Langeloërduinen 3 46"
"Marienwaerdt 2e Dreef 2"
"Provincialeweg N205 1"
"Rivium 2e Straat 59."
"Nieuwe gracht 20rd"
"Nieuwe gracht 20rd 2"
"Nieuwe gracht 20zw /2"
"Nieuwe gracht 20zw/3"
"Nieuwe gracht 20 zw/4"
"Bahnhofstr. 4"
"Wertstr. 10"
"Lindenhof 1"
"Nordesch 20"
"Weilstr. 6"
"Harthauer Weg 2"
"Mainaustr. 49"
"August-Horch-Str. 3"
"Marktplatz 31"
"Schmidener Weg 3"
"Karl-Weysser-Str. 6"))
- Output:
(task) (table-print adresses) [0] Plataanstraat 5 [1] Straat 12 [2] Straat 12 II [3] Straat 1940 II [4] Dr. J. Straat 40 [5] Dr. J. Straat 12 a [6] Dr. J. Straat 12-14 [7] Laan 1940 – 1945 37 [8] Plein 1940 2 [9] 1213-laan 11 [10] 16 april 1944 Pad 1 [11] 1e Kruisweg 36 [12] Laan 1940-'45 66 [13] Laan '40-'45 ❓❓❓ [14] Langeloërduinen 3 46 [15] Marienwaerdt 2e Dreef 2 [16] Provincialeweg N205 1 [17] Rivium 2e Straat 59. [18] Nieuwe gracht 20rd [19] Nieuwe gracht 20rd 2 [20] Nieuwe gracht 20zw /2 [21] Nieuwe gracht 20zw/3 [22] Nieuwe gracht 20 zw/4 [23] Bahnhofstr. 4 [24] Wertstr. 10 [25] Lindenhof 1 [26] Nordesch 20 [27] Weilstr. 6 [28] Harthauer Weg 2 [29] Mainaustr. 49 [30] August-Horch-Str. 3 [31] Marktplatz 31 [32] Schmidener Weg 3 [33] Karl-Weysser-Str. 6
F#
// Separate house number and street in Dutch addresses. Nigel Galloway: September 23rd., 2021
/// Tries to split an address with the regular expression below.
/// On success returns Some(street, number), where `street` is the address
/// up to and including the whitespace character that starts the match and
/// `number` is the matched text (it therefore begins with that whitespace).
/// Returns None when the expression does not match.
let fN (g: string) =
    let hit =
        System.Text.RegularExpressions.Regex.Match(g,@"(\s\d+[-/]\d+)|(\s(?!1940|1945)\d+[a-zI. /]*\d*)$")
    match hit.Success with
    | true -> Some(g.[0..hit.Index],hit.Value)
    | false -> None
// Test data: the task's addresses plus two extra cases.
let td =
    [ "Plataanstraat 5"; "Straat 12"; "Straat 12 II"; "Straat 1940 II"
      "Dr. J. Straat 40"; "Dr. J. Straat 12 a"; "Dr. J. Straat 12-14"
      "Laan 1940 – 1945 37"; "Plein 1940 2"; "1213-laan 11"
      "16 april 1944 Pad 1"; "1e Kruisweg 36"; "Laan 1940-’45 66"
      "Laan ’40-’45"; "Langeloërduinen 3 46"; "Marienwaerdt 2e Dreef 2"
      "Provincialeweg N205 1"; "Rivium 2e Straat 59."; "Nieuwe gracht 20rd"
      "Nieuwe gracht 20rd 2"; "Nieuwe gracht 20zw /2"; "Nieuwe gracht 20zw/3"
      "Nieuwe gracht 20 zw/4"; "Bahnhofstr. 4"; "Wertstr. 10"
      "Lindenhof 1"; "Nordesch 20"; "Weilstr. 6"
      "Harthauer Weg 2"; "Mainaustr. 49"; "August-Horch-Str. 3"
      "Marktplatz 31"; "Schmidener Weg 3"; "Karl-Weysser-Str. 6" ]
printfn " Street Number\n ______ ______"
// Print one line per address: the trimmed street right-aligned in 27
// columns followed by the number, or a FAILED line when fN finds no match.
for address in td do
    match fN address with
    | Some(street, number) -> printfn $"%27s{street.Trim()} %s{number}"
    | None -> printfn $"FAILED %s{address}"
- Output:
Street Number ______ ______ Plataanstraat 5 Straat 12 Straat 12 II FAILED Straat 1940 II Dr. J. Straat 40 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 FAILED Laan '40-'45 LangeloÙrduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
FreeBASIC
' True when the character code b is an ASCII decimal digit. The argument and
' the whole expansion are parenthesised so the macro is safe inside larger
' expressions.
#define isDigit(b) ((b) >= Asc("0") And (b) <= Asc("9"))

' Splits a "street name, house number" address.
'
' Parameters:
'   address - the address to split (words separated by spaces)
'   street  - receives the street name, trimmed
'   house   - receives the house number, or "" when none is recognised
'
' Rules, applied to the last two space-separated words:
'   * last word starts with a digit: it is the house number; the word before
'     it is included when it also starts with a digit, does not start with
'     "194" (a 1940s year in the street name) and is not the first word.
'   * otherwise the last two words form the house number, provided the
'     address has more than two words.
'
' Only the last two words and their positions are tracked, so there is no
' limit on the number of words, and addresses with fewer than two words are
' handled without indexing outside an array. Both outputs are always
' assigned, so stale values in the caller's variables cannot leak through.
Sub separateHouseNumber(address As String, Byref street As String, Byref house As String)
    Dim length As Integer = Len(address)
    Dim size As Integer = 0                 ' number of words seen
    Dim word As String = ""                 ' word currently being collected
    Dim As String last = "", penult = ""    ' the two most recent words
    Dim As Integer wordStart = 0, lastStart = 0, penultStart = 0

    street = Trim(address)
    house = ""

    For i As Integer = 1 To length
        Dim ch As String = Mid(address, i, 1)
        If ch = " " Then
            If word <> "" Then
                penult = last
                penultStart = lastStart
                last = word
                lastStart = wordStart
                size += 1
                word = ""
            End If
        Else
            If word = "" Then wordStart = i
            word &= ch
        End If
    Next
    If word <> "" Then
        penult = last
        penultStart = lastStart
        last = word
        lastStart = wordStart
        size += 1
    End If

    ' A lone word (or a blank line) has no house number to split off.
    If size < 2 Then Exit Sub

    ' Position in address where the house number starts; 0 = none.
    Dim houseStart As Integer = 0
    If isDigit(Asc(last, 1)) Then
        Dim isdig As Boolean = isDigit(Asc(penult, 1))
        If size > 2 And isdig And Not Left(penult, 3) = "194" Then
            houseStart = penultStart
        Else
            houseStart = lastStart
        End If
    Elseif size > 2 Then
        houseStart = penultStart
    End If

    If houseStart > 0 Then
        house = RTrim(Mid(address, houseStart))
        street = Trim(Left(address, houseStart - 1))
    End If
End Sub
' Test set of Dutch and German addresses.
Dim As String addresses(32) = { _
    "Plataanstraat 5", "Straat 12", "Straat 12 II", _
    "Dr. J. Straat 12", "Dr. J. Straat 12 a", "Dr. J. Straat 12-14", _
    "Laan 1940 - 1945 37", "Plein 1940 2", "1213-laan 11", _
    "16 april 1944 Pad 1", "1e Kruisweg 36", "Laan 1940-'45 66", _
    "Laan '40-'45", "Langeloërduinen 3 46", "Marienwaerdt 2e Dreef 2", _
    "Provincialeweg N205 1", "Rivium 2e Straat 59.", "Nieuwe gracht 20rd", _
    "Nieuwe gracht 20rd 2", "Nieuwe gracht 20zw /2", "Nieuwe gracht 20zw/3", _
    "Nieuwe gracht 20 zw/4", "Bahnhofstr. 4", "Wertstr. 10", _
    "Lindenhof 1", "Nordesch 20", "Weilstr. 6", _
    "Harthauer Weg 2", "Mainaustr. 49", "August-Horch-Str. 3", _
    "Marktplatz 31", "Schmidener Weg 3", "Karl-Weysser-Str. 6" }

' Table header.
Print "Street"; Spc(19); "House Number"
Print String(21, "-"); Spc(4); String(12, "-")

' One row per address; a placeholder is shown when no number was found.
For idx As Integer = Lbound(addresses) To Ubound(addresses)
    Dim As String streetName, houseNo
    separateHouseNumber(addresses(idx), streetName, houseNo)
    If Len(houseNo) = 0 Then houseNo = "(none)"
    Print Using "\ \ &"; streetName; houseNo
Next idx

' Wait for a key press before the console closes.
Sleep
- Output:
Same as Go entry.
Go
package main
import (
"fmt"
"strings"
)
// isDigit reports whether b is an ASCII decimal digit.
func isDigit(b byte) bool {
	return '0' <= b && b <= '9'
}

// separateHouseNumber splits an address of the form "street name, house
// number" into its two parts.
//
// The decision is made on the last two whitespace-separated words:
//   - if the last word starts with a digit it is the house number; the word
//     before it is included as well when it also starts with a digit, does
//     not start with "194" (a 1940s year belonging to the street name) and
//     is not the first word of the address;
//   - otherwise the last two words form the house number, provided the
//     address has more than two words.
//
// house is "" when no house number is recognised. Addresses with fewer than
// two words (including the empty string) are returned unchanged as the
// street. Both results are rebuilt from the words, so leading, trailing and
// repeated whitespace in the input is normalised to single spaces.
func separateHouseNumber(address string) (street string, house string) {
	fields := strings.Fields(address)
	size := len(fields)
	if size < 2 {
		// Nothing to split off; also avoids indexing fields[size-2].
		return strings.Join(fields, " "), ""
	}
	last, penult := fields[size-1], fields[size-2]

	// n is the number of trailing words that make up the house number.
	n := 0
	switch {
	case isDigit(last[0]):
		n = 1
		if size > 2 && isDigit(penult[0]) && !strings.HasPrefix(penult, "194") {
			n = 2
		}
	case size > 2:
		n = 2
	}

	street = strings.Join(fields[:size-n], " ")
	house = strings.Join(fields[size-n:], " ")
	return
}
// main runs separateHouseNumber over the task's test set and prints the
// result as a two-column table, showing "(none)" when no house number was
// found.
func main() {
	testSet := []string{
		"Plataanstraat 5", "Straat 12", "Straat 12 II",
		"Dr. J. Straat 12", "Dr. J. Straat 12 a", "Dr. J. Straat 12-14",
		"Laan 1940 - 1945 37", "Plein 1940 2", "1213-laan 11",
		"16 april 1944 Pad 1", "1e Kruisweg 36", "Laan 1940-'45 66",
		"Laan '40-'45", "Langeloërduinen 3 46", "Marienwaerdt 2e Dreef 2",
		"Provincialeweg N205 1", "Rivium 2e Straat 59.", "Nieuwe gracht 20rd",
		"Nieuwe gracht 20rd 2", "Nieuwe gracht 20zw /2", "Nieuwe gracht 20zw/3",
		"Nieuwe gracht 20 zw/4", "Bahnhofstr. 4", "Wertstr. 10",
		"Lindenhof 1", "Nordesch 20", "Weilstr. 6",
		"Harthauer Weg 2", "Mainaustr. 49", "August-Horch-Str. 3",
		"Marktplatz 31", "Schmidener Weg 3", "Karl-Weysser-Str. 6",
	}

	fmt.Println("Street House Number")
	fmt.Println("--------------------- ------------")
	for i := 0; i < len(testSet); i++ {
		streetName, number := separateHouseNumber(testSet[i])
		shown := number
		if shown == "" {
			shown = "(none)"
		}
		fmt.Printf("%-22s %s\n", streetName, shown)
	}
}
- Output:
Street House Number --------------------- ------------ Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
Haskell
{-# LANGUAGE OverloadedStrings #-}
{- Recommended package versions to use:
base >= 4.7 && < 5
regex-pcre-builtin >= 0.95 && < 0.96
text >= 1.2.3 && < 1.3
-}
module Main where
import Control.Monad
import Data.Char
import Data.Monoid
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Text.Regex.PCRE.Text
-- | The addresses from the task description.
testSet :: [T.Text]
testSet =
  [ "Plataanstraat 5", "Straat 12", "Straat 12 II"
  , "Dr. J. Straat 12", "Dr. J. Straat 12 a", "Dr. J. Straat 12-14"
  , "Laan 1940 – 1945 37", "Plein 1940 2", "1213-laan 11"
  , "16 april 1944 Pad 1", "1e Kruisweg 36", "Laan 1940-’45 66"
  , "Laan ’40-’45", "Langeloërduinen 3 46", "Marienwaerdt 2e Dreef 2"
  , "Provincialeweg N205 1", "Rivium 2e Straat 59.", "Nieuwe gracht 20rd"
  , "Nieuwe gracht 20rd 2", "Nieuwe gracht 20zw /2", "Nieuwe gracht 20zw/3"
  , "Nieuwe gracht 20 zw/4", "Bahnhofstr. 4", "Wertstr. 10"
  , "Lindenhof 1", "Nordesch 20", "Weilstr. 6"
  , "Harthauer Weg 2", "Mainaustr. 49", "August-Horch-Str. 3"
  , "Marktplatz 31", "Schmidener Weg 3", "Karl-Weysser-Str. 6"
  ]
-- | The regular expression from the Perl implementation of the task,
-- written one fragment per line (each fragment ends with a newline) for
-- use with PCRE's extended syntax.
addressPattern :: T.Text
addressPattern = T.unlines fragments
  where
    fragments =
      [ "^ (.*?) \\s+"
      , " ("
      , " \\d* (\\-|\\/)? \\d*"
      , " | \\d{1,3} [a-zI./ ]* \\d{0,3}"
      , " )"
      , "$"
      ]
-- | Runs the compiled regex over the text. A match with at least two
-- sub-matches yields (street, house); a regex error fails the IO action;
-- anything else yields the whole text as the street with an empty house
-- number.
splitAddressASCII :: Regex -> T.Text -> IO (T.Text, T.Text)
splitAddressASCII rx txt = regexec rx txt >>= interpret
  where
    interpret (Left w) = fail (show w)
    interpret (Right (Just (_, _, _, street : house : _))) = return (street, house)
    interpret _ = return (txt, "")
-- | Splits an address that may contain non-ASCII characters.
--
-- PCRE does not appear to handle UTF-8 correctly here, even with the
-- compUTF8 option (reason unknown). As a workaround only the trailing
-- all-ASCII part of the text is given to the regex; everything up to and
-- including the last non-ASCII character is assumed to belong to the
-- street name and is put back in front of the street afterwards.
splitAddress :: Regex -> T.Text -> IO (T.Text, T.Text)
splitAddress rx txt = do
  (street, house) <- splitAddressASCII rx asciiTail
  return (leadingPart <> street, house)
  where
    leadingPart = T.dropWhileEnd isAscii txt
    asciiTail = T.takeWhileEnd isAscii txt
-- | Renders (street, house) pairs as a two-column table: a heading line, a
-- rule of dashes and one line per pair. The street column is as wide as its
-- longest entry (heading included) plus four spaces; every line is stripped
-- of surrounding whitespace.
formatPairs :: [(T.Text, T.Text)] -> [T.Text]
formatPairs pairs = map render (headings : rule : pairs)
  where
    headings = ("Street", "House Number")
    rows = headings : pairs
    streetWidth = maximum (map (T.length . fst) rows)
    houseWidth = maximum (map (T.length . snd) rows)
    rule = (T.replicate streetWidth "-", T.replicate houseWidth "-")
    render (street, house) =
      T.strip (T.justifyLeft (streetWidth + 4) ' ' street <> house)
-- | Compiles the address pattern (extended syntax, UTF-8), splits every
-- address of the test set and prints the resulting table. A pattern that
-- does not compile aborts with the error offset and message.
main :: IO ()
main = do
  compiled <- compile (compExtended + compUTF8) execBlank addressPattern
  rx <- either (\(offset, str) -> fail $ show offset ++ ": " ++ str) return compiled
  mapM (splitAddress rx) testSet >>= mapM_ T.putStrLn . formatPairs
- Output:
Street House Number --------------------- ------------ Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 – 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-’45 66 Laan ’40-’45 Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
J
Solution:
NB. special: 6-row, 2-column character table '40','41',...,'45' - the
NB. two-digit year fragments that may not start a house number.
special=: '4',.'012345'
NB. digit: the decimal digit characters.
digit=: '0123456789'
NB. nope y: the largest index the house number must come after - the maximum
NB. of the index of the first space in y and 1 + every index at which one of
NB. the rows of special begins in y.
nope=: {{>./({.I.y=' '),1+I. special +./@:(E."1) y}}
NB. here y: candidate split points - the index of every digit in y, followed
NB. by #y (the appended 1), so "split at the end" is always available.
here=: {{I.1,~y e.digit}}
NB. din5008 y: boxes (street ; house). The split point is the first candidate
NB. from here that is greater than nope; y is cut there with {. (take) and
NB. }. (drop). Neither part is trimmed.
din5008=: ({.;}.)~ here {.@#~ nope < here
Sample data:
NB. sampledata: the text below cut at each line end (];._2) into a character
NB. table with one address per row; shorter rows are padded with spaces.
NB. The text runs up to the closing ) and cannot carry comments itself.
sampledata=: ];._2 noun define
Straat 12
Straat 12 II
Dr. J. Straat 12
Dr. J. Straat 12 a
Dr. J. Straat 12-14
Laan 1940 – 1945 37
Plein 1940 2
1213-laan 11
16 april 1944 Pad 1
1e Kruisweg 36
Laan 1940-’45 66
Laan ’40-’45
Langeloërduinen 3 46
Marienwaerdt 2e Dreef 2
Provincialeweg N205 1
Rivium 2e Straat 59.
Nieuwe gracht 20rd
Nieuwe gracht 20rd 2
Nieuwe gracht 20zw /2
Nieuwe gracht 20zw/3
Nieuwe gracht 20 zw/4
Bahnhofstr. 4
Wertstr. 10
Lindenhof 1
Nordesch 20
Weilstr. 6
Harthauer Weg 2
Mainaustr. 49
August-Horch-Str. 3
Marktplatz 31
Schmidener Weg 3
Karl-Weysser-Str. 6
)
Example:
din5008"1 sampledata
┌───────────────────┬────────────────┐
│Straat │12 │
├───────────────────┼────────────────┤
│Straat │12 II │
├───────────────────┼────────────────┤
│Dr. J. Straat │12 │
├───────────────────┼────────────────┤
│Dr. J. Straat │12 a │
├───────────────────┼────────────────┤
│Dr. J. Straat │12-14 │
├───────────────────┼────────────────┤
│Laan 1940 – 1945 │37 │
├───────────────────┼────────────────┤
│Plein 1940 │2 │
├───────────────────┼────────────────┤
│1213-laan │11 │
├───────────────────┼────────────────┤
│16 april 1944 Pad │1 │
├───────────────────┼────────────────┤
│1e Kruisweg │36 │
├───────────────────┼────────────────┤
│Laan 1940-’45 │66 │
├───────────────────┼────────────────┤
│Laan ’40-’45 │ │
├───────────────────┼────────────────┤
│Langeloërduinen │3 46 │
├───────────────────┼────────────────┤
│Marienwaerdt │2e Dreef 2 │
├───────────────────┼────────────────┤
│Provincialeweg N │205 1 │
├───────────────────┼────────────────┤
│Rivium │2e Straat 59. │
├───────────────────┼────────────────┤
│Nieuwe gracht │20rd │
├───────────────────┼────────────────┤
│Nieuwe gracht │20rd 2 │
├───────────────────┼────────────────┤
│Nieuwe gracht │20zw /2 │
├───────────────────┼────────────────┤
│Nieuwe gracht │20zw/3 │
├───────────────────┼────────────────┤
│Nieuwe gracht │20 zw/4 │
├───────────────────┼────────────────┤
│Bahnhofstr. │4 │
├───────────────────┼────────────────┤
│Wertstr. │10 │
├───────────────────┼────────────────┤
│Lindenhof │1 │
├───────────────────┼────────────────┤
│Nordesch │20 │
├───────────────────┼────────────────┤
│Weilstr. │6 │
├───────────────────┼────────────────┤
│Harthauer Weg │2 │
├───────────────────┼────────────────┤
│Mainaustr. │49 │
├───────────────────┼────────────────┤
│August-Horch-Str. │3 │
├───────────────────┼────────────────┤
│Marktplatz │31 │
├───────────────────┼────────────────┤
│Schmidener Weg │3 │
├───────────────────┼────────────────┤
│Karl-Weysser-Str. │6 │
└───────────────────┴────────────────┘
jq
The following uses the regex from the Perl version, together with a regex covering the case of "Laan '40-'45" since that is apparently acceptable.
An alternative would be to use the Scala regex, which in the present context (in particular, using the "x" regex modifier) could be rendered as:
"(?<s>.*) (?<n> (\\s\\d+[-/]\\d+) | (\\s(?!1940|1945)\\d+[a-zI. /]*\\d*)$ | \\d+\\['][40|45]$ )"
# The regex from the Perl version, for use with the "x" (extended) flag.
# Named groups: s = street, n = house number.
def regex:
  [ "^ (?<s>.*?) \\s+",
    " (?<n>\\d* ( \\-|\\/)? \\d*",
    " | \\d{1,3} [a-zI./ ]* \\d{0,3}",
    " )$"
  ] | join("");

# Input: an address string
# Output: {s, n}
# An address that ends in '40-'45 (no house number) is accepted with
# .n set to "(none)".
# If the input cannot be parsed,
# then .s is a copy of the input, and .n is "(Error)"
def parseStreetNumber:
  . as $line
  | ( capture(regex; "x")
      // capture( "^(?<s>.*'40 *- *'45) *$" )
      // {s: $line, n: "(Error)"} )
  | .n |= (if . == "" or . == null then "(none)" else . end) ;
# Converts the input to a string and pads it on the left with spaces to a
# width of $len.
def lpad($len):
  tostring as $text
  | ($len - ($text | length)) as $pad
  | (" " * $pad)[:$pad] + $text;
The Task
# Emits the addresses of the test set as a stream of strings.
def addresses:
  "Plataanstraat 5", "Straat 12", "Straat 12 II",
  "Dr. J. Straat 12", "Dr. J. Straat 12 a", "Dr. J. Straat 12-14",
  "Laan 1940 - 1945 37", "Plein 1940 2", "1213-laan 11",
  "16 april 1944 Pad 1", "1e Kruisweg 36", "Laan 1940-'45 66",
  "Laan '40-'45", "Langeloërduinen 3 46", "Marienwaerdt 2e Dreef 2",
  "Provincialeweg N205 1", "Rivium 2e Straat 59.", "Nieuwe gracht 20rd",
  "Nieuwe gracht 20rd 2", "Nieuwe gracht 20zw /2", "Nieuwe gracht 20zw/3",
  "Nieuwe gracht 20 zw/4", "Bahnhofstr. 4", "Wertstr. 10",
  "Lindenhof 1", "Nordesch 20", "Weilstr. 6",
  "Harthauer Weg 2", "Mainaustr. 49", "August-Horch-Str. 3",
  "Marktplatz 31", "Schmidener Weg 3", "Karl-Weysser-Str. 6"
;
# Emits the table: two header lines, then one line per address with the
# street left-padded to 22 characters followed by the house number.
def task:
  "Street House Number",
  "--------------------- ------------",
  ( addresses
    | parseStreetNumber
    | (.s | lpad(22)) + " " + .n ) ;

task
- Output:
Street House Number --------------------- ------------ Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
Julia
Uses the regex from the Perl version.
# Extended-syntax regex from the Perl version: capture 1 is the street,
# capture 2 the house number.
const regex = r"""^ (.*?) \s+
(
\d* (\-|\/)? \d*
| \d{1,3} [a-zI./ ]* \d{0,3}
)
$"""x

# The test set, one address per line.
const adressen = """
Plataanstraat 5
Straat 12
Straat 12 II
Dr. J. Straat 12
Dr. J. Straat 12 a
Dr. J. Straat 12-14
Laan 1940 – 1945 37
Plein 1940 2
1213-laan 11
16 april 1944 Pad 1
1e Kruisweg 36
Laan 1940-’45 66
Laan ’40-’45
Langeloërduinen 3 46
Marienwaerdt 2e Dreef 2
Provincialeweg N205 1
Rivium 2e Straat 59.
Nieuwe gracht 20rd
Nieuwe gracht 20rd 2
Nieuwe gracht 20zw /2
Nieuwe gracht 20zw/3
Nieuwe gracht 20 zw/4
Bahnhofstr. 4
Wertstr. 10
Lindenhof 1
Nordesch 20
Weilstr. 6
Harthauer Weg 2
Mainaustr. 49
August-Horch-Str. 3
Marktplatz 31
Schmidener Weg 3
Karl-Weysser-Str. 6"""

# Report how each line splits, or "(Error)" when the regex does not match.
for line in map(strip, split(adressen, "\n"))
    matched = match(regex, line)
    if matched === nothing
        println(rpad(line, 30), "(Error)")
    else
        street, number = matched.captures
        println(rpad(line, 30), "split as street => $street, number => $number")
    end
end
- Output:
Plataanstraat 5 split as street => Plataanstraat, number => 5 Straat 12 split as street => Straat, number => 12 Straat 12 II split as street => Straat, number => 12 II Dr. J. Straat 12 split as street => Dr. J. Straat, number => 12 Dr. J. Straat 12 a split as street => Dr. J. Straat, number => 12 a Dr. J. Straat 12-14 split as street => Dr. J. Straat, number => 12-14 Laan 1940 – 1945 37 split as street => Laan 1940 – 1945, number => 37 Plein 1940 2 split as street => Plein 1940, number => 2 1213-laan 11 split as street => 1213-laan, number => 11 16 april 1944 Pad 1 split as street => 16 april 1944 Pad, number => 1 1e Kruisweg 36 split as street => 1e Kruisweg, number => 36 Laan 1940-’45 66 split as street => Laan 1940-’45, number => 66 Laan ’40-’45 (Error) Langeloërduinen 3 46 split as street => Langeloërduinen, number => 3 46 Marienwaerdt 2e Dreef 2 split as street => Marienwaerdt 2e Dreef, number => 2 Provincialeweg N205 1 split as street => Provincialeweg N205, number => 1 Rivium 2e Straat 59. split as street => Rivium 2e Straat, number => 59. Nieuwe gracht 20rd split as street => Nieuwe gracht, number => 20rd Nieuwe gracht 20rd 2 split as street => Nieuwe gracht, number => 20rd 2 Nieuwe gracht 20zw /2 split as street => Nieuwe gracht, number => 20zw /2 Nieuwe gracht 20zw/3 split as street => Nieuwe gracht, number => 20zw/3 Nieuwe gracht 20 zw/4 split as street => Nieuwe gracht, number => 20 zw/4 Bahnhofstr. 4 split as street => Bahnhofstr., number => 4 Wertstr. 10 split as street => Wertstr., number => 10 Lindenhof 1 split as street => Lindenhof, number => 1 Nordesch 20 split as street => Nordesch, number => 20 Weilstr. 6 split as street => Weilstr., number => 6 Harthauer Weg 2 split as street => Harthauer Weg, number => 2 Mainaustr. 49 split as street => Mainaustr., number => 49 August-Horch-Str. 
3 split as street => August-Horch-Str., number => 3 Marktplatz 31 split as street => Marktplatz, number => 31 Schmidener Weg 3 split as street => Schmidener Weg, number => 3 Karl-Weysser-Str. 6 split as street => Karl-Weysser-Str., number => 6
Kotlin
// version 1.0.6
/** Word separator: one or more whitespace characters. */
val r = Regex("""\s+""")

/**
 * Splits an address of the form "street name, house number".
 *
 * The decision is made on the last two whitespace-separated words:
 *  - if the last word starts with a digit it is the house number; the word
 *    before it is included as well when it also starts with a digit, does
 *    not start with "194" (a 1940s year belonging to the street name) and is
 *    not the first word of the address;
 *  - otherwise the last two words form the house number, provided the
 *    address has more than two words.
 *
 * Addresses with fewer than two words (including blank input) are returned
 * unchanged, apart from trimming, as the street. Both parts are rebuilt from
 * the words, so surrounding and repeated whitespace is normalised to single
 * spaces.
 *
 * @param address the address to split
 * @return the pair (street, house); house is "" when no house number is
 *         recognised
 */
fun separateHouseNumber(address: String): Pair<String, String> {
    // Trim first: splitting untrimmed text on \s+ yields empty first/last
    // elements, and indexing [0] on an empty word throws.
    val trimmed = address.trim()
    val words = if (trimmed.isEmpty()) emptyList() else trimmed.split(r)
    val size = words.size
    if (size < 2) return Pair(trimmed, "")

    val last = words[size - 1]
    val penult = words[size - 2]

    // Number of trailing words that make up the house number.
    val houseWords = when {
        last[0] in '0'..'9' ->
            if (size > 2 && penult[0] in '0'..'9' && !penult.startsWith("194")) 2 else 1
        size > 2 -> 2
        else -> 0
    }

    val street = words.dropLast(houseWords).joinToString(" ")
    val house = words.takeLast(houseWords).joinToString(" ")
    return Pair(street, house)
}
/**
 * Runs [separateHouseNumber] over the task's test set and prints a
 * two-column table, showing "(none)" where no house number was found.
 */
fun main(args: Array<String>) {
    val testSet = listOf(
        "Plataanstraat 5", "Straat 12", "Straat 12 II",
        "Dr. J. Straat 12", "Dr. J. Straat 12 a", "Dr. J. Straat 12-14",
        "Laan 1940 - 1945 37", "Plein 1940 2", "1213-laan 11",
        "16 april 1944 Pad 1", "1e Kruisweg 36", "Laan 1940-'45 66",
        "Laan '40-'45", "Langeloërduinen 3 46", "Marienwaerdt 2e Dreef 2",
        "Provincialeweg N205 1", "Rivium 2e Straat 59.", "Nieuwe gracht 20rd",
        "Nieuwe gracht 20rd 2", "Nieuwe gracht 20zw /2", "Nieuwe gracht 20zw/3",
        "Nieuwe gracht 20 zw/4", "Bahnhofstr. 4", "Wertstr. 10",
        "Lindenhof 1", "Nordesch 20", "Weilstr. 6",
        "Harthauer Weg 2", "Mainaustr. 49", "August-Horch-Str. 3",
        "Marktplatz 31", "Schmidener Weg 3", "Karl-Weysser-Str. 6"
    )
    println("Street House Number")
    println("--------------------- ------------")
    testSet.forEach { entry ->
        val (streetName, number) = separateHouseNumber(entry)
        val shown = if (number == "") "(none)" else number
        println("${streetName.padEnd(22)} $shown")
    }
}
- Output:
Street House Number --------------------- ------------ Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
Nim
import strutils except align
from unicode import align
func separateHouseNumber(address: string): tuple[street, house: string] =
  ## Splits an address of the form "street name, house number".
  ##
  ## The decision is made on the last two whitespace-separated words:
  ## * if the last word starts with a digit it is the house number; the word
  ##   before it is included as well when it also starts with a digit, does
  ##   not start with "194" (a 1940s year belonging to the street name) and
  ##   is not the first word of the address;
  ## * otherwise the last two words form the house number, provided the
  ##   address has more than two words.
  ##
  ## `house` is empty when no house number is recognised. Addresses with
  ## fewer than two words (including the empty string) are returned as the
  ## street. Both parts are rebuilt from the words, so surrounding and
  ## repeated whitespace is normalised to single spaces.
  let fields = address.splitWhitespace()
  if fields.len < 2:
    # Nothing to split off; also avoids indexing fields[^2].
    return (street: fields.join(" "), house: "")
  let last = fields[^1]
  let penult = fields[^2]
  # Number of trailing words that make up the house number.
  var houseWords = 0
  if last[0].isDigit():
    if fields.len > 2 and penult[0].isDigit() and not penult.startsWith("194"):
      houseWords = 2
    else:
      houseWords = 1
  elif fields.len > 2:
    houseWords = 2
  result.street = fields[0 ..< fields.len - houseWords].join(" ")
  if houseWords > 0:
    result.house = fields[^houseWords .. ^1].join(" ")
# Test set of Dutch and German addresses.
const Addresses = [
  "Plataanstraat 5", "Straat 12", "Straat 12 II",
  "Dr. J. Straat 12", "Dr. J. Straat 12 a", "Dr. J. Straat 12-14",
  "Laan 1940 - 1945 37", "Plein 1940 2", "1213-laan 11",
  "16 april 1944 Pad 1", "1e Kruisweg 36", "Laan 1940-'45 66",
  "Laan '40-'45", "Langeloërduinen 3 46", "Marienwaerdt 2e Dreef 2",
  "Provincialeweg N205 1", "Rivium 2e Straat 59.", "Nieuwe gracht 20rd",
  "Nieuwe gracht 20rd 2", "Nieuwe gracht 20zw /2", "Nieuwe gracht 20zw/3",
  "Nieuwe gracht 20 zw/4", "Bahnhofstr. 4", "Wertstr. 10",
  "Lindenhof 1", "Nordesch 20", "Weilstr. 6",
  "Harthauer Weg 2", "Mainaustr. 49", "August-Horch-Str. 3",
  "Marktplatz 31", "Schmidener Weg 3", "Karl-Weysser-Str. 6"]

# Print the table: street right-aligned in 22 columns, then the house number
# or a placeholder when none was found.
echo " Street House Number"
echo "————————————————————— ————————————"
for address in Addresses:
  let parts = address.separateHouseNumber()
  let shown = if parts.house.len == 0: "(none)" else: parts.house
  echo parts.street.align(22), " ", shown
- Output:
Street House Number ————————————————————— ———————————— Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
Perl
# Test set: Dutch and German addresses in "street name, house number" order.
@addresses = (
    'Plataanstraat 5', 'Straat 12', 'Straat 12 II', 'Dr. J. Straat 12',
    'Dr. J. Straat 12 a', 'Dr. J. Straat 12-14', 'Laan 1940 – 1945 37', 'Plein 1940 2',
    '1213-laan 11', '16 april 1944 Pad 1', '1e Kruisweg 36', 'Laan 1940-’45 66',
    'Laan ’40-’45', 'Langeloërduinen 3 46', 'Marienwaerdt 2e Dreef 2', 'Provincialeweg N205 1',
    'Rivium 2e Straat 59.', 'Nieuwe gracht 20rd', 'Nieuwe gracht 20rd 2', 'Nieuwe gracht 20zw /2',
    'Nieuwe gracht 20zw/3', 'Nieuwe gracht 20 zw/4', 'Bahnhofstr. 4', 'Wertstr. 10',
    'Lindenhof 1', 'Nordesch 20', 'Weilstr. 6', 'Harthauer Weg 2',
    'Mainaustr. 49', 'August-Horch-Str. 3', 'Marktplatz 31', 'Schmidener Weg 3',
    'Karl-Weysser-Str. 6');

# For every address, capture the street (shortest possible prefix) and the
# house number (the remainder up to the end of the string), then print both.
for (@addresses) {
    # The match returns three captures in list context; only the first two
    # (street, house number) are kept.
    my($street,$number) =
    m[^ (.*?) \s+
        (
            \d* (\-|\/)? \d*
          | \d{1,3} [a-zI./ ]* \d{0,3}
        )
    $]x;
    # Pick the argument list inside the printf call. Written as
    # "$number ? printf ... : (...)" the false branch was a bare list in void
    # context, so unmatched addresses were silently dropped. Testing with
    # defined/length also keeps a house number of "0" from counting as unmatched.
    printf "%-26s\t%s\n",
        (defined $number && length $number) ? ($street, $number) : ($_, "\t(no match)");
}
- Output:
Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 – 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-’45 66 Laan ’40-’45 (no match) Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
Phix
with javascript_semantics

-- True when ch is one of the characters '0'..'9'.
function isDigit(integer ch)
    return ch>='0' and ch<='9'
end function

-- Splits an address into {street, house}.
-- Only the last two space-separated words are examined:
--  * last word starts with a digit: it is the house number; the word before
--    it is included when there are more than two words and that word starts
--    with a digit but not with "194";
--  * otherwise, with more than two words, the last two words form the house number;
--  * otherwise house is returned as "(none)" and street is the whole address.
function separateHouseNumber(sequence address)
    sequence words = split(address)
    integer n = length(words),
            take = 0            -- number of trailing words that make up the house number
    if n>1 then
        string tail = words[$]
        if not isDigit(tail[1]) then
            if n>2 then
                take = 2
            end if
        else
            string prev = words[$-1]
            if n>2 and isDigit(prev[1]) and match("194",prev)!=1 then
                take = 2
            else
                take = 1
            end if
        end if
    end if
    if take=0 then
        return {join(words),"(none)"}
    end if
    return {join(words[1..$-take]),join(words[$-take+1..$])}
end function

constant addresses = {"Plataanstraat 5",
                      "Straat 12",
                      "Straat 12 II",
                      "Dr. J. Straat 12",
                      "Dr. J. Straat 12 a",
                      "Dr. J. Straat 12-14",
                      "Laan 1940 - 1945 37",
                      "Plein 1940 2",
                      "1213-laan 11",
                      "16 april 1944 Pad 1",
                      "1e Kruisweg 36",
                      "Laan 1940-'45 66",
                      "Laan '40-'45",
                      "Langeloërduinen 3 46",
                      "Marienwaerdt 2e Dreef 2",
                      "Provincialeweg N205 1",
                      "Rivium 2e Straat 59.",
                      "Nieuwe gracht 20rd",
                      "Nieuwe gracht 20rd 2",
                      "Nieuwe gracht 20zw /2",
                      "Nieuwe gracht 20zw/3",
                      "Nieuwe gracht 20 zw/4",
                      "Bahnhofstr. 4",
                      "Wertstr. 10",
                      "Lindenhof 1",
                      "Nordesch 20",
                      "Weilstr. 6",
                      "Harthauer Weg 2",
                      "Mainaustr. 49",
                      "August-Horch-Str. 3",
                      "Marktplatz 31",
                      "Schmidener Weg 3",
                      "Karl-Weysser-Str. 6"}

-- Prints the street / house-number table for every test address.
procedure main()
    printf(1,"Street House Number\n")
    printf(1,"--------------------- ------------\n")
    for idx=1 to length(addresses) do
        sequence pair = separateHouseNumber(addresses[idx])
        printf(1,"%-22s %s\n", pair)
    end for
end procedure

main()
- Output:
Street House Number --------------------- ------------ Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) LangeloÙrduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6
Python
Plataanstraat 5 split as (Plataanstraat, 5)
Straat 12 split as (Straat, 12)
Straat 12 II split as (Straat, 12 II)
Dr. J. Straat 12 split as (Dr. J. Straat , 12)
Dr. J. Straat 12 a split as (Dr. J. Straat, 12 a)
Dr. J. Straat 12-14 split as (Dr. J. Straat, 12-14)
Laan 1940 – 1945 37 split as (Laan 1940 – 1945, 37)
Plein 1940 2 split as (Plein 1940, 2)
1213-laan 11 split as (1213-laan, 11)
16 april 1944 Pad 1 split as (16 april 1944 Pad, 1)
1e Kruisweg 36 split as (1e Kruisweg, 36)
Laan 1940-’45 66 split as (Laan 1940-’45, 66)
Laan ’40-’45 split as (Laan ’40-’45,)
Langeloërduinen 3 46 split as (Langeloërduinen, 3 46)
Marienwaerdt 2e Dreef 2 split as (Marienwaerdt 2e Dreef, 2)
Provincialeweg N205 1 split as (Provincialeweg N205, 1)
Rivium 2e Straat 59. split as (Rivium 2e Straat, 59.)
Nieuwe gracht 20rd split as (Nieuwe gracht, 20rd)
Nieuwe gracht 20rd 2 split as (Nieuwe gracht, 20rd 2)
Nieuwe gracht 20zw /2 split as (Nieuwe gracht, 20zw /2)
Nieuwe gracht 20zw/3 split as (Nieuwe gracht, 20zw/3)
Nieuwe gracht 20 zw/4 split as (Nieuwe gracht, 20 zw/4)
Bahnhofstr. 4 split as (Bahnhofstr., 4)
Wertstr. 10 split as (Wertstr., 10)
Lindenhof 1 split as (Lindenhof, 1)
Nordesch 20 split as (Nordesch, 20)
Weilstr. 6 split as (Weilstr., 6)
Harthauer Weg 2 split as (Harthauer Weg, 2)
Mainaustr. 49 split as (Mainaustr., 49)
August-Horch-Str. 3 split as (August-Horch-Str., 3)
Marktplatz 31 split as (Marktplatz, 31)
Schmidener Weg 3 split as (Schmidener Weg, 3)
Karl-Weysser-Str. 6 split as (Karl-Weysser-Str., 6)''')
Racket
Same as other regexp-splittings on this page. (I don't see much point in this, but the related Starting_a_web_browser seems like a good idea.)
#lang racket
;; Regular expression that splits an address into two captures:
;;   1. the street: the shortest prefix (lazy `.*?`) followed by whitespace;
;;   2. the house number, anchored to the end of the string, which is either
;;      - digits, a `-` or `/`, and more digits (e.g. "12-14"), or
;;      - digits that do not begin with 1940 or 1945 (negative lookahead),
;;        followed by any run of lowercase letters, `I`, `.`, space or `/`,
;;        and optional trailing digits (e.g. "12 a", "20zw /2").
(define extractor-rx
(pregexp (string-append "^(.*?)\\s+((?:"
"(?:\\d+[-/]\\d+)"
"|(?:(?!1940|1945)\\d+[a-zI. /]*\\d*)"
")$)")))
;; Test addresses, one per line of a single string.
;; NOTE(review): compared with the task's test set this list adds
;; "Straat 1940 II" and "Dr. J. Straat 40" and has no "Dr. J. Straat 12".
(define adressen
"Plataanstraat 5
Straat 12
Straat 12 II
Straat 1940 II
Dr. J. Straat 40
Dr. J. Straat 12 a
Dr. J. Straat 12-14
Laan 1940 – 1945 37
Plein 1940 2
1213-laan 11
16 april 1944 Pad 1
1e Kruisweg 36
Laan 1940-’45 66
Laan ’40-’45
Langeloërduinen 3 46
Marienwaerdt 2e Dreef 2
Provincialeweg N205 1
Rivium 2e Straat 59.
Nieuwe gracht 20rd
Nieuwe gracht 20rd 2
Nieuwe gracht 20zw /2
Nieuwe gracht 20zw/3
Nieuwe gracht 20 zw/4
Bahnhofstr. 4
Wertstr. 10
Lindenhof 1
Nordesch 20
Weilstr. 6
Harthauer Weg 2
Mainaustr. 49
August-Horch-Str. 3
Marktplatz 31
Schmidener Weg 3
Karl-Weysser-Str. 6")
;; Matches `str` against `extractor-rx`.
;; Returns the match list (whole match, street, house number) or #f.
(define splits-adressen
  (lambda (str)
    (regexp-match extractor-rx str)))

;; Print every address with its (street house-number) pair, or ??? when the
;; expression does not match.
(for ([str (in-list (string-split adressen #rx" *\r?\n *"))])
  (define matched (splits-adressen str))
  (printf "~s -> ~s\n" str
          (if matched (cdr matched) '???)))
- Output:
"Plataanstraat 5" -> ("Plataanstraat" "5") "Straat 12" -> ("Straat" "12") "Straat 12 II" -> ("Straat" "12 II") "Straat 1940 II" -> ??? "Dr. J. Straat 40" -> ("Dr. J. Straat" "40") "Dr. J. Straat 12 a" -> ("Dr. J. Straat" "12 a") "Dr. J. Straat 12-14" -> ("Dr. J. Straat" "12-14") "Laan 1940 – 1945 37" -> ("Laan 1940 – 1945" "37") "Plein 1940 2" -> ("Plein 1940" "2") "1213-laan 11" -> ("1213-laan" "11") "16 april 1944 Pad 1" -> ("16 april 1944 Pad" "1") "1e Kruisweg 36" -> ("1e Kruisweg" "36") "Laan 1940-’45 66" -> ("Laan 1940-’45" "66") "Laan ’40-’45" -> ??? "Langeloërduinen 3 46" -> ("Langeloërduinen" "3 46") "Marienwaerdt 2e Dreef 2" -> ("Marienwaerdt 2e Dreef" "2") "Provincialeweg N205 1" -> ("Provincialeweg N205" "1") "Rivium 2e Straat 59." -> ("Rivium 2e Straat" "59.") "Nieuwe gracht 20rd" -> ("Nieuwe gracht" "20rd") "Nieuwe gracht 20rd 2" -> ("Nieuwe gracht" "20rd 2") "Nieuwe gracht 20zw /2" -> ("Nieuwe gracht" "20zw /2") "Nieuwe gracht 20zw/3" -> ("Nieuwe gracht" "20zw/3") "Nieuwe gracht 20 zw/4" -> ("Nieuwe gracht" "20 zw/4") "Bahnhofstr. 4" -> ("Bahnhofstr." "4") "Wertstr. 10" -> ("Wertstr." "10") "Lindenhof 1" -> ("Lindenhof" "1") "Nordesch 20" -> ("Nordesch" "20") "Weilstr. 6" -> ("Weilstr." "6") "Harthauer Weg 2" -> ("Harthauer Weg" "2") "Mainaustr. 49" -> ("Mainaustr." "49") "August-Horch-Str. 3" -> ("August-Horch-Str." "3") "Marktplatz 31" -> ("Marktplatz" "31") "Schmidener Weg 3" -> ("Schmidener Weg" "3") "Karl-Weysser-Str. 6" -> ("Karl-Weysser-Str." "6")
Raku
(formerly Perl 6) An unquestioning translation of the Scala example's regex to show how we lay out such regexes for readability in Raku, except that we take the liberty of leaving the space out of the house number. (Hard constants like 1940 and 1945 are a code smell, and the task should probably not require such constants unless there is a standard to point to that mandates them.) So expect this solution to change if the task is actually defined reasonably, such as by specifying that four-digit house numbers are excluded in Europe. (In contrast, four- and five-digit house numbers are not uncommon in places such as the U.S. where each block gets a hundred house numbers to play with, and there are cities with hundreds of blocks along a street.)
# Match every input line against the address pattern and print the Match.
# Capture 0 is the street (shortest possible prefix). The bracketed group is
# optional: whitespace followed by capture 1, the house number, which is either
# digits, `-` or `/`, digits, or digits not starting with 1940 or 1945 followed
# by any run of lowercase letters, `I`, `.`, `/` or space, then optional digits.
# The whole pattern is anchored to the end of the line.
say m[
( .*? )
[
\s+
(
| \d+ [ \- | \/ ] \d+
| <!before 1940 | 1945> \d+ <[ a..z I . / \x20 ]>* \d*
)
]?
$
] for lines;
- Output:
「Plataanstraat 5」 0 => 「Plataanstraat」 1 => 「5」 「Straat 12」 0 => 「Straat」 1 => 「12」 「Straat 12 II」 0 => 「Straat」 1 => 「12 II」 「Dr. J. Straat 12」 0 => 「Dr. J. Straat」 1 => 「12」 「Dr. J. Straat 12 a」 0 => 「Dr. J. Straat」 1 => 「12 a」 「Dr. J. Straat 12-14」 0 => 「Dr. J. Straat」 1 => 「12-14」 「Laan 1940 – 1945 37」 0 => 「Laan 1940 – 1945」 1 => 「37」 「Plein 1940 2」 0 => 「Plein 1940」 1 => 「2」 「1213-laan 11」 0 => 「1213-laan」 1 => 「11」 「16 april 1944 Pad 1」 0 => 「16 april 1944 Pad」 1 => 「1」 「1e Kruisweg 36」 0 => 「1e Kruisweg」 1 => 「36」 「Laan 1940-’45 66」 0 => 「Laan 1940-’45」 1 => 「66」 「Laan ’40-’45」 0 => 「Laan ’40-’45」 「Langeloërduinen 3 46」 0 => 「Langeloërduinen」 1 => 「3 46」 「Marienwaerdt 2e Dreef 2」 0 => 「Marienwaerdt 2e Dreef」 1 => 「2」 「Provincialeweg N205 1」 0 => 「Provincialeweg N205」 1 => 「1」 「Rivium 2e Straat 59.」 0 => 「Rivium 2e Straat」 1 => 「59.」 「Nieuwe gracht 20rd」 0 => 「Nieuwe gracht」 1 => 「20rd」 「Nieuwe gracht 20rd 2」 0 => 「Nieuwe gracht」 1 => 「20rd 2」 「Nieuwe gracht 20zw /2」 0 => 「Nieuwe gracht」 1 => 「20zw /2」 「Nieuwe gracht 20zw/3」 0 => 「Nieuwe gracht」 1 => 「20zw/3」 「Nieuwe gracht 20 zw/4」 0 => 「Nieuwe gracht」 1 => 「20 zw/4」 「Bahnhofstr. 4」 0 => 「Bahnhofstr.」 1 => 「4」 「Wertstr. 10」 0 => 「Wertstr.」 1 => 「10」 「Lindenhof 1」 0 => 「Lindenhof」 1 => 「1」 「Nordesch 20」 0 => 「Nordesch」 1 => 「20」 「Weilstr. 6」 0 => 「Weilstr.」 1 => 「6」 「Harthauer Weg 2」 0 => 「Harthauer Weg」 1 => 「2」 「Mainaustr. 49」 0 => 「Mainaustr.」 1 => 「49」 「August-Horch-Str. 3」 0 => 「August-Horch-Str.」 1 => 「3」 「Marktplatz 31」 0 => 「Marktplatz」 1 => 「31」 「Schmidener Weg 3」 0 => 「Schmidener Weg」 1 => 「3」 「Karl-Weysser-Str. 6」 0 => 「Karl-Weysser-Str.」 1 => 「6」
REXX
/*REXX program splits a European mail address into an address and a house number.     */
!= '│'                                           /*a pipe-ish symbol for $ concatenation*/
$= "Plataanstraat 5" ! ,
"Straat 12" ! ,
"Straat 12 II" ! ,
"Dr. J. Straat 12" ! ,
"Dr. J. Straat 12 a" ! ,
"Dr. J. Straat 12-14" ! ,
"Laan 1940 - 1945 37" ! ,
"Plein 1940 2" ! ,
"1213-laan 11" ! ,
"16 april 1944 Pad 1" ! ,
"1e Kruisweg 36" ! ,
"Laan 1940-'45 66" ! ,
"Laan '40-'45" ! ,
"Langeloërduinen 3 46" ! ,
"Provincialeweg N205 1" ! ,
"Rivium 2e Straat 59." ! ,
"Nieuwe gracht 20rd" ! ,
"Nieuwe gracht 20rd 2" ! ,
"Nieuwe gracht 20zw /2" ! ,
"Nieuwe gracht 20zw/3" ! ,
"Nieuwe gracht 20 zw/4" ! ,
"Bahnhofstr. 4" ! ,
"Wertstr. 10" ! ,
"Lindenhof 1" ! ,
"Nordesch 20" ! ,
"Weilstr. 6" ! ,
"Harthauer Weg 2" ! ,
"Mainaustr. 49" ! ,
"August-Horch-Str. 3" ! ,
"Marktplatz 31" ! ,
"Schmidener Weg 3" ! ,
"Karl-Weysser-Str. 6"
$=space($)                                       /*collapse runs of blanks in the list. */
w=0                                              /*W: width of the longest address.     */
      do j=1 until $==''; parse var $ addr '│' $ /*peel one address off the front of $. */
      @.j=space(addr); w=max(w, length(@.j) )
      end /*j*/                                  /* [↑] parse $ string, make @ array.*/
w=w+2                                            /*expand the width for the display. */
say center('address', w) center('house number', 12)
say center('', w, "═") center('' , 12, "═")
#=j                                              /*number of addresses: the UNTIL test  */
                                                 /*ends the loop before J is stepped, so*/
                                                 /*J (not J-1) is the count; using J-1  */
                                                 /*dropped the last address.            */
      do k=1 for #; sp=split(@.k)                /*split each @. address: addr, house#*/
      HN=subword(@.k, sp+1); if HN=='' then HN=' (none) ' /*handle a null house#*/
      say left( subword(@.k, 1, sp), w) HN
      end /*k*/
exit                                             /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*SPLIT: returns how many leading words of TXT form the street; the remaining words are */
/*       the house number.  Returning the word count means "no house number".           */
split: procedure; parse arg txt; n=words(txt)
if n<2 then return n                  /*0 or 1 word: nothing to split, and WORD(txt,0)  */
                                      /*below would be an invalid call.                 */
s=n-1; p=word(txt,s); e=word(txt,n)   /*S: tentative split;  P: penultimate;  E: last.  */
/*a penultimate word of 1940 ──► 1945, or a two-word address: neutralize P (set to ".").*/
if p>1939 & p<1946 | s<2 then p=. ; if verify("'",e,"M")\==0 then return n /*E has ' */
pl=verify(0123456789, left(p,1), 'M')\==0        /*PL: does P start with a digit?      */
/*move the split one word left when:  E has a "/" and P starts with a digit,  or  P is a*/
/*whole number,  or  E is a number and P starts with a digit and has no apostrophe.     */
if (verify('/', e, "M")\==0 & pl) | datatype(p, 'W') | ,
(datatype(e, 'N') & pl & \verify("'", p, "M")) then s=s-1
if s==0 then s=n                      /*if no split, then relocate split to ∞*/
return s                              /* [↑] indicate where to split the txt*/
- output when using the default (internal) input:
address house number ═══════════════════════ ════════════ Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) Langeloërduinen 3 46 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3
RPL
Common approach
@ →FIELDS builds the text of a list literal from the input string, one
@ character at a time: every space is replaced by two double-quote characters
@ (closing one word, opening the next), then STR→ evaluates the text.
« 34 CHR → s q
  « "{" q +
    1 s SIZE FOR j
       s j DUP SUB
       IF DUP " " == THEN DROP q DUP + END
       +
    NEXT
    STR→
» » '→FIELDS' STO    @ ( "word1 .. word n" → { "word1" .. "wordn" } )

@ SEPADDR looks only at the last two words of the address (penu, last).
@ isdigit holds, for penu and last in that order, the position of the word's
@ first character within "0123456789" (0 when it is not a digit).
« DUP →FIELDS SIZE LASTARG REVLIST 1 2 SUB REVLIST EVAL LASTARG
  1 « "0123456789" SWAP 1 1 SUB POS » DOLIST    @ check first digit of penu and last once for all
  → size penu last isdigit
  « IF isdigit 2 GET THEN
       @ last starts with a digit: take penu too when there are more than two
       @ words and penu starts with a digit but not with "194"
       IF size 2 > isdigit 1 GET AND penu 1 3 SUB "194" ≠ AND THEN penu " " + last + ELSE last END
    ELSE
       @ last does not start with a digit: the last two words, or nothing
       IF size 2 > THEN penu " " + last + ELSE "" END
    END
    @ when a house number was found, cut it and the space before it off the
    @ end of the address to obtain the street
    IF DUP SIZE THEN SWAP DUP2 SIZE SWAP SIZE - 1 - SWAP 1 ROT SUB SWAP END
» » 'SEPADDR' STO    @ ( "address" → "street" "house" )
{ "Plataanstraat 5" "Straat 12" "Straat 12 II" "Dr. J. Straat 12" "Dr. J. Straat 12 a" "Dr. J. Straat 12-14" "Laan 1940 - 1945 37" "Plein 1940 2" "1213-laan 11" "16 april 1944 Pad 1" "1e Kruisweg 36" "Laan 1940-'45 66" "Laan '40-'45" "Langeloërduinen 3 46" "Marienwaerdt 2e Dreef 2" "Provincialeweg N205 1" "Rivium 2e Straat 59." "Nieuwe gracht 20rd" "Nieuwe gracht 20rd 2" "Nieuwe gracht 20zw /2" "Nieuwe gracht 20zw/3" "Nieuwe gracht 20 zw/4" "Bahnhofstr. 4" "Wertstr. 10" "Lindenhof 1" "Nordesch 20" "Weilstr. 6" "Harthauer Weg 2" "Mainaustr. 49" "August-Horch-Str. 3" "Marktplatz 31" "Schmidener Weg 3" "Karl-Weysser-Str. 6" }
1 « SEPADDR 2 →LIST » DOLIST
- Output:
1: { { "Plataanstraat" "5" } { "Straat" "12" } { "Straat" "12 II" } { "Dr. J. Straat" "12" } { "Dr. J. Straat" "12 a" } { "Dr. J. Straat" "12-14" } { "Laan 1940 - 1945" "37" } { "Plein 1940" "2" } { "1213-laan" "11" } { "16 april 1944 Pad" "1" } { "1e Kruisweg" "36" } { "Laan 1940-'45" "66" } { "Laan '40-'45" "" } { "Langeloërduinen" "3 46" } { "Marienwaerdt 2e Dreef" "2" } { "Provincialeweg N205" "1" } { "Rivium 2e Straat" "59." } { "Nieuwe gracht" "20rd" } { "Nieuwe gracht" "20rd 2" } { "Nieuwe gracht" "20zw /2" } { "Nieuwe gracht" "20zw/3" } { "Nieuwe gracht" "20 zw/4" } { "Bahnhofstr." "4" } { "Wertstr." "10" } { "Lindenhof" "1" } { "Nordesch" "20" } { "Weilstr." "6" } { "Harthauer Weg" "2" } { "Mainaustr." "49" } { "August-Horch-Str." "3" } { "Marktplatz" "31" } { "Schmidener Weg" "3" } { "Karl-Weysser-Str." "6" } }
Better approach?
This shorter program splits "Langeloërduinen 3 46" into "Langeloërduinen 3" and "46", which seems to be the appropriate way according to this real-life example. Other cases are the same as above.
@ Walks backwards through the address (s) with index j; flag 1 records that a
@ digit was found whose preceding character is a space.
« DUP SIZE → s j
  « 1 CF
    DO
       { "0123456789" " " }    @ scan the address string from the end
       s j DUP SUB s 'j' DECR DUP SUB 2 →LIST
       2 « POS » DOLIST
       IF ΠLIST THEN 1 SF END
    UNTIL j 1 == 1 FS? OR END    @ until string start or a space character followed by a digit
    @ no such position: whole address is the street and the house is empty;
    @ otherwise j is the index of that space and the string is cut around it
    IF 1 FC? THEN s "" ELSE s 1 j 1 - SUB s j 1 + OVER SIZE SUB END
» » 'SEPADDR' STO    @ ( "address" → "street" "house" )
Scala
/** Splits Dutch/German postal addresses into street name and house number
  * with a single regular expression and prints the result for a test set.
  */
object SplitHouseNumber extends App {
  /** Matches the house-number part of an address.
    *
    * Alternatives, in order:
    *  1. whitespace, digits, `-` or `/`, digits (e.g. " 12-14");
    *  2. whitespace, digits not starting with 1940 or 1945, any run of
    *     lowercase letters / `I` / `.` / space / `/`, optional digits, end of input;
    *  3. digits followed by the literal characters `[']` and one character
    *     out of `4`, `0`, `|`, `5`, end of input.
    *     NOTE(review): the third alternative looks like a malformed attempt
    *     at abbreviated years; it is kept unchanged.
    */
  val extractor = new scala.util.matching.Regex(
    """(\s\d+[-/]\d+)|(\s(?!1940|1945)\d+[a-zI. /]*\d*)$|\d+\['][40|45]$""")

  /** The test addresses, one per line.
    *
    * Uses `linesIterator`: on JDK 11+ a plain `.lines` on a `String` resolves
    * to `java.lang.String.lines()`, which returns a Java `Stream` and does not
    * type-check as `Iterator[String]`.
    */
  def adressen: Iterator[String] =
    """Plataanstraat 5
      |Straat 12
      |Straat 12 II
      |Straat 1940 II
      |Dr. J. Straat 40
      |Dr. J. Straat 12 a
      |Dr. J. Straat 12-14
      |Laan 1940 – 1945 37
      |Plein 1940 2
      |1213-laan 11
      |16 april 1944 Pad 1
      |1e Kruisweg 36
      |Laan 1940-’45 66
      |Laan ’40-’45
      |Langeloërduinen 3 46
      |Marienwaerdt 2e Dreef 2
      |Provincialeweg N205 1
      |Rivium 2e Straat 59.
      |Nieuwe gracht 20rd
      |Nieuwe gracht 20rd 2
      |Nieuwe gracht 20zw /2
      |Nieuwe gracht 20zw/3
      |Nieuwe gracht 20 zw/4
      |Bahnhofstr. 4
      |Wertstr. 10
      |Lindenhof 1
      |Nordesch 20
      |Weilstr. 6
      |Harthauer Weg 2
      |Mainaustr. 49
      |August-Horch-Str. 3
      |Marktplatz 31
      |Schmidener Weg 3
      |Karl-Weysser-Str. 6""".stripMargin.linesIterator

  /** Returns (street, house number) for `input`.
    *
    * The street is the input with every match of `extractor` removed; the
    * house number is the first match (including its leading whitespace), or
    * the empty string when nothing matches.
    */
  def splitsAdressen(input: String): (String, String) =
    (extractor.split(input).mkString, extractor.findFirstIn(input).getOrElse(""))

  adressen.foreach(s => println(f"$s%-25s split as ${splitsAdressen(s)}"))
}
- Output:
Plataanstraat 5 split as (Plataanstraat, 5) Straat 12 split as (Straat, 12) Straat 12 II split as (Straat, 12 II) Dr. J. Straat 12 split as (Dr. J. Straat , 12) Dr. J. Straat 12 a split as (Dr. J. Straat, 12 a) Dr. J. Straat 12-14 split as (Dr. J. Straat, 12-14) Laan 1940 – 1945 37 split as (Laan 1940 – 1945, 37) Plein 1940 2 split as (Plein 1940, 2) 1213-laan 11 split as (1213-laan, 11) 16 april 1944 Pad 1 split as (16 april 1944 Pad, 1) 1e Kruisweg 36 split as (1e Kruisweg, 36) Laan 1940-’45 66 split as (Laan 1940-’45, 66) Laan ’40-’45 split as (Laan ’40-’45,) Langeloërduinen 3 46 split as (Langeloërduinen, 3 46) Marienwaerdt 2e Dreef 2 split as (Marienwaerdt 2e Dreef, 2) Provincialeweg N205 1 split as (Provincialeweg N205, 1) Rivium 2e Straat 59. split as (Rivium 2e Straat, 59.) Nieuwe gracht 20rd split as (Nieuwe gracht, 20rd) Nieuwe gracht 20rd 2 split as (Nieuwe gracht, 20rd 2) Nieuwe gracht 20zw /2 split as (Nieuwe gracht, 20zw /2) Nieuwe gracht 20zw/3 split as (Nieuwe gracht, 20zw/3) Nieuwe gracht 20 zw/4 split as (Nieuwe gracht, 20 zw/4) Bahnhofstr. 4 split as (Bahnhofstr., 4) Wertstr. 10 split as (Wertstr., 10) Lindenhof 1 split as (Lindenhof, 1) Nordesch 20 split as (Nordesch, 20) Weilstr. 6 split as (Weilstr., 6) Harthauer Weg 2 split as (Harthauer Weg, 2) Mainaustr. 49 split as (Mainaustr., 49) August-Horch-Str. 3 split as (August-Horch-Str., 3) Marktplatz 31 split as (Marktplatz, 31) Schmidener Weg 3 split as (Schmidener Weg, 3) Karl-Weysser-Str. 6 split as (Karl-Weysser-Str., 6)
Sidef
# Address pattern (extended syntax, whitespace inside is ignored):
#   group 1: the street, the shortest possible prefix;
#   optional tail: whitespace followed by group 2, the house number, which is
#   either digits, `-` or `/`, digits, or digits not starting with 1940 or 1945
#   followed by any run of lowercase letters, `I`, `.`, `/` or space and
#   optional digits. The pattern is anchored to the end of the line.
var re = %r[
( .*? )
(?:
\s+
(
| \d+ (?: \- | \/ ) \d+
| (?! 1940 | 1945) \d+ [ a-z I . / \x20 ]* \d*
)
)?
$]x
# Read the addresses line by line from ARGF, strip the line ending and print
# each line next to the two values taken from the match (m[0], m[1]);
# lines that do not match produce a warning instead.
ARGF.each { |line|
line.chomp!
if (var m = line.match(re)) {
printf("%-25s split as (#{m[0]}, #{m[1]})\n", line)
}
else {
warn "Can't parse: «#{line}»"
}
}
- Output:
Plataanstraat 5 split as (Plataanstraat, 5) Straat 12 split as (Straat, 12) Straat 12 II split as (Straat, 12 II) Dr. J. Straat 12 split as (Dr. J. Straat, 12) Dr. J. Straat 12 a split as (Dr. J. Straat, 12 a) Dr. J. Straat 12-14 split as (Dr. J. Straat, 12-14) Laan 1940 – 1945 37 split as (Laan 1940 – 1945, 37) Plein 1940 2 split as (Plein 1940, 2) 1213-laan 11 split as (1213-laan, 11) 16 april 1944 Pad 1 split as (16 april 1944 Pad, 1) 1e Kruisweg 36 split as (1e Kruisweg, 36) Laan 1940-’45 66 split as (Laan 1940-’45, 66) Laan ’40-’45 split as (Laan ’40-’45, ) Langeloërduinen 3 46 split as (Langeloërduinen, 3 46) Marienwaerdt 2e Dreef 2 split as (Marienwaerdt 2e Dreef, 2) Provincialeweg N205 1 split as (Provincialeweg N205, 1) Rivium 2e Straat 59. split as (Rivium 2e Straat, 59.) Nieuwe gracht 20rd split as (Nieuwe gracht, 20rd) Nieuwe gracht 20rd 2 split as (Nieuwe gracht, 20rd 2) Nieuwe gracht 20zw /2 split as (Nieuwe gracht, 20zw /2) Nieuwe gracht 20zw/3 split as (Nieuwe gracht, 20zw/3) Nieuwe gracht 20 zw/4 split as (Nieuwe gracht, 20 zw/4) Bahnhofstr. 4 split as (Bahnhofstr., 4) Wertstr. 10 split as (Wertstr., 10) Lindenhof 1 split as (Lindenhof, 1) Nordesch 20 split as (Nordesch, 20) Weilstr. 6 split as (Weilstr., 6) Harthauer Weg 2 split as (Harthauer Weg, 2) Mainaustr. 49 split as (Mainaustr., 49) August-Horch-Str. 3 split as (August-Horch-Str., 3) Marktplatz 31 split as (Marktplatz, 31) Schmidener Weg 3 split as (Schmidener Weg, 3) Karl-Weysser-Str. 6 split as (Karl-Weysser-Str., 6)
Tcl
# Splits a German/Dutch street address into street name and house number.
#
# The expanded-syntax pattern captures the shortest possible street prefix and
# an optional house-number tail: either "digits, - or /, digits", or digits
# not starting with 1940/1945 followed by lowercase letters, I, '.', space or
# '/' and optional digits.
#
# Returns a two-element list {street houseNumber}, both trimmed; the house
# number is empty when the address has none.
proc split_DE_NL_address {streetAddress} {
    set pattern {(?x)
^ (.*?) (
(?:\s \d+ [-/] \d+)
|
(?:\s (?!1940|1945)\d+ [a-zI. /]* \d*)
)? $
}
    regexp $pattern $streetAddress wholeMatch street houseNumber
    list [string trim $street] [string trim $houseNumber]
}
# Test addresses, one per line; blank lines are ignored by the loop below.
set data {
Plataanstraat 5
Straat 12
Straat 12 II
Dr. J. Straat 12
Dr. J. Straat 12 a
Dr. J. Straat 12-14
Laan 1940 – 1945 37
Plein 1940 2
1213-laan 11
16 april 1944 Pad 1
1e Kruisweg 36
Laan 1940-’45 66
Laan ’40-’45
Langeloërduinen 3 46
Marienwaerdt 2e Dreef 2
Provincialeweg N205 1
Rivium 2e Straat 59.
Nieuwe gracht 20rd
Nieuwe gracht 20rd 2
Nieuwe gracht 20zw /2
Nieuwe gracht 20zw/3
Nieuwe gracht 20 zw/4
Bahnhofstr. 4
Wertstr. 10
Lindenhof 1
Nordesch 20
Weilstr. 6
Harthauer Weg 2
Mainaustr. 49
August-Horch-Str. 3
Marktplatz 31
Schmidener Weg 3
Karl-Weysser-Str. 6
}
# Split every non-empty line and report street and house number.
foreach streetAddress [split $data "\n"] {
    set streetAddress [string trim $streetAddress]
    if {$streetAddress ne ""} {
        lassign [split_DE_NL_address $streetAddress] str num
        puts "split <$streetAddress> as <$str> <$num>"
    }
}
- Output:
split <Plataanstraat 5> as <Plataanstraat> <5> split <Straat 12> as <Straat> <12> split <Straat 12 II> as <Straat> <12 II> split <Dr. J. Straat 12> as <Dr. J. Straat> <12> split <Dr. J. Straat 12 a> as <Dr. J. Straat> <12 a> split <Dr. J. Straat 12-14> as <Dr. J. Straat> <12-14> split <Laan 1940 – 1945 37> as <Laan 1940 – 1945> <37> split <Plein 1940 2> as <Plein 1940> <2> split <1213-laan 11> as <1213-laan> <11> split <16 april 1944 Pad 1> as <16 april 1944 Pad> <1> split <1e Kruisweg 36> as <1e Kruisweg> <36> split <Laan 1940-’45 66> as <Laan 1940-’45> <66> split <Laan ’40-’45> as <Laan ’40-’45> <> split <Langeloërduinen 3 46> as <Langeloërduinen> <3 46> split <Marienwaerdt 2e Dreef 2> as <Marienwaerdt 2e Dreef> <2> split <Provincialeweg N205 1> as <Provincialeweg N205> <1> split <Rivium 2e Straat 59.> as <Rivium 2e Straat> <59.> split <Nieuwe gracht 20rd> as <Nieuwe gracht> <20rd> split <Nieuwe gracht 20rd 2> as <Nieuwe gracht> <20rd 2> split <Nieuwe gracht 20zw /2> as <Nieuwe gracht> <20zw /2> split <Nieuwe gracht 20zw/3> as <Nieuwe gracht> <20zw/3> split <Nieuwe gracht 20 zw/4> as <Nieuwe gracht> <20 zw/4> split <Bahnhofstr. 4> as <Bahnhofstr.> <4> split <Wertstr. 10> as <Wertstr.> <10> split <Lindenhof 1> as <Lindenhof> <1> split <Nordesch 20> as <Nordesch> <20> split <Weilstr. 6> as <Weilstr.> <6> split <Harthauer Weg 2> as <Harthauer Weg> <2> split <Mainaustr. 49> as <Mainaustr.> <49> split <August-Horch-Str. 3> as <August-Horch-Str.> <3> split <Marktplatz 31> as <Marktplatz> <31> split <Schmidener Weg 3> as <Schmidener Weg> <3> split <Karl-Weysser-Str. 6> as <Karl-Weysser-Str.> <6>
TUSCRIPT
$$ MODE DATA
$$ addressen=*
Plataanstraat 5
Straat 12
Straat 12 II
Dr. J. Straat 12
Dr. J. Straat 12 a
Dr. J. Straat 12-14
Laan 1940 - 1945 37
Plein 1940 2
1213-laan 11
16 april 1944 Pad 1
1e Kruisweg 36
Laan 1940-'45 66
Laan '40-'45
Langeloërduinen 3 46
Marienwaerdt 2e Dreef 2
Provincialeweg N205 1
Rivium 2e Straat 59.
Nieuwe gracht 20rd
Nieuwe gracht 20rd 2
Nieuwe gracht 20zw /2
Nieuwe gracht 20zw/3
Nieuwe gracht 20 zw/4
Bahnhofstr. 4
Wertstr. 10
Lindenhof 1
Nordesch 20
Weilstr. 6
Harthauer Weg 2
Mainaustr. 49
August-Horch-Str. 3
Marktplatz 31
Schmidener Weg 3
Karl-Weysser-Str. 6
$$ MODE TUSCRIPT,{}
- Build the search table "regex" from the DATA lines below, use it to SPLIT
- every line of "addressen" into "street" and "number", join the two parts
- with " <--> " and trace the result.
- NOTE(review): the DATA lines are TUSTEP search-table patterns; they appear
- to describe the house-number forms and the exclusions for "194x" years and
- quoted two-digit years - confirm against the TUSTEP manual.
BUILD S_TABLE regex=*
DATA : {\0}*{]}:
DATA : {\0}*{1-3}[IVXLC]{]}:
DATA :: 194{\0}::
DATA :: '{2}{\0}::
DATA :: {\0}*{\A}::
x= SPLIT(addressen,|regex,street,number)
output=JOIN(street," <--> ",number)
TRACE *output
Output:
TRACE * 50 -*SKRIPTE 202 output = * 1 = Plataanstraat <--> 5 2 = Straat <--> 12 3 = Straat <--> 12 II 4 = Dr. J. Straat <--> 12 5 = Dr. J. Straat <--> 12 a 6 = Dr. J. Straat <--> 12-14 7 = Laan 1940 - 1945 <--> 37 8 = Plein 1940 <--> 2 9 = 1213-laan <--> 11 10 = 16 april 1944 Pad <--> 1 11 = 1e Kruisweg <--> 36 12 = Laan 1940-'45 <--> 66 13 = Laan '40-'45 <--> 14 = Langeloërduinen <--> 3 46 15 = Marienwaerdt 2e Dreef <--> 2 16 = Provincialeweg N205 <--> 1 17 = Rivium 2e Straat <--> 59. 18 = Nieuwe gracht <--> 20rd 19 = Nieuwe gracht <--> 20rd 2 20 = Nieuwe gracht <--> 20zw /2 21 = Nieuwe gracht <--> 20zw/3 22 = Nieuwe gracht <--> 20 zw/4 23 = Bahnhofstr. <--> 4 24 = Wertstr. <--> 10 25 = Lindenhof <--> 1 26 = Nordesch <--> 20 27 = Weilstr. <--> 6 28 = Harthauer Weg <--> 2 29 = Mainaustr. <--> 49 30 = August-Horch-Str. <--> 3 31 = Marktplatz <--> 31 32 = Schmidener Weg <--> 3 33 = Karl-Weysser-Str. <--> 6
Wren
import "./pattern" for Pattern
import "./fmt" for Fmt
var digits = "0123456789"
var p = Pattern.new("+1/s")
// Splits an address into [street, house].
//
// The address is cut at whitespace and only the last two pieces are examined:
//  - last piece starts with a digit: it is the house number; the piece before
//    it is included when there are more than two pieces and that piece starts
//    with a digit but not with "194";
//  - otherwise, with more than two pieces, the last two pieces together form
//    the house number;
//  - otherwise the house number is "".
// An address with fewer than two pieces is returned as the street with an
// empty house number.
var separateHouseNumber = Fn.new { |address|
    var len = address.count
    var splits = p.splitAll(address)
    var size = splits.count
    // splits[-2] below is out of bounds for a one-word (or empty) address,
    // so such input is treated as "street only".
    if (size < 2) return [address.trimEnd(), ""]
    var last = splits[-1]
    var penult = splits[-2]
    var house
    if (digits.contains(last[0])) {
        if (size > 2 && digits.contains(penult[0]) && !penult.startsWith("194")) {
            house = penult + " " + last
        } else {
            house = last
        }
    } else if (size > 2) {
        house = penult + " " + last
    } else {
        house = ""
    }
    // The street is the address without the house number and trailing blanks.
    var street = address.take(len - house.count).join().trimEnd()
    return [street, house]
}
// Test set of Dutch and German addresses.
var addresses = [
    "Plataanstraat 5",
    "Straat 12",
    "Straat 12 II",
    "Dr. J. Straat 12",
    "Dr. J. Straat 12 a",
    "Dr. J. Straat 12-14",
    "Laan 1940 - 1945 37",
    "Plein 1940 2",
    "1213-laan 11",
    "16 april 1944 Pad 1",
    "1e Kruisweg 36",
    "Laan 1940-'45 66",
    "Laan '40-'45",
    "Langeloërduinen 3 46",
    "Marienwaerdt 2e Dreef 2",
    "Provincialeweg N205 1",
    "Rivium 2e Straat 59.",
    "Nieuwe gracht 20rd",
    "Nieuwe gracht 20rd 2",
    "Nieuwe gracht 20zw /2",
    "Nieuwe gracht 20zw/3",
    "Nieuwe gracht 20 zw/4",
    "Bahnhofstr. 4",
    "Wertstr. 10",
    "Lindenhof 1",
    "Nordesch 20",
    "Weilstr. 6",
    "Harthauer Weg 2",
    "Mainaustr. 49",
    "August-Horch-Str. 3",
    "Marktplatz 31",
    "Schmidener Weg 3",
    "Karl-Weysser-Str. 6"
]

// Print a two-column table: street name, then house number or "(none)".
System.print("Street House Number")
System.print("--------------------- ------------")
for (address in addresses) {
    var parts = separateHouseNumber.call(address)
    var shown = (parts[1] == "") ? "(none)" : parts[1]
    Fmt.print("$-22s $s", parts[0], shown)
}
- Output:
Street House Number --------------------- ------------ Plataanstraat 5 Straat 12 Straat 12 II Dr. J. Straat 12 Dr. J. Straat 12 a Dr. J. Straat 12-14 Laan 1940 - 1945 37 Plein 1940 2 1213-laan 11 16 april 1944 Pad 1 1e Kruisweg 36 Laan 1940-'45 66 Laan '40-'45 (none) Langeloërduinen 3 46 Marienwaerdt 2e Dreef 2 Provincialeweg N205 1 Rivium 2e Straat 59. Nieuwe gracht 20rd Nieuwe gracht 20rd 2 Nieuwe gracht 20zw /2 Nieuwe gracht 20zw/3 Nieuwe gracht 20 zw/4 Bahnhofstr. 4 Wertstr. 10 Lindenhof 1 Nordesch 20 Weilstr. 6 Harthauer Weg 2 Mainaustr. 49 August-Horch-Str. 3 Marktplatz 31 Schmidener Weg 3 Karl-Weysser-Str. 6