Separate the house number from the street name: Difference between revisions
Separate the house number from the street name (view source)
Revision as of 11:23, 4 February 2024
, 3 months ago→{{header|Wren}}: Minor tidy
m (→{{header|Phix}}: syntax coloured) |
m (→{{header|Wren}}: Minor tidy) |
||
(4 intermediate revisions by 3 users not shown) | |||
Line 49:
{{trans|Nim}}
<
V fields = address.split_py()
V last = fields.last
Line 102:
L(address) Addresses
V (street, house) = separateHouseNumber(address)
print(street.rjust(22)‘ ’(I !house.empty {house} E ‘(none)’))</syntaxhighlight>
{{out}}
Line 141:
Schmidener Weg 3
Karl-Weysser-Str. 6
</pre>
=={{header|ALGOL 68}}==
Although Algol 68G has a <code>grep in string</code> procedure which does regular expression matching, this is non-standard so this sample parses the address line without regular expressions.
<syntaxhighlight lang="algol68">
BEGIN # separate house numbers from street names in German/Netherlands #
# addresses #
# returns the position of the start of the house number in a #
# the address is first scanned backwards from its last character over #
# everything that could belong to a house number, then forwards again #
# to the first digit group that does not look like a 194x year #
# a result beyond UPB a means the forward scan reached the end of the #
# string without finding a house number #
PROC house position = ( STRING a )INT:
BEGIN
# sentinel character yielded for any position outside the bounds of a #
CHAR eol ch = REPR 0;
# current scan position, starts at the last character of a #
INT a pos := UPB a;
# TRUE if the scan position has moved off either end of a #
PROC eol = BOOL: a pos < LWB a OR a pos > UPB a;
# the character at pos, or the sentinel if pos is out of bounds #
PROC ch = ( INT pos )CHAR:
IF pos < LWB a OR pos > UPB a THEN eol ch ELSE a[ pos ] FI;
# the character at the current scan position #
PROC curr = CHAR: ch( a pos );
# step one position back/forward and yield the character now current, #
# no step is taken once the position is already past that end of a #
PROC prev = CHAR: IF a pos >= LWB a THEN a pos -:= 1; curr ELSE eol ch FI;
PROC next = CHAR: IF a pos <= UPB a THEN a pos +:= 1; curr ELSE eol ch FI;
# TRUE if the current character is c #
PROC have = ( CHAR c )BOOL: curr = c;
# TRUE if the character at pos lies in a..z inclusive - note that the #
# CHAR parameter a hides the address string a within this procedure #
PROC range = ( CHAR a, z, INT pos )BOOL: ch( pos ) >= a AND ch( pos ) <= z;
# TRUE if the character at pos is a decimal digit #
PROC digit = ( INT pos )BOOL: range( "0", "9", pos );
# backward scan: skip trailing spaces, one optional trailing ".", #
# any trailing "I" characters (e.g. the "II" suffix) and the spaces #
# preceding them #
WHILE have( " " ) DO prev OD;
IF have( "." ) THEN prev; WHILE have( " " ) DO prev OD FI;
WHILE have( "I" ) DO prev OD;
WHILE have( " " ) DO prev OD;
# continue backwards over lower case letters, digits, spaces and the #
# punctuation . / - stopping at the first other character or when #
# the scan runs off the start of the string #
WHILE range( "a", "z", a pos ) OR digit( a pos )
OR have( " " ) OR have( "." ) OR have( "/" ) OR have( "-" )
DO
prev
OD;
IF have( "'" ) THEN # abbreviated year #
# move forward past the digits that follow the apostrophe #
WHILE next;
digit( a pos )
DO SKIP OD
FI;
IF eol THEN # must not be the whole line #
# the backward scan consumed everything - move forward to the #
# first space so that the first word stays in the street name #
WHILE next;
NOT have( " " ) AND NOT eol
DO SKIP OD
FI;
# must start with a number that doesn't look like a 1940s year #
# each pass of the enquiry clause skips spaces, skips the rest of a #
# word that does not start with a digit and then advances to the next #
# digit (or the end), the loop body discards a 194x digit group and #
# the search is repeated #
WHILE WHILE have( " " ) DO next OD;
IF NOT digit( a pos ) THEN
WHILE NOT have( " " ) AND NOT eol DO next OD
FI;
WHILE NOT digit( a pos ) AND NOT eol DO next OD;
ch( a pos ) = "1" AND ch( a pos + 1 ) = "9"
AND ch( a pos + 2 ) = "4" AND digit( a pos + 3 )
DO
WHILE digit( a pos ) DO next OD
OD;
# the position of the first accepted digit, or beyond UPB a if none #
a pos
END # house position # ;
# sample Netherlands and German addresses to be split into street name #
# and house number #
[]STRING test cases
= ( "Plataanstraat 5" , "Straat 12" , "Straat 12 II"
, "Dr. J. Straat 12" , "Dr. J. Straat 12 a" , "Dr. J. Straat 12-14"
, "Laan 1940 - 1945 37" , "Plein 1940 2" , "1213-laan 11"
, "16 april 1944 Pad 1" , "1e Kruisweg 36" , "Laan 1940-'45 66"
, "Laan '40-'45" , "Langeloërduinen 3 46" , "Marienwaerdt 2e Dreef 2"
, "Provincialeweg N205 1", "Rivium 2e Straat 59." , "Nieuwe gracht 20rd"
, "Nieuwe gracht 20rd 2" , "Nieuwe gracht 20zw /2", "Nieuwe gracht 20zw/3"
, "Nieuwe gracht 20 zw/4", "Bahnhofstr. 4" , "Wertstr. 10"
, "Lindenhof 1" , "Nordesch 20" , "Weilstr. 6"
, "Harthauer Weg 2" , "Mainaustr. 49" , "August-Horch-Str. 3"
, "Marktplatz 31" , "Schmidener Weg 3" , "Karl-Weysser-Str. 6"
);
FOR test no FROM LWB test cases TO UPB test cases DO
    # yields text with any trailing spaces removed #
    PROC rtrim = ( STRING text )STRING:
         BEGIN
            INT last := UPB text;
            WHILE ( last >= LWB text | text[ last ] = " " | FALSE ) DO
                last -:= 1
            OD;
            text[ LWB text : last ]
         END # rtrim # ;
    # yields text padded on the left with spaces to at least width characters #
    PROC lpad = ( STRING text, INT width )STRING:
         BEGIN
            INT shortfall = width - ( ( UPB text + 1 ) - LWB text );
            ( shortfall <= 0 | text | " " * shortfall + text )
         END # lpad # ;
    # split the trimmed address at the position reported by house position #
    STRING address  = rtrim( test cases[ test no ] );
    INT    split    = house position( address );
    # a split position beyond the end of the address means no house number #
    BOOL   no house = split > UPB address;
    STRING street   = ( no house | address  | address[ LWB address : split - 1 ] );
    STRING house    = ( no house | "(none)" | address[ split : ] );
    # street name right-aligned in 40 columns, then the house number #
    print( ( lpad( rtrim( street ), 40 ), " ", rtrim( house ), newline ) )
OD
END
</syntaxhighlight>
{{out}}
<pre>
Plataanstraat 5
Straat 12
Straat 12 II
Dr. J. Straat 12
Dr. J. Straat 12 a
Dr. J. Straat 12-14
Laan 1940 - 1945 37
Plein 1940 2
1213-laan 11
16 april 1944 Pad 1
1e Kruisweg 36
Laan 1940-'45 66
Laan '40-'45 (none)
Langeloërduinen 3 46
Marienwaerdt 2e Dreef 2
Provincialeweg N205 1
Rivium 2e Straat 59.
Nieuwe gracht 20rd
Nieuwe gracht 20rd 2
Nieuwe gracht 20zw /2
Nieuwe gracht 20zw/3
Nieuwe gracht 20 zw/4
Bahnhofstr. 4
Wertstr. 10
Lindenhof 1
Nordesch 20
Weilstr. 6
Harthauer Weg 2
Mainaustr. 49
August-Horch-Str. 3
Marktplatz 31
Schmidener Weg 3
Karl-Weysser-Str. 6
</pre>
=={{header|EchoLisp}}==
<
(lib 'struct)
(lib 'sql)
Line 208 ⟶ 336:
"Schmidener Weg 3"
"Karl-Weysser-Str. 6"))
</syntaxhighlight>
{{out}}
<pre>
Line 250 ⟶ 378:
=={{header|F_Sharp|F#}}==
<
// Separate house number and street in Dutch addresses. Nigel Galloway: September 23rd., 2021
let fN g=let n=System.Text.RegularExpressions.Regex.Match(g,@"(\s\d+[-/]\d+)|(\s(?!1940|1945)\d+[a-zI. /]*\d*)$") in if n.Success then Some(g.[0..n.Index],n.Value) else None
Line 256 ⟶ 384:
printfn " Street Number\n ______ ______"
td|>List.iter(fun g->match fN g with Some(n,g)->printfn $"%27s{n.Trim()} %s{g}" |_->printfn $"FAILED %s{g}")
</syntaxhighlight>
{{out}}
<pre>
Line 299 ⟶ 427:
=={{header|Go}}==
{{trans|Kotlin}}
<
import (
Line 375 ⟶ 503:
fmt.Printf("%-22s %s\n", street, house)
}
}</syntaxhighlight>
{{out}}
Line 421 ⟶ 549:
{{Works with|GHC|7.8.3}}
<
{- Recommended package versions to use:
Line 521 ⟶ 649:
Right r -> return r
pairs <- mapM (splitAddress rx) testSet
mapM_ T.putStrLn $ formatPairs pairs</syntaxhighlight>
{{out}}
<pre>
Line 564 ⟶ 692:
'''Solution''':
<
digit=: '0123456789'
nope=: {{>./({.I.y=' '),1+I. special +./@:(E."1) y}}
here=: {{I.1,~y e.digit}}
din5008=: ({.;}.)~ here {.@#~ nope < here</syntaxhighlight>
Sample data:
<
Straat 12
Straat 12 II
Line 606 ⟶ 734:
Schmidener Weg 3
Karl-Weysser-Str. 6
)</syntaxhighlight>
'''Example''':<
┌───────────────────┬────────────────┐
│Straat │12
├───────────────────┼────────────────┤
│Straat │12 II
├───────────────────┼────────────────┤
│Dr. J. Straat │12
├───────────────────┼────────────────┤
│Dr. J. Straat │12 a
├───────────────────┼────────────────┤
│Dr. J. Straat │12-14
├───────────────────┼────────────────┤
│Laan 1940 – 1945
├───────────────────┼────────────────┤
│Plein 1940 │2
├───────────────────┼────────────────┤
├───────────────────┼────────────────┤
│16 april 1944 Pad │1
├───────────────────┼────────────────┤
├───────────────────┼────────────────┤
│Laan 1940-’45
├───────────────────┼────────────────┤
├───────────────────┼────────────────┤
│Langeloërduinen │3 46
├───────────────────┼────────────────┤
│Marienwaerdt │2e Dreef 2
├───────────────────┼────────────────┤
│Provincialeweg N │205 1
├───────────────────┼────────────────┤
│Rivium │2e Straat 59.
├───────────────────┼────────────────┤
│Nieuwe gracht │20rd
├───────────────────┼────────────────┤
│Nieuwe gracht │20rd 2
├───────────────────┼────────────────┤
│Nieuwe gracht │20zw /2
├───────────────────┼────────────────┤
│Nieuwe gracht │20zw/3
├───────────────────┼────────────────┤
│Nieuwe gracht │20 zw/4
├───────────────────┼────────────────┤
│Bahnhofstr. │4
├───────────────────┼────────────────┤
│Wertstr. │10
├───────────────────┼────────────────┤
│Lindenhof │1
├───────────────────┼────────────────┤
│Nordesch │20
├───────────────────┼────────────────┤
│Weilstr. │6
├───────────────────┼────────────────┤
│Harthauer Weg │2
├───────────────────┼────────────────┤
│Mainaustr. │49
├───────────────────┼────────────────┤
│August-Horch-Str. │3
├───────────────────┼────────────────┤
│Marktplatz │31
├───────────────────┼────────────────┤
│Schmidener Weg │3
├───────────────────┼────────────────┤
│Karl-Weysser-Str. │6
└───────────────────┴────────────────┘</syntaxhighlight>
=={{header|jq}}==
Line 685 ⟶ 813:
"(?<s>.*) (?<n> (\\s\\d+[-/]\\d+) | (\\s(?!1940|1945)\\d+[a-zI. /]*\\d*)$ | \\d+\\['][40|45]$ )"
</pre>
<
"^ (?<s>.*?) \\s+"
+ " (?<n>\\d* ( \\-|\\/)? \\d*"
Line 700 ⟶ 828:
| .n |= if . == "" or . == null then "(none)" else . end ;
def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .;</syntaxhighlight>
'''The Task'''
<
"Plataanstraat 5",
"Straat 12",
Line 745 ⟶ 873:
| "\(.s|lpad(22)) \(.n)" ) ;
task</syntaxhighlight>
{{out}}
<pre>
Line 787 ⟶ 915:
=={{header|Julia}}==
Uses the regex from the Perl version.
<
(
\d* (\-|\/)? \d*
Line 837 ⟶ 965:
end
end
</syntaxhighlight>
<pre>
Plataanstraat 5 split as street => Plataanstraat, number => 5
Line 875 ⟶ 1,003:
=={{header|Kotlin}}==
<
val r = Regex("""\s+""")
Line 939 ⟶ 1,067:
println("${street.padEnd(22)} ${if (house != "") house else "(none)"}")
}
}</syntaxhighlight>
{{out}}
Line 982 ⟶ 1,110:
=={{header|Nim}}==
{{trans|Go}}
<
from unicode import align
Line 1,037 ⟶ 1,165:
for address in Addresses:
let (street, house) = address.separateHouseNumber()
echo street.align(22), " ", if house.len != 0: house else: "(none)"</syntaxhighlight>
{{out}}
Line 1,077 ⟶ 1,205:
=={{header|Perl}}==
<
'Plataanstraat 5', 'Straat 12', 'Straat 12 II', 'Dr. J. Straat 12',
'Dr. J. Straat 12 a', 'Dr. J. Straat 12-14', 'Laan 1940 – 1945 37', 'Plein 1940 2',
Line 1,097 ⟶ 1,225:
$]x;
$number ? printf "%-26s\t%s\n", ($street, $number) : ($_, "\t(no match)");
}</syntaxhighlight>
{{out}}
<pre style="height:35ex">Plataanstraat 5
Line 1,135 ⟶ 1,263:
=={{header|Phix}}==
{{trans|Go}}
<!--<
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">isDigit</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">)</span>
Line 1,211 ⟶ 1,339:
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000000;">main</span><span style="color: #0000FF;">()</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
Line 1,253 ⟶ 1,381:
=={{header|Python}}==
<!-- ?? missing code ?? -->
<
Plataanstraat 5 split as (Plataanstraat, 5)
Straat 12 split as (Straat, 12)
Line 1,286 ⟶ 1,414:
Marktplatz 31 split as (Marktplatz, 31)
Schmidener Weg 3 split as (Schmidener Weg, 3)
Karl-Weysser-Str. 6 split as (Karl-Weysser-Str., 6)''')</syntaxhighlight>
=={{header|Racket}}==
Line 1,292 ⟶ 1,420:
Same as other regexp-splittings on this page. (I don't see much point in this, but the related [[Starting_a_web_browser]] seems like a good idea.)
<
#lang racket
Line 1,344 ⟶ 1,472:
(cond [(splits-adressen str) => cdr]
[else '???])))
</syntaxhighlight>
{{out}}
Line 1,393 ⟶ 1,521:
in places such as the U.S. where each block gets a hundred house numbers
to play with, and there are cities with hundreds of blocks along a street.)
<syntaxhighlight lang="raku"
( .*? )
Line 1,405 ⟶ 1,533:
$
] for lines;</syntaxhighlight>
{{out}}
<pre>「Plataanstraat 5」
Line 1,541 ⟶ 1,669:
=={{header|REXX}}==
<
!= '│' /*a pipe-ish symbol for $ concatenation*/
$= "Plataanstraat 5" ! ,
Line 1,597 ⟶ 1,725:
(datatype(e, 'N') & pl & \verify("'", p, "M")) then s=s-1
if s==0 then s=n /*if no split, then relocate split to ∞*/
return s /* [↑] indicate where to split the txt*/</syntaxhighlight>
{{out|output|text= when using the default (internal) input:}}
<pre>
Line 1,636 ⟶ 1,764:
=={{header|Scala}}==
<
val extractor = new scala.util.matching.Regex( """(\s\d+[-/]\d+)|(\s(?!1940|1945)\d+[a-zI. /]*\d*)$|\d+\['][40|45]$""")
Line 1,679 ⟶ 1,807:
adressen.foreach(s => println(f"$s%-25s split as ${splitsAdressen(s)}"))
}</syntaxhighlight>
{{out}}
<pre>Plataanstraat 5 split as (Plataanstraat, 5)
Line 1,717 ⟶ 1,845:
=={{header|Sidef}}==
{{trans|Raku}}
<
( .*? )
(?:
Line 1,736 ⟶ 1,864:
warn "Can't parse: «#{line}»"
}
}</syntaxhighlight>
{{out}}
<pre>
Line 1,776 ⟶ 1,904:
=={{header|Tcl}}==
{{trans|Scala}}
<
set RE {(?x)
^ (.*?) (
Line 1,829 ⟶ 1,957:
lassign [split_DE_NL_address $streetAddress] str num
puts "split <$streetAddress> as <$str> <$num>"
}</syntaxhighlight>
{{out}}
<pre>
Line 1,868 ⟶ 1,996:
=={{header|TUSCRIPT}}==
<
$$ MODE DATA
Line 1,919 ⟶ 2,047:
TRACE *output
</syntaxhighlight>
Output:
<pre style='height:30ex;overflow:scroll'>
Line 1,963 ⟶ 2,091:
{{libheader|Wren-pattern}}
{{libheader|Wren-fmt}}
<
import "./fmt" for Fmt
var digits = "0123456789"
Line 2,034 ⟶ 2,162:
if (house == "") house = "(none)"
Fmt.print("$-22s $s", street, house)
}</syntaxhighlight>
{{out}}
|