Soundex: Difference between revisions

Rename Perl 6 -> Raku, alphabetize, minor clean-up
(→‎{{header|TMG}}: new section)
(Rename Perl 6 -> Raku, alphabetize, minor clean-up)
Line 116:
16 EXAMPLE E251
</pre>
 
 
=={{header|Ada}}==
Line 630 ⟶ 629:
return 0;
}</lang>
 
=={{header|C++}}==
<lang cpp>
#include <iostream> // required for debug code in main() only
#include <iomanip> // required for debug code in main() only
#include <string>
 
// Computes the four-character Soundex code of a name.
//
// s: NUL-terminated name. A null pointer or an empty string yields an
//    empty result.
// Returns the first character of the name (ASCII lower case is folded to
// upper case) followed by three digits, right-padded with '0'.
std::string soundex( char const* s )
{
    // Indexed by (character & 0x1F), i.e. the letter's position in the
    // alphabet. 1-6 is the Soundex digit, -1 marks a vowel (A, E, I, O, U),
    // and 0 marks entries that produce no digit (H, W, Y and the unused
    // slots).
    // The element type is explicitly signed: plain char may be unsigned, in
    // which case -1 would be a narrowing error in this initializer and the
    // "c == -1" test below could never succeed.
    static signed char const code[] = { 0, -1, 1, 2, 3, -1, 1, 2, 0, -1, 2, 2, 4, 5, 5, -1, 1, 2, 6, 2, 3, -1, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0 };

    if( !s || !*s )
        return std::string();

    std::string out( "0000" );
    out[0] = (*s >= 'a' && *s <= 'z') ? *s - ('a' - 'A') : *s;
    ++s;

    // The first letter is not emitted as a digit, but its code still
    // suppresses an identical code that follows it (e.g. Pfister).
    signed char prev = code[out[0] & 0x1F];
    for( unsigned i = 1; *s && i < 4; ++s )
    {
        if( (*s & 0xC0) != 0x40 ) // only bytes in [0x40 - 0x7F] are looked up
            continue;

        signed char const c = code[*s & 0x1F];
        if( c == prev ) // same code as the previous letter: collapse
            continue;

        if( c == -1 )
            prev = 0; // vowel as separator: the same digit may be emitted again
        else if( c )
        {
            out[i] = static_cast<char>( c + '0' );
            ++i;
            prev = c;
        }
        // c == 0 leaves prev untouched, so H/W/Y do not act as separators.
    }
    return out;
}
 
int main()
{
static char const * const names[][2] =
{
{"Ashcraft", "A261"},
{"Burroughs", "B620"},
{"Burrows", "B620"},
{"Ekzampul", "E251"},
{"Ellery", "E460"},
{"Euler", "E460"},
{"Example", "E251"},
{"Gauss", "G200"},
{"Ghosh", "G200"},
{"Gutierrez", "G362"},
{"Heilbronn", "H416"},
{"Hilbert", "H416"},
{"Jackson", "J250"},
{"Kant", "K530"},
{"Knuth", "K530"},
{"Ladd", "L300"},
{"Lee", "L000"},
{"Lissajous", "L222"},
{"Lloyd", "L300"},
{"Lukasiewicz", "L222"},
{"O'Hara", "O600"},
{"Pfister", "P236"},
{"Soundex", "S532"},
{"Sownteks", "S532"},
{"Tymczak", "T522"},
{"VanDeusen", "V532"},
{"Washington", "W252"},
{"Wheaton", "W350"}
};
 
for( auto const& name : names )
{
auto const sdx = soundex( name[0] );
std::cout << std::left << std::setw( 16 ) << name[0] << std::setw( 8 ) << sdx << (sdx == name[1] ? " ok" : " ERROR") << std::endl;
}
return 0;
}
 
</lang>
{{out|Example output}}
<pre>
Ashcraft A261 ok
Burroughs B620 ok
Burrows B620 ok
Ekzampul E251 ok
Ellery E460 ok
Euler E460 ok
Example E251 ok
Gauss G200 ok
Ghosh G200 ok
Gutierrez G362 ok
Heilbronn H416 ok
Hilbert H416 ok
Jackson J250 ok
Kant K530 ok
Knuth K530 ok
Ladd L300 ok
Lee L000 ok
Lissajous L222 ok
Lloyd L300 ok
Lukasiewicz L222 ok
O'Hara O600 ok
Pfister P236 ok
Soundex S532 ok
Sownteks S532 ok
Tymczak T522 ok
VanDeusen V532 ok
Washington W252 ok
Wheaton W350 ok
</pre>
 
=={{header|C sharp}}==
Line 876 ⟶ 762:
VanDeusen -> V532 (True)
Ashcraft -> A261 (True)
</pre>
 
=={{header|C++}}==
<lang cpp>
#include <iostream> // required for debug code in main() only
#include <iomanip> // required for debug code in main() only
#include <string>
 
// Computes the four-character Soundex code of a name.
//
// s: NUL-terminated name. A null pointer or an empty string yields an
//    empty result.
// Returns the first character of the name (ASCII lower case is folded to
// upper case) followed by three digits, right-padded with '0'.
std::string soundex( char const* s )
{
    // Indexed by (character & 0x1F), i.e. the letter's position in the
    // alphabet. 1-6 is the Soundex digit, -1 marks a vowel (A, E, I, O, U),
    // and 0 marks entries that produce no digit (H, W, Y and the unused
    // slots).
    // The element type is explicitly signed: plain char may be unsigned, in
    // which case -1 would be a narrowing error in this initializer and the
    // "c == -1" test below could never succeed.
    static signed char const code[] = { 0, -1, 1, 2, 3, -1, 1, 2, 0, -1, 2, 2, 4, 5, 5, -1, 1, 2, 6, 2, 3, -1, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0 };

    if( !s || !*s )
        return std::string();

    std::string out( "0000" );
    out[0] = (*s >= 'a' && *s <= 'z') ? *s - ('a' - 'A') : *s;
    ++s;

    // The first letter is not emitted as a digit, but its code still
    // suppresses an identical code that follows it (e.g. Pfister).
    signed char prev = code[out[0] & 0x1F];
    for( unsigned i = 1; *s && i < 4; ++s )
    {
        if( (*s & 0xC0) != 0x40 ) // only bytes in [0x40 - 0x7F] are looked up
            continue;

        signed char const c = code[*s & 0x1F];
        if( c == prev ) // same code as the previous letter: collapse
            continue;

        if( c == -1 )
            prev = 0; // vowel as separator: the same digit may be emitted again
        else if( c )
        {
            out[i] = static_cast<char>( c + '0' );
            ++i;
            prev = c;
        }
        // c == 0 leaves prev untouched, so H/W/Y do not act as separators.
    }
    return out;
}
 
int main()
{
static char const * const names[][2] =
{
{"Ashcraft", "A261"},
{"Burroughs", "B620"},
{"Burrows", "B620"},
{"Ekzampul", "E251"},
{"Ellery", "E460"},
{"Euler", "E460"},
{"Example", "E251"},
{"Gauss", "G200"},
{"Ghosh", "G200"},
{"Gutierrez", "G362"},
{"Heilbronn", "H416"},
{"Hilbert", "H416"},
{"Jackson", "J250"},
{"Kant", "K530"},
{"Knuth", "K530"},
{"Ladd", "L300"},
{"Lee", "L000"},
{"Lissajous", "L222"},
{"Lloyd", "L300"},
{"Lukasiewicz", "L222"},
{"O'Hara", "O600"},
{"Pfister", "P236"},
{"Soundex", "S532"},
{"Sownteks", "S532"},
{"Tymczak", "T522"},
{"VanDeusen", "V532"},
{"Washington", "W252"},
{"Wheaton", "W350"}
};
 
for( auto const& name : names )
{
auto const sdx = soundex( name[0] );
std::cout << std::left << std::setw( 16 ) << name[0] << std::setw( 8 ) << sdx << (sdx == name[1] ? " ok" : " ERROR") << std::endl;
}
return 0;
}
 
</lang>
{{out|Example output}}
<pre>
Ashcraft A261 ok
Burroughs B620 ok
Burrows B620 ok
Ekzampul E251 ok
Ellery E460 ok
Euler E460 ok
Example E251 ok
Gauss G200 ok
Ghosh G200 ok
Gutierrez G362 ok
Heilbronn H416 ok
Hilbert H416 ok
Jackson J250 ok
Kant K530 ok
Knuth K530 ok
Ladd L300 ok
Lee L000 ok
Lissajous L222 ok
Lloyd L300 ok
Lukasiewicz L222 ok
O'Hara O600 ok
Pfister P236 ok
Soundex S532 ok
Sownteks S532 ok
Tymczak T522 ok
VanDeusen V532 ok
Washington W252 ok
Wheaton W350 ok
</pre>
 
Line 2,644 ⟶ 2,643:
Example -> E251 -> true
</pre>
 
 
=={{header|Lua}}==
Line 3,291 ⟶ 3,289:
print soundex("Sownteks"), "\n"; # S532
print soundex("Ekzampul"), "\n"; # E251</lang>
 
=={{header|Perl 6}}==
US census algorithm, so "Ashcraft" and "Burroughs" adjusted to match.
We fake up a first consonant in some cases to make up for the fact that we always trim the first numeric code (so that the 'l' of 'Lloyd' is properly deleted).
<lang perl6>sub soundex ($name --> Str) {
    # Returns the Soundex code of $name: its first character upper-cased,
    # followed by three digits.
    my $first = substr($name,0,1).uc;
    gather {
        take $first;
        # The first numeric code taken by the match below is always thrown
        # away by the [0,2,3,4] slice. When the first letter yields no code
        # of its own, prepend a throw-away "de " so that the 'd' supplies the
        # code to discard instead of the name's first real consonant.
        # 'Y' belongs in this set too: without it "Yates" lost its 't' and
        # came out as Y200 rather than Y320.
        my $fakefirst = '';
        $fakefirst = "de " if $first ~~ /^ <[AEIOUWHY]> /;
        # Lower-case, delete every 'w' and 'h', then walk the whole string:
        # each run of consonants sharing a code takes that digit once, and any
        # other character is consumed silently. Three zeros are taken at the
        # end so that short names are padded.
        "$fakefirst$name".lc.trans('wh' => '') ~~ /
            ^
            [
                [
                | <[ bfpv ]>+ { take 1 }
                | <[ cgjkqsxz ]>+ { take 2 }
                | <[ dt ]>+ { take 3 }
                | <[ l ]>+ { take 4 }
                | <[ mn ]>+ { take 5 }
                | <[ r ]>+ { take 6 }
                ]
                || .
            ]+
            $ { take 0,0,0 }
        /;
    }.flat.[0,2,3,4].join;
}
 
# Sample names, each followed by the Soundex code it is expected to produce.
my @cases = <
    Soundex     S532
    Example     E251
    Sownteks    S532
    Ekzampul    E251
    Euler       E460
    Gauss       G200
    Hilbert     H416
    Knuth       K530
    Lloyd       L300
    Lukasiewicz L222
    Ellery      E460
    Ghosh       G200
    Heilbronn   H416
    Kant        K530
    Ladd        L300
    Lissajous   L222
    Wheaton     W350
    Ashcraft    A261
    Burroughs   B620
    Burrows     B620
    O'Hara      O600
>;

# Walk the table two items at a time and report each comparison; on a
# mismatch the code that was actually computed is appended.
for @cases -> $name, $expected {
    my $s2 = soundex($name);
    my $verdict = $expected eq $s2 ?? " OK" !! " NOT OK $s2";
    say $name.fmt("%16s "), $expected, $verdict;
}</lang>
{{out}}
<pre> Soundex S532 OK
Example E251 OK
Sownteks S532 OK
Ekzampul E251 OK
Euler E460 OK
Gauss G200 OK
Hilbert H416 OK
Knuth K530 OK
Lloyd L300 OK
Lukasiewicz L222 OK
Ellery E460 OK
Ghosh G200 OK
Heilbronn H416 OK
Kant K530 OK
Ladd L300 OK
Lissajous L222 OK
Wheaton W350 OK
Ashcraft A261 OK
Burroughs B620 OK
Burrows B620 OK
O'Hara O600 OK</pre>
 
=={{header|Phix}}==
Line 3,930 ⟶ 3,853:
=={{header|Racket}}==
The [http://rosettacode.org/wiki/Soundex#Scheme Scheme solution] runs as is in Racket.
 
=={{header|Raku}}==
(formerly Perl 6)
US census algorithm, so "Ashcraft" and "Burroughs" adjusted to match.
We fake up a first consonant in some cases to make up for the fact that we always trim the first numeric code (so that the 'l' of 'Lloyd' is properly deleted).
<lang perl6>sub soundex ($name --> Str) {
    # Returns the Soundex code of $name: its first character upper-cased,
    # followed by three digits.
    my $first = substr($name,0,1).uc;
    gather {
        take $first;
        # The first numeric code taken by the match below is always thrown
        # away by the [0,2,3,4] slice. When the first letter yields no code
        # of its own, prepend a throw-away "de " so that the 'd' supplies the
        # code to discard instead of the name's first real consonant.
        # 'Y' belongs in this set too: without it "Yates" lost its 't' and
        # came out as Y200 rather than Y320.
        my $fakefirst = '';
        $fakefirst = "de " if $first ~~ /^ <[AEIOUWHY]> /;
        # Lower-case, delete every 'w' and 'h', then walk the whole string:
        # each run of consonants sharing a code takes that digit once, and any
        # other character is consumed silently. Three zeros are taken at the
        # end so that short names are padded.
        "$fakefirst$name".lc.trans('wh' => '') ~~ /
            ^
            [
                [
                | <[ bfpv ]>+ { take 1 }
                | <[ cgjkqsxz ]>+ { take 2 }
                | <[ dt ]>+ { take 3 }
                | <[ l ]>+ { take 4 }
                | <[ mn ]>+ { take 5 }
                | <[ r ]>+ { take 6 }
                ]
                || .
            ]+
            $ { take 0,0,0 }
        /;
    }.flat.[0,2,3,4].join;
}
 
# Sample names, each followed by the Soundex code it is expected to produce.
my @cases = <
    Soundex     S532
    Example     E251
    Sownteks    S532
    Ekzampul    E251
    Euler       E460
    Gauss       G200
    Hilbert     H416
    Knuth       K530
    Lloyd       L300
    Lukasiewicz L222
    Ellery      E460
    Ghosh       G200
    Heilbronn   H416
    Kant        K530
    Ladd        L300
    Lissajous   L222
    Wheaton     W350
    Ashcraft    A261
    Burroughs   B620
    Burrows     B620
    O'Hara      O600
>;

# Walk the table two items at a time and report each comparison; on a
# mismatch the code that was actually computed is appended.
for @cases -> $name, $expected {
    my $s2 = soundex($name);
    my $verdict = $expected eq $s2 ?? " OK" !! " NOT OK $s2";
    say $name.fmt("%16s "), $expected, $verdict;
}</lang>
{{out}}
<pre> Soundex S532 OK
Example E251 OK
Sownteks S532 OK
Ekzampul E251 OK
Euler E460 OK
Gauss G200 OK
Hilbert H416 OK
Knuth K530 OK
Lloyd L300 OK
Lukasiewicz L222 OK
Ellery E460 OK
Ghosh G200 OK
Heilbronn H416 OK
Kant K530 OK
Ladd L300 OK
Lissajous L222 OK
Wheaton W350 OK
Ashcraft A261 OK
Burroughs B620 OK
Burrows B620 OK
O'Hara O600 OK</pre>
 
=={{header|REXX}}==
Line 4,446 ⟶ 4,445:
 
testSoundex()</lang>
 
=={{header|Smalltalk}}==
 
{{works with|Smalltalk/X}}
using a builtin utility:
<lang smalltalk>"Ask the built-in PhoneticStringUtilities class for the Soundex code of a sample name."
PhoneticStringUtilities soundexCodeOf: 'Soundex' "-> S532"</lang>
 
=={{header|SNOBOL4}}==
Line 4,492 ⟶ 4,497:
S460 Swhgler
O256 O'Connor</pre>
 
=={{header|Smalltalk}}==
 
{{works with|Smalltalk/X}}
using a builtin utility:
<lang smalltalk>"Ask the built-in PhoneticStringUtilities class for the Soundex code of a sample name."
PhoneticStringUtilities soundexCodeOf: 'Soundex' "-> S532"</lang>
 
=={{header|Standard ML}}==
10,333

edits