NYSIIS: Difference between revisions
Content added Content deleted
(Add Julia language) |
(Added C++ example) |
||
Line 10: | Line 10: | ||
* [[Soundex]] |
* [[Soundex]] |
||
<br><br> |
<br><br> |
||
=={{header|C++}}== |
|||
Implementation based on Wikipedia description of the algorithm. |
|||
<lang c> |
|||
#include <iostream> // required for debug code in main() only |
|||
#include <iomanip> // required for debug code in main() only |
|||
#include <string> |
|||
std::string NYSIIS( std::string const& str ) |
|||
{ |
|||
std::string s, out; |
|||
s.reserve( str.length() ); |
|||
for( auto const c : str ) |
|||
{ |
|||
if( c >= 'a' && c <= 'z' ) |
|||
s += c - ('a' - 'A'); |
|||
else if( c >= 'A' && c <= 'Z' ) |
|||
s += c; |
|||
} |
|||
auto replace = []( char const * const from, char const* to, char* const dst ) -> bool |
|||
{ |
|||
auto const n = strlen( from ); |
|||
if( strncmp( from, dst, n ) == 0 ) |
|||
{ |
|||
strncpy( dst, to, n ); |
|||
return true; |
|||
} |
|||
return false; |
|||
}; |
|||
auto multiReplace = []( char const* const* from, char const* to, char* const dst ) -> bool |
|||
{ |
|||
auto const n = strlen( *from ); |
|||
for( ; *from; ++from ) |
|||
if( strncmp( *from, dst, n ) == 0 ) |
|||
{ |
|||
memcpy( dst, to, n ); |
|||
return true; |
|||
} |
|||
return false; |
|||
}; |
|||
auto isVowel = []( char const c ) -> bool |
|||
{ |
|||
return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U'; |
|||
}; |
|||
size_t n = s.length(); |
|||
replace( "MAC", "MCC", &s[0] ); |
|||
replace( "KN", "NN", &s[0] ); |
|||
replace( "K", "C", &s[0] ); |
|||
char const* const prefix[] = { "PH", "PF", 0 }; |
|||
multiReplace( prefix, "FF", &s[0] ); |
|||
replace( "SCH", "SSS", &s[0] ); |
|||
char const* const suffix1[] = { "EE", "IE", 0 }; |
|||
char const* const suffix2[] = { "DT", "RT", "RD", "NT", "ND", 0 }; |
|||
if( multiReplace( suffix1, "Y", &s[n - 2] ) || multiReplace( suffix2, "D", &s[n - 2] )) |
|||
{ |
|||
s.pop_back(); |
|||
--n; |
|||
} |
|||
out += s[0]; |
|||
char* vowels[] = { "A", "E", "I", "O", "U", 0 }; |
|||
for( unsigned i = 1; i < n; ++i ) |
|||
{ |
|||
char* const c = &s[i]; |
|||
if( !replace( "EV", "AV", c ) ) |
|||
multiReplace( vowels, "A", c ); |
|||
replace( "Q", "G", c ); |
|||
replace( "Z", "S", c ); |
|||
replace( "M", "N", c ); |
|||
if( !replace( "KN", "NN", c )) |
|||
replace( "K", "C", c ); |
|||
replace( "SCH", "SSS", c ); |
|||
replace( "PH", "FF", c ); |
|||
if( *c == 'H' && (!isVowel( s[i - 1] ) || i + 1 >= n || !isVowel( s[i + 1] ))) |
|||
*c = s[i - 1]; |
|||
if( *c == 'W' && isVowel( s[i - 1] )) |
|||
*c = 'A'; |
|||
if( out.back() != *c ) |
|||
out += *c; |
|||
} |
|||
if( out.back() == 'S' || out.back() == 'A' ) |
|||
out.pop_back(); |
|||
n = out.length() - 2; |
|||
if( out[n] == 'A' && out[n + 1] == 'Y' ) |
|||
out = out.substr( 0, n ) + "Y"; |
|||
return out; |
|||
} |
|||
int main() |
|||
{ |
|||
static char const * const names[][2] = { |
|||
{ "Bishop", "BASAP" }, |
|||
{ "Carlson", "CARLSAN" }, |
|||
{ "Carr", "CAR" }, |
|||
{ "Chapman", "CAPNAN" }, |
|||
{ "Franklin", "FRANCLAN" }, |
|||
{ "Greene", "GRAN" }, |
|||
{ "Harper", "HARPAR" }, |
|||
{ "Jacobs", "JACAB" }, |
|||
{ "Larson", "LARSAN" }, |
|||
{ "Lawrence", "LARANC" }, |
|||
{ "Lawson", "LASAN" }, |
|||
{ "Louis, XVI", "LASXV" }, |
|||
{ "Lynch", "LYNC" }, |
|||
{ "Mackenzie", "MCANSY" }, |
|||
{ "Matthews", "MATA" }, |
|||
{ "McCormack", "MCARNAC" }, |
|||
{ "McDaniel", "MCDANAL" }, |
|||
{ "McDonald", "MCDANALD" }, |
|||
{ "Mclaughlin", "MCLAGLAN" }, |
|||
{ "Morrison", "MARASAN" }, |
|||
{ "O'Banion", "OBANAN" }, |
|||
{ "O'Brien", "OBRAN" }, |
|||
{ "Richards", "RACARD" }, |
|||
{ "Silva", "SALV" }, |
|||
{ "Watkins", "WATCAN" }, |
|||
{ "Wheeler", "WALAR" }, |
|||
{ "Willis", "WALA" }, |
|||
{ "brown, sr", "BRANSR" }, |
|||
{ "browne, III", "BRAN" }, |
|||
{ "browne, IV", "BRANAV" }, |
|||
{ "knight", "NAGT" }, |
|||
{ "mitchell", "MATCAL" }, |
|||
{ "o'daniel", "ODANAL" } }; |
|||
for( auto const& name : names ) |
|||
{ |
|||
auto const code = NYSIIS( name[0] ); |
|||
std::cout << std::left << std::setw( 16 ) << name[0] << std::setw( 8 ) << code; |
|||
if( code == std::string( name[1] )) |
|||
std::cout << " ok"; |
|||
else |
|||
std::cout << " ERROR: " << name[1] << " expected"; |
|||
std::cout << std::endl; |
|||
} |
|||
return 0; |
|||
} |
|||
</lang> |
|||
{{out|Example output}} |
|||
<pre> |
|||
Bishop BASAP ok |
|||
Carlson CARLSAN ok |
|||
Carr CAR ok |
|||
Chapman CAPNAN ok |
|||
Franklin FRANCLAN ok |
|||
Greene GRAN ok |
|||
Harper HARPAR ok |
|||
Jacobs JACAB ok |
|||
Larson LARSAN ok |
|||
Lawrence LARANC ok |
|||
Lawson LASAN ok |
|||
Louis, XVI LASXV ok |
|||
Lynch LYNC ok |
|||
Mackenzie MCANSY ok |
|||
Matthews MATA ok |
|||
McCormack MCARNAC ok |
|||
McDaniel MCDANAL ok |
|||
McDonald MCDANALD ok |
|||
Mclaughlin MCLAGLAN ok |
|||
Morrison MARASAN ok |
|||
O'Banion OBANAN ok |
|||
O'Brien OBRAN ok |
|||
Richards RACARD ok |
|||
Silva SALV ok |
|||
Watkins WATCAN ok |
|||
Wheeler WALAR ok |
|||
Willis WALA ok |
|||
brown, sr BRANSR ok |
|||
browne, III BRAN ok |
|||
browne, IV BRANAV ok |
|||
knight NAGT ok |
|||
mitchell MATCAL ok |
|||
o'daniel ODANAL ok |
|||
</pre> |
|||
=={{header|Caché ObjectScript}}== |
=={{header|Caché ObjectScript}}== |