Jump to content

NYSIIS: Difference between revisions

5,456 bytes added, 5 years ago
(Added Perl example)
Line 1,209:
Wheeler, WALAR
Louis XVI, L
</pre>
 
=={{header|Phix}}==
{{trans|Go}}
<lang Phix>function isVowel(integer byte)
    -- True when byte is one of the upper-case letters A, E, I, O or U.
    integer pos = find(byte,"AEIOU")
    return pos!=0
end function
function isRoman(string s)
    -- Report whether s is non-empty and consists solely of the
    -- characters 'I', 'V' and 'X'.
    integer n = length(s)
    if n=0 then return false end if
    for idx=1 to n do
        integer ch = s[idx]
        if find(ch,"IVX")=0 then return false end if
    end for
    return true
end function
function nysiis(string word)
    -- Compute a NYSIIS-style phonetic key for word.
    --
    -- The input is upper-cased; a trailing token made only of I/V/X (see
    -- isRoman) and a trailing JR/JNR/SR/SNR are dropped; spaces, commas,
    -- apostrophes and hyphens are removed; then the prefix, suffix and
    -- per-letter substitutions below are applied, adjacent duplicate
    -- letters are collapsed, and a trailing S, AY or A is trimmed.
    --
    -- Returns the full (untruncated) key, or "" when the input is empty
    -- or contains nothing but separators/punctuation.
    if word == "" then return "" end if
    word = upper(word)

    -- Strip trailing separators first, so that a Roman-numeral token found
    -- below really is the tail of word and can be removed by its length.
    while length(word)>0 and find(word[$], ", ")!=0 do
        word = word[1..$-1]
    end while

    -- Drop a trailing Roman-numeral token, but only when it is not the
    -- only token (so a lone "XI" is still encoded as a name).
    sequence ww = split_any(word, ", ", no_empty:=true)
    if length(ww)>1 then
        string last = ww[$]
        if isRoman(last) then
            word = word[1..-length(last)-1]
        end if
    end if

    -- Remove every space, comma, apostrophe and hyphen.
    word = substitute_all(word, " ,'-", repeat("",4))

    -- Input made up solely of separators/punctuation leaves nothing to
    -- encode; bail out before word[1] is read below.
    if word == "" then return "" end if

    -- Drop a generational suffix, provided something remains before it.
    sequence eStrs = {"JR", "JNR", "SR", "SNR"}
    for i=1 to length(eStrs) do
        string ei = eStrs[i]
        integer lei = length(ei)
        if length(word)>lei
        and word[-lei..$]=ei then
            word = word[1..-lei-1]
        end if
    end for

    -- Rewrite a leading letter group (replacement may differ in length).
    sequence fStrs = {{"MAC","MCC"}, {"KN","N"}, {"K","C"},
                      {"PH","FF"}, {"PF","FF"}, {"SCH","SSS"}}
    for i=1 to length(fStrs) do
        string {fi,rfi} = fStrs[i]
        integer lfi = length(fi)
        if length(word)>lfi
        and word[1..lfi]=fi then
            word[1..lfi] = rfi
        end if
    end for

    -- Rewrite the final two letters: EE/IE -> Y, DT/RT/RD/NT/ND -> D.
    if length(word)>=2 then
        string l2 = word[-2..-1]
        if find(l2,{"EE","IE"}) then
            word[-2..-1] = "Y"
        elsif find(l2,{"DT","RT","RD","NT","ND"}) then
            word[-2..-1] = "D"
        end if
    end if

    -- The first letter is kept verbatim; only the remainder is transformed.
    integer initial = word[1]
    string key = word[1..1]
    word = word[2..$]
    word = substitute_all(word,{"EV","KN","SCH","PH"},
                               {"AF","N", "SSS","FF"})

    -- Per-letter substitutions, applied in place from the second letter on.
    -- Because the scan is left to right, sb[i-1] has already been rewritten
    -- when it is inspected, whereas sb[i+1] has not.
    string sb = key&word
    integer le := length(sb)
    for i=2 to le do
        switch sb[i] do
            case 'E', 'I', 'O', 'U': sb[i] = 'A'
            case 'Q': sb[i] = 'G'
            case 'Z': sb[i] = 'S'
            case 'M': sb[i] = 'N'
            case 'K': sb[i] = 'C'
            -- (i>1 always holds here, since the loop starts at 2)
            case 'H': if (i> 1 and not isVowel(sb[i-1]))
                      or (i<le and not isVowel(sb[i+1])) then
                          sb[i] = sb[i-1]
                      end if
            case 'W': if isVowel(sb[i-1]) then
                          sb[i] = sb[i-1]
                      end if
        end switch
    end for

    -- Append the transformed letters, skipping any equal to its predecessor.
    integer prev := initial
    for j=2 to le do
        integer c := sb[j]
        if prev != c then
            key &= c
            prev = c
        end if
    end for

    -- Trim the tail: drop a final S, turn a final AY into Y, drop a final A.
    if length(key)>=1 and key[$] == 'S' then key[$ ..$] = "" end if
    if length(key)>=2 and key[-2..-1] == "AY" then key[$-1..$] = "Y" end if
    if length(key)>=1 and key[$] == 'A' then key[$ ..$] = "" end if
    return key
end function
-- Test table: each entry is {name, expected key}. The loop following this
-- table calls nysiis(name) and compares the result with the expected key.
-- Entries marked "(see note)" are discussed in the note after the listing.
constant tests = {
{ "Bishop", "BASAP" },
{ "Carlson", "CARLSAN" },
{ "Carr", "CAR" },
{ "Chapman", "CAPNAN" },
{ "Franklin", "FRANCLAN" },
{ "Greene", "GRAN" },
{ "Harper", "HARPAR" },
{ "Jacobs", "JACAB" },
{ "Larson", "LARSAN" },
{ "Lawrence", "LARANC" },
{ "Lawson", "LASAN" },
{ "Louis, XVI", "L" }, -- (see note)
{ "Lynch", "LYNC" },
{ "Mackenzie", "MCANSY" },
{ "Matthews", "MAT" }, -- (see note)
{ "May jnr", "MY" },
{ "McCormack", "MCARNAC" },
{ "McDaniel", "MCDANAL" },
{ "McDonald", "MCDANALD" },
{ "Mclaughlin", "MCLAGLAN" },
{ "Morrison", "MARASAN" },
{ "O'Banion", "OBANAN" },
{ "O'Brien", "OBRAN" },
{ "Richards", "RACARD" },
{ "Silva", "SALV" },
{ "Watkins", "WATCAN" },
{ "Wheeler", "WALAR" },
{ "Willis", "WAL" }, -- (see note)
{ "Xi", "X" },
{ "bevan", "BAFAN" },
{ "brown, sr", "BRAN" },
{ "brown sr", "BRAN" },
{ "browne, III", "BRAN" },
{ "browne, IV", "BRAN" },
{ "evans", "EVAN" },
{ "knight", "NAGT" },
{ "mitchell", "MATCAL" },
{ "o'daniel", "ODANAL" },
{ "D'Souza", "DSAS" },
{ "de Sousa", "DASAS" },
{ "Hoyle-Johnson", "HAYLAJANSAN" },
{ "Vaughan Williams", "VAGANWALAN" },
{ "de la Mare II", "DALANAR" } }
 
-- Run every entry of tests through nysiis() and report only the mismatches.
integer errors = 0
for i=1 to length(tests) do
    sequence entry = tests[i]
    string name = entry[1],
           expected = entry[2],
           actual = nysiis(name)
    if actual!=expected then
        errors += 1
        -- Keys longer than six characters are shown as first-six(rest).
        string shown = actual
        if length(actual) > 6 then
            shown = sprintf("%s(%s)", {actual[1..6], actual[7..$]})
        end if
        printf(1,"%-16s : %s\n", {name, shown})
    end if
end for
printf(1,"All tests completed, %d errors\n",errors)</lang>
Note: After some careful consideration, I have decided that all three tests marked (see note) <i>are</i> in fact correct, or at least follow the Wikipedia (wp) description of the algorithm, specifically in applying step 6 <i>before</i> step 7.
{{out}}
<pre>
All tests completed, 0 errors
</pre>
 
7,805

edits

Cookies help us deliver our services. By using our services, you agree to our use of cookies.