Anonymous user
NYSIIS: Difference between revisions
Refactored 'Caché ObjectScript' section based on other code examples to reduce footprint.
(→Tcl: Added implementation) |
(Refactored 'Caché ObjectScript' section based on other code examples to reduce footprint.) |
||
Line 6:
=={{header|Caché ObjectScript}}==
Refactored code based on other examples to reduce footprint.
<lang cache>
Class Utils.Phonetic [ Abstract ]
{
ClassMethod Encode(pAlgorithm As %String = "", pName As %String = "", ByRef pCode As %String, pSuffixRem As %Boolean = 1, pTruncate As %Integer = 0) As %Status
{
// check algorithm and name
Set pAlgorithm=$ZConvert(pAlgorithm, "l")
If pAlgorithm="" Quit $$$ERROR($$$GeneralError, "No algorithm specified.")
If $Case(pAlgorithm, "nysiis":1, :0)=0 Quit $$$ERROR($$$GeneralError, "Unknown algorithm specified.")
If $Match(pName, ".*\d.*# no numbers") Quit $$$ERROR($$$GeneralError, "Name cannot contain numerics.")
// remove apostrophes, find punctuation and replace with spaces (exclude hyphens)
Set pName=$Translate(pName, "'")
Set pun=$ZStrip(pName, "*E'P", "-")
Set pName=$Translate(pName, pun, $Justify(" ", $Length(pun)))
Line 83 ⟶ 31:
// - http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
If pSuffixRem {
Set ords=$ListBuild("KG", "LG", "KT", "LT", "GCB", "KCB", "DCB", "CB", "GCMG", "KCMG", "DCMG", "CMG", "DSO
"GCVO", "KCVO", "DCVO", "CVO", "LVO", "MVO", "OM", "ISO", "GBE", "KBE", "DBE", "CBE", "OBE", "MBE", "CH")
Set decs=$ListBuild("VC", "GC", "CGC", "RRC", "DSC", "MC", "DFC", "AFC", "ARRC", "OBI", "IOM")
Set regexp="( )(SNR$|SR$|JNR$|JR$|ESQ$|"_$ListToString(ords, "$|")_"$|"_$ListToString(decs, "$|")_"$|[
Set rem=##class(%Regex.Matcher).%New(regexp, pName)
Set pName=rem.ReplaceAll("")
}
//
Set pName=$ZStrip($Translate(pName, "-", " "), "<=>W")
If $Length($Piece(pName, " "))<2 Quit $$$ERROR($$$GeneralError, "Invalid name.")
// begin algorithm and truncate result, if necessary
Set pCode=""
For piece=1:1:$Length(pName, " ") {
If pAlgorithm="nysiis" Set pCode=pCode_..NYSIIS(pName)
}
If pTruncate {
Set pName=pCode
Line 124 ⟶ 66:
- http://www.dropby.com/indexLF.html?content=/NYSIIS.html
*/
// create regexp matcher instance, remove punctuation and convert all to upper case
Set rem=##class(%Regex.Matcher).%New(" ")
Set rem.Text=$ZConvert($ZStrip(pName, "*P"), "U")
// translate first characters of name:
// => MAC->MCC, KN->N, K->C, PH/PF->FF, SCH->SSS
For rule="^MAC->MCC", "^KN->N", "^K->C", "^(PH|PF)->FF", "SCH->SSS" {
If rem.Locate() Set rem.Text=rem.ReplaceFirst($Piece(rule, "->", 2)) Quit
}
// translate last characters of name:
// => EE/IE->Y, DT/RT/RD/NT/ND->D
For rule="(EE|IE)$->Y", "(DT|RT|RD|NT|ND)$->D" {
Set rem.Pattern=$Piece(rule, "->")
}
// first character of key = first character of name
Set pName1=$Extract(
// translate remaining characters by following rules, incrementing by one character each time:
Line 162 ⟶ 96:
// => W->if previous is vowel, A (A is the only vowel left)
// => add current to key if current is not same as the last key character
Set ptr=0, rules=$ListBuild("EV->AF", "(A|E|I|O|U)->A", "Q->G", "Z->S", "M->N", "KN->N", "K->C",
"SCH->SSS", "PH->FF", "H[^A]", "[^A]H", "AW->A")
While $ListNext(rules, ptr, rule) {
}
Set rem.Text=rem.ReplaceAll($Piece(rule, "->", 2))
}
Set pName=$
// if last character is S, remove it
Line 187 ⟶ 120:
}
}
</lang>
{{out|Examples}}
<pre>
USER>
knight -> NAGT
mitchell -> MATCAL
o'daniel -> ODANAL
brown sr -> BRAN
browne III -> BRAN
browne IV -> BRAN
O'Banion -> OBANAN
Mclaughlin -> MCLAGL[AN]
McCormack -> MCARNA[C]
Chapman -> CHAPNA[N]
Silva -> SALV
McDonald -> MCDANA[LD]
Lawson -> LASAN
Jacobs -> JACAB
Greene -> GRAN
O'Brien -> OBRAN
Morrison -> MARASA[N]
Larson -> LARSAN
Willis -> WAL
Mackenzie -> MCANSY
Carr -> CAR
Lawrence -> LARANC
Matthews -> MAT
Richards -> RACARD
Bishop -> BASAP
Franklin -> FRANCL[AN]
McDaniel -> MCDANA[L]
Harper -> HARPAR
Lynch -> LYNC
Watkins -> WATCAN
Carlson -> CARLSA[N]
Wheeler -> WHALAR
Louis XVI -> L
</pre>
=={{header|Perl 6}}==
|