NYSIIS: Difference between revisions

1,105 bytes removed ,  11 years ago
Refactored 'Caché ObjectScript' section based on other code examples to reduce footprint.
(→‎Tcl: Added implementation)
(Refactored 'Caché ObjectScript' section based on other code examples to reduce footprint.)
Line 6:
 
=={{header|Caché ObjectScript}}==
Refactored code based on other examples to reduce footprint.
<lang cache>Class Utils.Phonetic [ Abstract ]
{
 
<lang cache>
ClassMethod UnitTest() As %Status
Class Utils.Phonetic [ Abstract ]
{
// define test list
Set testlist=$ListBuild(
$ListBuild("knight", "NAGT"),
$ListBuild("mitchell", "MATCAL"),
$ListBuild("o'daniel", "ODANAL"),
$ListBuild("brown sr", "BRAN"),
$ListBuild("browne III", "BRAN"),
$ListBuild("browne IV", "BRAN"),
$ListBuild("O'Banion", "OBANAN"),
$ListBuild("Mclaughlin", "MCLAGL[AN]"),
$ListBuild("McCormack", "MCARNA[C]"),
$ListBuild("Chapman", "CHAPNA[N]"),
$ListBuild("Silva", "SALV"),
$ListBuild("McDonald", "MCDANA[LD]"),
$ListBuild("Lawson", "LASAN"),
$ListBuild("Jacobs", "JACAB"),
$ListBuild("Greene", "GRAN"),
$ListBuild("O'Brien", "OBRAN"),
$ListBuild("Morrison", "MARASA[N]"),
$ListBuild("Larson", "LARSAN"),
$ListBuild("Willis", "WAL"),
$ListBuild("Mackenzie", "MCANSY"),
$ListBuild("Carr", "CAR"),
$ListBuild("Lawrence", "LARANC"),
$ListBuild("Matthews", "MAT"),
$ListBuild("Richards", "RACARD"),
$ListBuild("Bishop", "BASAP"),
$ListBuild("Franklin", "FRANCL[AN]"),
$ListBuild("McDaniel", "MCDANA[L]"),
$ListBuild("Harper", "HARPAR"),
$ListBuild("Lynch", "LYNC"),
$ListBuild("Watkins", "WATCAN"),
$ListBuild("Carlson", "CARLSA[N]"),
$ListBuild("Wheeler", "WHALAR")
)
// order through test list
Set ptr=0, sc=$$$ERROR($$$GeneralError, "No entries found.")
While $ListNext(testlist, ptr, val) {
Set sc=##class(Utils.Phonetic).Encode("nysiis", $List(val), .code,, 6)
If $$$ISERR(sc) Quit
If code'=$List(val, 2) Set sc=$$$ERROR($$$GeneralError, "Encoding did not match.") Quit
}
// finished
If $$$ISERR(sc) Quit sc
Quit $$$OK
}
 
ClassMethod Encode(pAlgorithm As %String = "", pName As %String = "", ByRef pCode As %String, pSuffixRem As %Boolean = 1, pTruncate As %Integer = 0) As %Status
{
// check algorithm and name
Set pAlgorithm=$ZConvert(pAlgorithm, "l")
If pAlgorithm="" Quit $$$ERROR($$$GeneralError, "No algorithm specified.")
If $Case(pAlgorithm, "nysiis":1, :0)=0 Quit $$$ERROR($$$GeneralError, "Unknown algorithm specified.")
// check name
If $Match(pName, ".*\d.*# no numbers") Quit $$$ERROR($$$GeneralError, "Name cannot contain numerics.")
// remove apostrophes, find punctuation and replace with spaces (exclude hyphens)
Set pName=$Translate(pName, "'")
// find punctuation and replace with spaces (exclude hyphens)
Set pun=$ZStrip(pName, "*E'P", "-")
Set pName=$Translate(pName, pun, $Justify(" ", $Length(pun)))
Line 83 ⟶ 31:
// - http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
If pSuffixRem {
Set ords=$ListBuild("KG", "LG", "KT", "LT", "GCB", "KCB", "DCB", "CB", "GCMG", "KCMG", "DCMG", "CMG", "DSO", "GCVO",
"GCVO", "KCVO", "DCVO", "CVO", "LVO", "MVO", "OM", "ISO", "GBE", "KBE", "DBE", "CBE", "OBE", "MBE", "CH")
Set decs=$ListBuild("VC", "GC", "CGC", "RRC", "DSC", "MC", "DFC", "AFC", "ARRC", "OBI", "IOM")
Set regexp="( )(SNR$|SR$|JNR$|JR$|ESQ$|"_$ListToString(ords, "$|")_"$|"_$ListToString(decs, "$|")_"$|[IVIVX]+$)"
Set rem=##class(%Regex.Matcher).%New(regexp, pName)
For {
Set pName=rem.ReplaceAll("")
Set locn=$Locate(pName, regexp) If 'locn Quit
Set pName=$Extract(pName, 1, locn-1)
}
}
// lastly replace hyphen withand white space, plus some final validation
Set pName=$ZStrip($Translate(pName, "-", " "), "<=>W")
// some final validation
If $Length($Piece(pName, " "))<2 Quit $$$ERROR($$$GeneralError, "Invalid name.")
// begin algorithm and truncate result, if necessary
Set pCode=""
For piece=1:1:$Length(pName, " ") {
If pAlgorithm="nysiis" Set pCode=pCode_..NYSIIS(pName)
}
// truncate string, if necessary
If pTruncate {
Set pName=pCode
Line 124 ⟶ 66:
- http://www.dropby.com/indexLF.html?content=/NYSIIS.html
*/
// create regexp matcher instance, remove punctuation and convert all to upper case
Set rem=##class(%Regex.Matcher).%New(" ")
Set rem.Text=$ZConvert($ZStrip(pName, "*P"), "U")
// translate first characters of name:
// => MAC->MCC, KN->N, K->C, PH/PF->FF, SCH->SSS
For rule="^MAC->MCC", "^KN->N", "^K->C", "^(PH|PF)->FF", "SCH->SSS" {
Set pName1=$Extract(pName, 1)
Set pName2rem.Pattern=$ExtractPiece(pNamerule, 1, 2"->")
If rem.Locate() Set rem.Text=rem.ReplaceFirst($Piece(rule, "->", 2)) Quit
Set pName3=$Extract(pName, 1, 3)
If pName3="MAC" {
Set $Extract(pName, 1, 3)="MCC"
} ElseIf pName2="KN" {
Set $Extract(pName, 1, 2)="N"
} ElseIf pName1="K" {
Set $Extract(pName, 1)="C"
} ElseIf pName2="PH" {
Set $Extract(pName, 1, 2)="FF"
} ElseIf pName2="PF" {
Set $Extract(pName, 1, 2)="FF"
} ElseIf pName3="SCH" {
Set $Extract(pName, 1, 3)="SSS"
}
 
// translate last characters of name:
// => EE/IE->Y, DT/RT/RD/NT/ND->D
For rule="(EE|IE)$->Y", "(DT|RT|RD|NT|ND)$->D" {
Set pNamexx=$Case($Extract(pName, *-1, *), "EE": "Y", "IE": "Y",
Set rem.Pattern=$Piece(rule, "->")
"DT": "D", "RT": "D", "RD": "D", "NT": "D", "ND": "D", :"")
If $Lengthrem.Locate(pNamexx) Set pNamerem.Text=rem.ReplaceFirst($ExtractPiece(pNamerule, 1"->", *-2)_pNamexx) Quit
}
// first character of key = first character of name
Set pName1=$Extract(pNamerem.Text, 1), rem.Text=$Extract(rem.Text, 2, *)
Set $Extract(pName, 1)=""
// translate remaining characters by following rules, incrementing by one character each time:
Line 162 ⟶ 96:
// => W->if previous is vowel, A (A is the only vowel left)
// => add current to key if current is not same as the last key character
Set ptr=0, rules=$ListBuild("EV->AF", "(A|E|I|O|U)->A", "Q->G", "Z->S", "M->N", "KN->N", "K->C",
Set pName=$Replace(pName, "EV", "AF")
"SCH->SSS", "PH->FF", "H[^A]", "[^A]H", "AW->A")
Set pName=$Translate(pName, "EIOU", "AAAA")
While $ListNext(rules, ptr, rule) {
Set pName=$Translate(pName, "QZM", "GSN")
Set pNamerem.Pattern=$ReplacePiece(pNamerule, "KN", "N->")
Set If pName=$TranslatePiece(pNamerule, "K->", 2)="C", rem.Locate() {
Set pName=$ReplacePiece(pNamerule, "SCH->", 2)=$Translate(rem.Group, "SSSH")
}
Set pName=$Replace(pName, "PH", "FF")
Set rem.Text=rem.ReplaceAll($Piece(rule, "->", 2))
Set locn=$Locate(pName, "H[^A]") If locn Set $Extract(pName, locn)=""
}
Set locn=$Locate(pName, "[^A]H") If locn Set $Extract(pName, locn+1)=""
Set pName=$ReplaceZStrip(pNamerem.Text, "AW", "A=U") // remove duplicates
Set pName=$ZStrip(pName,"=U") // remove duplicates
// if last character is S, remove it
Line 187 ⟶ 120:
}
 
}
}</lang>
</lang>
{{out|Examples}}
<pre>
USER>DoFor { Read !, name Quit:name="" Set sc=##class(Utils.Phonetic).Encode("nysiis", "Mclaughlin"name, .code,, 6) If sc Write " -> ", code }
USER>Write code
MCLAGLAN
</pre>
 
knight -> NAGT
mitchell -> MATCAL
o'daniel -> ODANAL
brown sr -> BRAN
browne III -> BRAN
browne IV -> BRAN
O'Banion -> OBANAN
Mclaughlin -> MCLAGL[AN]
McCormack -> MCARNA[C]
Chapman -> CHAPNA[N]
Silva -> SALV
McDonald -> MCDANA[LD]
Lawson -> LASAN
Jacobs -> JACAB
Greene -> GRAN
O'Brien -> OBRAN
Morrison -> MARASA[N]
Larson -> LARSAN
Willis -> WAL
Mackenzie -> MCANSY
Carr -> CAR
Lawrence -> LARANC
Matthews -> MAT
Richards -> RACARD
Bishop -> BASAP
Franklin -> FRANCL[AN]
McDaniel -> MCDANA[L]
Harper -> HARPAR
Lynch -> LYNC
Watkins -> WATCAN
Carlson -> CARLSA[N]
Wheeler -> WHALAR
Louis XVI -> L
</pre>
 
=={{header|Perl 6}}==