NYSIIS: Difference between revisions

NYSIIS (view source)

Revision as of 12:42, 23 March 2013

1,105 bytes removed , 11 years ago

Refactored 'Caché ObjectScript' section based on other code examples to reduce footprint.

Anonymous user

rosettacode>Toucanbird

Revision as of 11:28, 22 March 2013 (view source) rosettacode>Dkf (→‎Tcl: Added implementation) ← Older edit		Revision as of 12:42, 23 March 2013 (view source) rosettacode>Toucanbird (Refactored 'Caché ObjectScript' section based on other code examples to reduce footprint.) Newer edit →
Line 6: =={{header\|Caché ObjectScript}}== Refactored code based on other examples to reduce footprint. ~~<lang cache>Class Utils.Phonetic [ Abstract ]~~ { <lang cache> ~~ClassMethod UnitTest() As %Status~~ Class Utils.Phonetic [ Abstract ] { ~~// define test list~~ ~~Set testlist=$ListBuild(~~ ~~$ListBuild("knight", "NAGT"),~~ ~~$ListBuild("mitchell", "MATCAL"),~~ ~~$ListBuild("o'daniel", "ODANAL"),~~ ~~$ListBuild("brown sr", "BRAN"),~~ ~~$ListBuild("browne III", "BRAN"),~~ ~~$ListBuild("browne IV", "BRAN"),~~ ~~$ListBuild("O'Banion", "OBANAN"),~~ ~~$ListBuild("Mclaughlin", "MCLAGL[AN]"),~~ ~~$ListBuild("McCormack", "MCARNA[C]"),~~ ~~$ListBuild("Chapman", "CHAPNA[N]"),~~ ~~$ListBuild("Silva", "SALV"),~~ ~~$ListBuild("McDonald", "MCDANA[LD]"),~~ ~~$ListBuild("Lawson", "LASAN"),~~ ~~$ListBuild("Jacobs", "JACAB"),~~ ~~$ListBuild("Greene", "GRAN"),~~ ~~$ListBuild("O'Brien", "OBRAN"),~~ ~~$ListBuild("Morrison", "MARASA[N]"),~~ ~~$ListBuild("Larson", "LARSAN"),~~ ~~$ListBuild("Willis", "WAL"),~~ ~~$ListBuild("Mackenzie", "MCANSY"),~~ ~~$ListBuild("Carr", "CAR"),~~ ~~$ListBuild("Lawrence", "LARANC"),~~ ~~$ListBuild("Matthews", "MAT"),~~ ~~$ListBuild("Richards", "RACARD"),~~ ~~$ListBuild("Bishop", "BASAP"),~~ ~~$ListBuild("Franklin", "FRANCL[AN]"),~~ ~~$ListBuild("McDaniel", "MCDANA[L]"),~~ ~~$ListBuild("Harper", "HARPAR"),~~ ~~$ListBuild("Lynch", "LYNC"),~~ ~~$ListBuild("Watkins", "WATCAN"),~~ ~~$ListBuild("Carlson", "CARLSA[N]"),~~ ~~$ListBuild("Wheeler", "WHALAR")~~ ) ~~// order through test list~~ ~~Set ptr=0, sc=$$$ERROR($$$GeneralError, "No entries found.")~~ ~~While $ListNext(testlist, ptr, val) {~~ ~~Set sc=##class(Utils.Phonetic).Encode("nysiis", $List(val), .code,, 6)~~ ~~If $$$ISERR(sc) Quit~~ ~~If code'=$List(val, 2) Set sc=$$$ERROR($$$GeneralError, "Encoding did not match.") Quit~~ } ~~// finished~~ ~~If $$$ISERR(sc) Quit sc~~ ~~Quit $$$OK~~ } ClassMethod Encode(pAlgorithm As %String = "", pName As %String = "", ByRef pCode As %String, pSuffixRem As %Boolean = 1, pTruncate As %Integer = 0) As %Status { // check algorithm and name Set pAlgorithm=$ZConvert(pAlgorithm, "l") If pAlgorithm="" Quit $$$ERROR($$$GeneralError, "No algorithm specified.") If $Case(pAlgorithm, "nysiis":1, :0)=0 Quit $$$ERROR($$$GeneralError, "Unknown algorithm specified.") ~~// check name~~ If $Match(pName, ".\d.# no numbers") Quit $$$ERROR($$$GeneralError, "Name cannot contain numerics.") // remove apostrophes, find punctuation and replace with spaces (exclude hyphens) Set pName=$Translate(pName, "'") ~~// find punctuation and replace with spaces (exclude hyphens)~~ Set pun=$ZStrip(pName, "E'P", "-") Set pName=$Translate(pName, pun, $Justify(" ", $Length(pun))) Line 83 ⟶ 31: // - http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom) If pSuffixRem { Set ords=$ListBuild("KG", "LG", "KT", "LT", "GCB", "KCB", "DCB", "CB", "GCMG", "KCMG", "DCMG", "CMG", "DSO~~", "GCVO~~", "GCVO", "KCVO", "DCVO", "CVO", "LVO", "MVO", "OM", "ISO", "GBE", "KBE", "DBE", "CBE", "OBE", "MBE", "CH") Set decs=$ListBuild("VC", "GC", "CGC", "RRC", "DSC", "MC", "DFC", "AFC", "ARRC", "OBI", "IOM") Set regexp="( )(SNR$\|SR$\|JNR$\|JR$\|ESQ$\|"_$ListToString(ords, "$\|")_"$\|"_$ListToString(decs, "$\|")_"$\|[IVIVX]+$)" Set rem=##class(%Regex.Matcher).%New(regexp, pName) ~~For {~~ Set pName=rem.ReplaceAll("") ~~Set locn=$Locate(pName, regexp) If 'locn Quit~~ ~~Set pName=$Extract(pName, 1, locn-1)~~ } } // ~~lastly~~ replace hyphen ~~with~~and white space, plus some final validation Set pName=$ZStrip($Translate(pName, "-", " "), "<=>W") ~~// some final validation~~ If $Length($Piece(pName, " "))<2 Quit $$$ERROR($$$GeneralError, "Invalid name.") // begin algorithm and truncate result, if necessary Set pCode="" For piece=1:1:$Length(pName, " ") { If pAlgorithm="nysiis" Set pCode=pCode_..NYSIIS(pName) } ~~// truncate string, if necessary~~ If pTruncate { Set pName=pCode Line 124 ⟶ 66: - http://www.dropby.com/indexLF.html?content=/NYSIIS.html / // create regexp matcher instance, remove punctuation and convert all to upper case Set rem=##class(%Regex.Matcher).%New(" ") Set rem.Text=$ZConvert($ZStrip(pName, "P"), "U") // translate first characters of name: // => MAC->MCC, KN->N, K->C, PH/PF->FF, SCH->SSS For rule="^MAC->MCC", "^KN->N", "^K->C", "^(PH\|PF)->FF", "SCH->SSS" { ~~Set pName1=$Extract(pName, 1)~~ Set ~~pName2~~rem.Pattern=$~~Extract~~Piece(~~pName~~rule, ~~1, 2~~"->") If rem.Locate() Set rem.Text=rem.ReplaceFirst($Piece(rule, "->", 2)) Quit ~~Set pName3=$Extract(pName, 1, 3)~~ ~~If pName3="MAC" {~~ ~~Set $Extract(pName, 1, 3)="MCC"~~ ~~} ElseIf pName2="KN" {~~ ~~Set $Extract(pName, 1, 2)="N"~~ ~~} ElseIf pName1="K" {~~ ~~Set $Extract(pName, 1)="C"~~ ~~} ElseIf pName2="PH" {~~ ~~Set $Extract(pName, 1, 2)="FF"~~ ~~} ElseIf pName2="PF" {~~ ~~Set $Extract(pName, 1, 2)="FF"~~ ~~} ElseIf pName3="SCH" {~~ ~~Set $Extract(pName, 1, 3)="SSS"~~ } // translate last characters of name: // => EE/IE->Y, DT/RT/RD/NT/ND->D For rule="(EE\|IE)$->Y", "(DT\|RT\|RD\|NT\|ND)$->D" { ~~Set pNamexx=$Case($Extract(pName, -1, ), "EE": "Y", "IE": "Y",~~ Set rem.Pattern=$Piece(rule, "->") ~~"DT": "D", "RT": "D", "RD": "D", "NT": "D", "ND": "D", :"")~~ If ~~$Length~~rem.Locate(~~pNamexx~~) Set ~~pName~~rem.Text=rem.ReplaceFirst($~~Extract~~Piece(~~pName~~rule, 1"->", -2)~~_pNamexx~~) Quit } // first character of key = first character of name Set pName1=$Extract(~~pName~~rem.Text, 1), rem.Text=$Extract(rem.Text, 2, *) ~~Set $Extract(pName, 1)=""~~ // translate remaining characters by following rules, incrementing by one character each time: Line 162 ⟶ 96: // => W->if previous is vowel, A (A is the only vowel left) // => add current to key if current is not same as the last key character Set ptr=0, rules=$ListBuild("EV->AF", "(A\|E\|I\|O\|U)->A", "Q->G", "Z->S", "M->N", "KN->N", "K->C", ~~Set pName=$Replace(pName, "EV", "AF")~~ "SCH->SSS", "PH->FF", "H[^A]", "[^A]H", "AW->A") ~~Set pName=$Translate(pName, "EIOU", "AAAA")~~ While $ListNext(rules, ptr, rule) { ~~Set pName=$Translate(pName, "QZM", "GSN")~~ Set ~~pName~~rem.Pattern=$~~Replace~~Piece(~~pName~~rule, "~~KN", "N~~->") ~~Set~~ If ~~pName=~~$~~Translate~~Piece(~~pName~~rule, "K->", 2)="C", rem.Locate() { Set ~~pName=~~$~~Replace~~Piece(~~pName~~rule, "~~SCH~~->", 2)=$Translate(rem.Group, "~~SSS~~H") } ~~Set pName=$Replace(pName, "PH", "FF")~~ Set rem.Text=rem.ReplaceAll($Piece(rule, "->", 2)) ~~Set locn=$Locate(pName, "H[^A]") If locn Set $Extract(pName, locn)=""~~ } ~~Set locn=$Locate(pName, "[^A]H") If locn Set $Extract(pName, locn+1)=""~~ Set pName=$~~Replace~~ZStrip(~~pName~~rem.Text, "~~AW", "A~~=U") // remove duplicates ~~Set pName=$ZStrip(pName,"=U") // remove duplicates~~ // if last character is S, remove it Line 187 ⟶ 120: } } ~~}</lang>~~ </lang> {{out\|Examples}} <pre> USER>DoFor { Read !, name Quit:name="" Set sc=##class(Utils.Phonetic).Encode("nysiis", ~~"Mclaughlin"~~name, .code,, 6) If sc Write " -> ", code } ~~USER>Write code~~ ~~MCLAGLAN~~ ~~</pre>~~ knight -> NAGT mitchell -> MATCAL o'daniel -> ODANAL brown sr -> BRAN browne III -> BRAN browne IV -> BRAN O'Banion -> OBANAN Mclaughlin -> MCLAGL[AN] McCormack -> MCARNA[C] Chapman -> CHAPNA[N] Silva -> SALV McDonald -> MCDANA[LD] Lawson -> LASAN Jacobs -> JACAB Greene -> GRAN O'Brien -> OBRAN Morrison -> MARASA[N] Larson -> LARSAN Willis -> WAL Mackenzie -> MCANSY Carr -> CAR Lawrence -> LARANC Matthews -> MAT Richards -> RACARD Bishop -> BASAP Franklin -> FRANCL[AN] McDaniel -> MCDANA[L] Harper -> HARPAR Lynch -> LYNC Watkins -> WATCAN Carlson -> CARLSA[N] Wheeler -> WHALAR Louis XVI -> L </pre> =={{header\|Perl 6}}==