Soundex: Difference between revisions

Content deleted Content added
Wrong 'head' call
Sonia (talk | contribs)
Go solution
Line 279:
s" Burrows" soundex cr type \ B620 (W test) (any Welsh names?)
s" O'Hara" soundex cr type \ O600 (punctuation test)</lang>
=={{header|Go}}==
This follows the rules given in the Wikipedia Soundex article, plus my own interpretation of how input should be validated.
<lang go>
 
package main
 
import (
"fmt"
"os"
"unicode"
)
 
// code maps a letter index (0 for 'a' through 25 for 'z') to its Soundex
// class: '1'-'6' are the consonant groups, '0' marks letters that are not
// coded but separate repeated codes (a, e, i, o, u, y), and '7' marks h and w,
// which are skipped without separating repeated codes.
var code = []rune("01230127022455012623017202")

// soundex returns the four-character Soundex code of s: the first letter in
// upper case followed by three digits, padded with '0' when fewer than three
// consonant codes are found.
//
// Input rules:
//   - s must start with an ASCII letter;
//   - ASCII control characters (below space, or DEL) are rejected;
//   - letters outside A-Z/a-z are rejected;
//   - any other non-letter (space, digit, punctuation) is accepted and acts
//     as a separator, so equal codes on either side of it are both kept.
//
// Scanning stops as soon as the fourth character is filled in, so characters
// after that point are not validated.
//
// On failure the returned string is empty and the error describes the
// problem. The error is the built-in error type, which replaced the pre-Go 1
// os.Error.
//
// NOTE(review): this function no longer references package os; if nothing
// else in the file does, the "os" import should be dropped.
func soundex(s string) (string, error) {
	var sx [4]rune
	var sxi int
	lastCode := '0'
	for i, c := range s {
		switch {
		case !unicode.IsLetter(c):
			if c < ' ' || c == 127 {
				return "", fmt.Errorf("ASCII control characters disallowed")
			}
			if i == 0 {
				return "", fmt.Errorf("initial character must be a letter")
			}
			// A separator breaks any run of equal codes.
			lastCode = '0'
			continue
		case c >= 'A' && c <= 'Z':
			c -= 'A'
		case c >= 'a' && c <= 'z':
			c -= 'a'
		default:
			return "", fmt.Errorf("non-ASCII letters unsupported")
		}
		// c is a valid letter index (0-25) at this point.
		x := code[c]
		if i == 0 {
			sx[0] = c + 'A'
			sxi = 1
			// Remember the first letter's class so that an immediately
			// following letter of the same class is not coded again
			// (e.g. "Pfister" is P236, not P123).
			lastCode = x
			continue
		}
		switch x {
		case '7', lastCode:
			// h, w, or a repeat of the previous code: contributes nothing.
		case '0':
			// A vowel (or y) separates repeated codes.
			lastCode = '0'
		default:
			sx[sxi] = x
			if sxi == 3 {
				return string(sx[:]), nil
			}
			sxi++
			lastCode = x
		}
	}
	if sxi == 0 {
		// Only reachable for the empty string: any other input either set
		// sx[0] or returned an error on its first character.
		return "", fmt.Errorf("no letters present")
	}
	for ; sxi < 4; sxi++ {
		sx[sxi] = '0'
	}
	return string(sx[:]), nil
}
 
// main runs soundex over a fixed list of sample inputs and prints one line
// per input: the resulting code on success, or the quoted input followed by
// the error text on failure.
func main() {
	samples := []string{
		"Robert",   // Wikipedia example, R163
		"Rupert",   // Wikipedia example, R163
		"Rubin",    // Wikipedia example, R150
		"ashcroft", // Wikipedia example, A261
		"ashcraft", // s and c merge across the h; the t is never reached
		"moses",    // the two s's are kept because a vowel separates them
		"O'Mally",  // apostrophe is accepted; the adjacent l's merge
		"d jay",    // spaces are accepted
		"R2-D2",    // digits and hyphens are accepted
		"12p2",     // ...but not as the first character
		"naïve",    // non-ASCII letters are rejected
		"",         // the empty string is rejected
		"bump\t",   // ASCII control characters are rejected
	}
	for _, name := range samples {
		sx, err := soundex(name)
		if err != nil {
			fmt.Printf("\"%s\" fail. %s\n", name, err)
			continue
		}
		fmt.Println("soundex", name, "=", sx)
	}
}
</lang>
Output:
<pre>
soundex Robert = R163
soundex Rupert = R163
soundex Rubin = R150
soundex ashcroft = A261
soundex ashcraft = A261
soundex moses = M220
soundex O'Mally = O540
soundex d jay = D200
soundex R2-D2 = R300
"12p2" fail. initial character must be a letter
"naïve" fail. non-ASCII letters unsupported
"" fail. no letters present
"bump " fail. ASCII control characters disallowed
</pre>
 
=={{header|Haskell}}==