Soundex: Difference between revisions

No change in size ,  5 years ago
m
reorder
m (→‎{{header|Ring}}: Remove vanity tags)
m (reorder)
Line 1,683:
s" Burrows" soundex cr type \ B620 (W test) (any Welsh names?)
s" O'Hara" soundex cr type \ O600 (punctuation test)</lang>
 
=={{header|Go}}==
Follows the rules given in the Wikipedia (WP) Soundex article, plus my own interpretation of input validation.
<lang go>package main
 
import (
"errors"
"fmt"
"unicode"
)
 
// code maps each ASCII letter (index 0 for 'a'/'A' through 25 for 'z'/'Z')
// to its Soundex class digit. Two values are markers rather than output
// digits: '0' marks a, e, i, o, u and y, which emit nothing but separate
// repeated consonants, and '7' marks h and w, which are skipped outright.
var code = []byte("01230127022455012623017202")

// soundex returns the four-character Soundex code of s: the upper-cased
// first letter followed by three class digits, right-padded with '0'.
//
// Letters of the same class collapse into one digit when they are adjacent
// or separated only by h or w; a vowel, y, or any non-letter between them
// keeps both. The first letter takes part in this collapsing as well, so
// "Pfister" encodes as P236 rather than P123.
//
// An error is returned when s is empty, when its first character is not a
// letter, when it contains an ASCII control character, or when it contains
// a letter outside A-Z/a-z. The returned string is empty on error.
func soundex(s string) (string, error) {
	var sx [4]byte
	var sxi int
	var cx, lastCode byte
	for i, c := range s {
		switch {
		case !unicode.IsLetter(c):
			if c < ' ' || c == 127 {
				return "", errors.New("ASCII control characters disallowed")
			}
			if i == 0 {
				return "", errors.New("initial character must be a letter")
			}
			// Punctuation, digits and spaces act as separators: the next
			// consonant is emitted even if it repeats the previous class.
			lastCode = '0'
			continue
		case c >= 'A' && c <= 'Z':
			cx = byte(c - 'A')
		case c >= 'a' && c <= 'z':
			cx = byte(c - 'a')
		default:
			return "", errors.New("non-ASCII letters unsupported")
		}
		// cx is valid letter index at this point
		if i == 0 {
			sx[0] = cx + 'A'
			sxi = 1
			// Remember the first letter's class so that an immediately
			// following letter of the same class is not coded again.
			lastCode = code[cx]
			continue
		}
		switch x := code[cx]; x {
		case '7', lastCode:
			// h/w, or a repeat of the previous class: contributes nothing
			// and leaves lastCode untouched.
		case '0':
			lastCode = '0'
		default:
			sx[sxi] = x
			if sxi == 3 {
				// All three digits are filled; the rest of s is irrelevant.
				return string(sx[:]), nil
			}
			sxi++
			lastCode = x
		}
	}
	if sxi == 0 {
		return "", errors.New("no letters present")
	}
	for ; sxi < 4; sxi++ {
		sx[sxi] = '0'
	}
	return string(sx[:]), nil
}
 
// main feeds a fixed set of sample inputs to soundex and prints either the
// resulting code or the reason the input was rejected.
func main() {
	samples := []string{
		"Robert",   // WP test case = R163
		"Rupert",   // WP test case = R163
		"Rubin",    // WP test case = R150
		"ashcroft", // WP test case = A261
		"ashcraft", // s and c merge across the h; the t is never reached
		"moses",    // the two s's stay separate because of the e
		"O'Mally",  // apostrophe is accepted; the adjacent l's merge
		"d jay",    // spaces are accepted
		"R2-D2",    // digits and hyphens are accepted
		"12p2",     // ...but not as the leading character
		"naïve",    // non-ASCII letters are rejected
		"",         // empty input is rejected
		"bump\t",   // ASCII control characters are rejected
	}
	for _, word := range samples {
		sx, err := soundex(word)
		if err != nil {
			fmt.Printf("\"%s\" fail. %s\n", word, err)
			continue
		}
		fmt.Println("soundex", word, "=", sx)
	}
}</lang>
{{out}}
<pre>
soundex Robert = R163
soundex Rupert = R163
soundex Rubin = R150
soundex ashcroft = A261
soundex ashcraft = A261
soundex moses = M220
soundex O'Mally = O540
soundex d jay = D200
soundex R2-D2 = R300
"12p2" fail. initial character must be a letter
"naïve" fail. non-ASCII letters unsupported
"" fail. no letters present
"bump " fail. ASCII control characters disallowed
</pre>
 
=={{header|FutureBasic}}==
Line 2,071 ⟶ 1,969:
Steward = S363
Seward = S630
</pre>
 
=={{header|Go}}==
Follows the rules given in the Wikipedia (WP) Soundex article, plus my own interpretation of input validation.
<lang go>package main
 
import (
"errors"
"fmt"
"unicode"
)
 
// code maps each ASCII letter (index 0 for 'a'/'A' through 25 for 'z'/'Z')
// to its Soundex class digit. Two values are markers rather than output
// digits: '0' marks a, e, i, o, u and y, which emit nothing but separate
// repeated consonants, and '7' marks h and w, which are skipped outright.
var code = []byte("01230127022455012623017202")

// soundex returns the four-character Soundex code of s: the upper-cased
// first letter followed by three class digits, right-padded with '0'.
//
// Letters of the same class collapse into one digit when they are adjacent
// or separated only by h or w; a vowel, y, or any non-letter between them
// keeps both. The first letter takes part in this collapsing as well, so
// "Pfister" encodes as P236 rather than P123.
//
// An error is returned when s is empty, when its first character is not a
// letter, when it contains an ASCII control character, or when it contains
// a letter outside A-Z/a-z. The returned string is empty on error.
func soundex(s string) (string, error) {
	var sx [4]byte
	var sxi int
	var cx, lastCode byte
	for i, c := range s {
		switch {
		case !unicode.IsLetter(c):
			if c < ' ' || c == 127 {
				return "", errors.New("ASCII control characters disallowed")
			}
			if i == 0 {
				return "", errors.New("initial character must be a letter")
			}
			// Punctuation, digits and spaces act as separators: the next
			// consonant is emitted even if it repeats the previous class.
			lastCode = '0'
			continue
		case c >= 'A' && c <= 'Z':
			cx = byte(c - 'A')
		case c >= 'a' && c <= 'z':
			cx = byte(c - 'a')
		default:
			return "", errors.New("non-ASCII letters unsupported")
		}
		// cx is valid letter index at this point
		if i == 0 {
			sx[0] = cx + 'A'
			sxi = 1
			// Remember the first letter's class so that an immediately
			// following letter of the same class is not coded again.
			lastCode = code[cx]
			continue
		}
		switch x := code[cx]; x {
		case '7', lastCode:
			// h/w, or a repeat of the previous class: contributes nothing
			// and leaves lastCode untouched.
		case '0':
			lastCode = '0'
		default:
			sx[sxi] = x
			if sxi == 3 {
				// All three digits are filled; the rest of s is irrelevant.
				return string(sx[:]), nil
			}
			sxi++
			lastCode = x
		}
	}
	if sxi == 0 {
		return "", errors.New("no letters present")
	}
	for ; sxi < 4; sxi++ {
		sx[sxi] = '0'
	}
	return string(sx[:]), nil
}
 
// main feeds a fixed set of sample inputs to soundex and prints either the
// resulting code or the reason the input was rejected.
func main() {
	samples := []string{
		"Robert",   // WP test case = R163
		"Rupert",   // WP test case = R163
		"Rubin",    // WP test case = R150
		"ashcroft", // WP test case = A261
		"ashcraft", // s and c merge across the h; the t is never reached
		"moses",    // the two s's stay separate because of the e
		"O'Mally",  // apostrophe is accepted; the adjacent l's merge
		"d jay",    // spaces are accepted
		"R2-D2",    // digits and hyphens are accepted
		"12p2",     // ...but not as the leading character
		"naïve",    // non-ASCII letters are rejected
		"",         // empty input is rejected
		"bump\t",   // ASCII control characters are rejected
	}
	for _, word := range samples {
		sx, err := soundex(word)
		if err != nil {
			fmt.Printf("\"%s\" fail. %s\n", word, err)
			continue
		}
		fmt.Println("soundex", word, "=", sx)
	}
}</lang>
{{out}}
<pre>
soundex Robert = R163
soundex Rupert = R163
soundex Rubin = R150
soundex ashcroft = A261
soundex ashcraft = A261
soundex moses = M220
soundex O'Mally = O540
soundex d jay = D200
soundex R2-D2 = R300
"12p2" fail. initial character must be a letter
"naïve" fail. non-ASCII letters unsupported
"" fail. no letters present
"bump " fail. ASCII control characters disallowed
</pre>
 
1,336

edits