VictoriaMetrics/vendor/github.com/antzucaro/matchr/soundex.go
2022-05-20 14:40:09 +03:00

72 lines
1.4 KiB
Go

package matchr
import "strings"
// Soundex computes the Soundex phonetic representation of the input string. It
// attempts to encode homophones with the same characters. More information can
// be found at http://en.wikipedia.org/wiki/Soundex.
func Soundex(s1 string) string {
if len(s1) == 0 {
return ""
}
// we should work with all uppercase
s1 = strings.ToUpper(s1)
input := NewString(s1)
// the encoded value
enc := input.Slice(0, 1)
c := ""
prev := ""
hw := false
for i := 0; i < input.RuneCount(); i++ {
switch rune(input.At(i)) {
case 'B', 'F', 'P', 'V':
c = "1"
case 'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z':
c = "2"
case 'D', 'T':
c = "3"
case 'L':
c = "4"
case 'M', 'N':
c = "5"
case 'R':
c = "6"
case 'H', 'W':
hw = true
default:
c = ""
}
// don't encode the first position, but we need its code value
// to prevent repeats
if c != "" && c != prev && i > 0 {
// if the next encoded digit is different, we can add it right away
// if it is the same, though, it must not have been preceded
// by an 'H' or a 'W'
if enc[len(enc)-1:len(enc)] != c || !hw {
enc = enc + c
}
// we're done when we reach four encoded characters
if len(enc) == 4 {
break
}
}
prev = c
hw = false
}
// if we've fallen short of 4 "real" encoded characters,
// it gets padded with zeros
for len(enc) < 4 {
enc = enc + "0"
}
return enc
}