This repository has been archived on 2026-02-10. You can view files and clone it, but cannot push or open issues or pull requests.
Files
horsebot/autocomplete/autocomplete.go
2026-01-22 22:24:24 -05:00

123 lines
2.5 KiB
Go

package autocomplete
import (
"slices"
"strings"
"unicode"
"unicode/utf8"
)
// Set is an autocomplete set.
//
// Keys are grouped into buckets by the bitmask that filter computes for
// them. The slices are parallel: mask[i] is the bitmask shared by every key
// in keys[i], and vals[i][j] is the value stored for keys[i][j]. Add keeps
// mask in ascending order and keeps each keys[i] sorted.
//
// The zero Set is empty and ready to use.
type Set struct {
	mask       []uint64
	keys, vals [][]string
}
var (
	// idxs maps a byte that appears in careset to its position within
	// careset. Entries for all other bytes are left at zero.
	idxs [256]byte

	// test is a bitmap over ASCII byte values, filled in by init: bit c%64
	// of test[c/64] is set when byte c appears in careset.
	test [2]uint64
)
// careset lists the characters that take part in bucket masks. The position
// of a character in careset is the bit it occupies in a mask.
const careset = " 0123456789abcdefghijklmnopqrstuvwxyz.!'"

// Certify the size of careset at compile time.
// It must be no larger than 64, because masks are uint64 values with one bit
// per careset character. The conversion below is a constant expression that
// fails to compile when 64 - len(careset) is negative.
const _ = uint(64 - len(careset))
// init fills in the test bitmap and the idxs table from careset.
func init() {
	for pos := 0; pos < len(careset); pos++ {
		ch := careset[pos]

		// test is split into two 64-bit words; bytes below 64 live in the
		// first word and the rest in the second.
		word, bit := 0, ch
		if ch >= 64 {
			word, bit = 1, ch-64
		}
		test[word] |= 1 << bit

		// Remember which mask bit this character occupies.
		idxs[ch] = byte(pos)
	}
}
// normalize returns s with every rune lowercased and with 'ó' replaced by
// a plain 'o'.
func normalize(s string) string {
	fold := func(r rune) rune {
		// TODO(zeph): map latin letters with diacritics to their not-diacritic
		// characters; e.g. 2* pasta's unique Corazón ☆ Ardiente should have
		// ó mapped to o. for now, we just special-case it.
		switch lower := unicode.ToLower(r); lower {
		case 'ó':
			return 'o'
		default:
			return lower
		}
	}
	return strings.Map(fold, s)
}
// filter computes the bitmask of careset characters that occur in s.
// Bit i of the result is set when careset[i] appears at least once in s.
// Non-ASCII characters and ASCII characters outside careset contribute
// nothing.
func filter(s string) (r uint64) {
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b >= 0x80 {
			// Part of a non-ASCII (or invalid) sequence; skip it.
			continue
		}
		if word := test[b>>6]; word&(1<<(b&63)) != 0 {
			// In the care set: record the bit assigned to this character.
			r |= 1 << idxs[b]
		}
	}
	return r
}
// Add associates a value with a key in the autocomplete set.
// The key is normalized before it is stored.
// The behavior is undefined if the key already has a value.
func (s *Set) Add(key, val string) {
	key = normalize(key)
	bits := filter(key)

	bucket, found := slices.BinarySearch(s.mask, bits)
	if !found {
		// No bucket has this mask yet: open an empty one at the sorted
		// position in all three parallel slices.
		s.mask = slices.Insert(s.mask, bucket, bits)
		s.keys = slices.Insert(s.keys, bucket, []string(nil))
		s.vals = slices.Insert(s.vals, bucket, []string(nil))
	}

	// Keep the bucket's keys sorted, with vals following the same order.
	ks, vs := s.keys[bucket], s.vals[bucket]
	pos, _ := slices.BinarySearch(ks, key)
	s.keys[bucket] = slices.Insert(ks, pos, key)
	s.vals[bucket] = slices.Insert(vs, pos, val)
}
// Find appends to r all values in the set with keys that key matches.
// After normalization, key matches a stored key when each of its characters
// appears in the stored key in the same relative order.
func (s *Set) Find(r []string, key string) []string {
	key = normalize(key)
	want := filter(key)
	for b, have := range s.mask {
		if want&^have != 0 {
			// The bucket lacks a care-set character the query contains,
			// so none of its keys can match.
			continue
		}
		for j, cand := range s.keys[b] {
			if !inorder(key, cand) {
				continue
			}
			r = append(r, s.vals[b][j])
		}
	}
	return r
}
// inorder reports whether the characters of a all occur in b in the same
// relative order, each one strictly after the match for the previous one.
func inorder(a, b string) bool {
	pos := 0 // byte offset in b just past the most recent match
	for _, want := range a {
		off := strings.IndexRune(b[pos:], want)
		if off == -1 {
			return false
		}
		pos += off
		// Step over the matched character so it cannot be matched twice.
		_, width := utf8.DecodeRuneInString(b[pos:])
		pos += width
	}
	return true
}
// Metrics reports how many buckets the autocomplete set holds and how many
// keys are in its largest bucket. Both are zero for an empty set.
func (s *Set) Metrics() (buckets, longest int) {
	buckets = len(s.keys)
	for i := range s.keys {
		if n := len(s.keys[i]); n > longest {
			longest = n
		}
	}
	return buckets, longest
}