package autocomplete import ( "slices" "strings" "unicode" "unicode/utf8" ) // Set is an autocomplete set. type Set struct { mask []uint64 keys [][]string vals [][]string } var ( test [2]uint64 idxs [256]byte ) const careset = " 0123456789abcdefghijklmnopqrstuvwxyz.!'" // Certify the size of careset at compile time. // It must be no larger than 64. var _ [0]struct{} = [len(careset) - 40]struct{}{} func init() { // Construct the test and idxs maps. for i, c := range []byte(careset) { if c < 64 { test[0] |= 1 << c } else { test[1] |= 1 << (c - 64) } idxs[c] = byte(i) } } func normalize(s string) string { s = strings.Map(func(c rune) rune { c = unicode.ToLower(c) // TODO(zeph): map latin letters with diacritics to their not-diacritic // characters; e.g. 2* pasta's unique Corazón ☆ Ardiente should have // ó mapped to o. for now, we just special-case it. if c == 'ó' { c = 'o' } return c }, s) return s } func filter(s string) (r uint64) { for _, c := range s { if c > 0x7f { // Skip non-ASCII characters. continue } if test[byte(c)/64]>>(byte(c)%64)&1 == 0 { // Not in the care set. continue } r |= 1 << idxs[byte(c)] } return r } // Add associates a value with a key in the autocomplete set. // The behavior is undefined if the key already has a value. func (s *Set) Add(key, val string) { key = normalize(key) m := filter(key) i, ok := slices.BinarySearch(s.mask, m) if !ok { s.mask = slices.Insert(s.mask, i, m) s.keys = slices.Insert(s.keys, i, nil) s.vals = slices.Insert(s.vals, i, nil) } j, _ := slices.BinarySearch(s.keys[i], key) s.keys[i] = slices.Insert(s.keys[i], j, key) s.vals[i] = slices.Insert(s.vals[i], j, val) } // Find appends to r all values in the set with keys that key matches. func (s *Set) Find(r []string, key string) []string { key = normalize(key) m := filter(key) for i, v := range s.mask { if m&v != m { continue } for j, k := range s.keys[i] { if inorder(key, k) { r = append(r, s.vals[i][j]) } } } return r } // inorder checks whether each character in a appears in the same relative order in b. func inorder(a, b string) bool { for _, c := range a { k := strings.IndexRune(b, c) if k < 0 { return false } _, l := utf8.DecodeRuneInString(b[k:]) b = b[k+l:] } return true } // Metrics gets the number of buckets in the autocomplete set and the length // of the longest bucket. func (s *Set) Metrics() (buckets, longest int) { for _, b := range s.keys { longest = max(longest, len(b)) } return len(s.keys), longest }