diff --git a/autocomplete/autocomplete.go b/autocomplete/autocomplete.go
new file mode 100644
index 0000000..a7f0423
--- /dev/null
+++ b/autocomplete/autocomplete.go
@@ -0,0 +1,153 @@
+package autocomplete
+
+import (
+	"slices"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// Set is an autocomplete set.
+//
+// Keys are grouped by the mask of careset characters they contain, so a
+// search can skip every group that lacks one of the characters it needs.
+// The zero value is an empty set ready to use.
+type Set struct {
+	mask []uint64   // sorted, distinct character masks
+	keys [][]string // keys[i] holds the sorted normalized keys with mask[i]
+	vals [][]string // vals[i][j] is the value stored for keys[i][j]
+}
+
+var (
+	// test has a bit set for each ASCII byte that is in careset.
+	test [2]uint64
+	// idxs maps a careset byte to its bit position in a mask.
+	idxs [256]byte
+)
+
+// careset lists the characters that are tracked in masks.
+const careset = " 0123456789abcdefghijklmnopqrstuvwxyz.!'"
+
+// Certify the size of careset at compile time.
+// It must be no larger than 64 so that each character gets its own mask bit;
+// a longer careset makes this array length negative, which does not compile.
+var _ [64 - len(careset)]struct{}
+
+func init() {
+	// Construct the test and idxs maps.
+	for i, c := range []byte(careset) {
+		if c < 64 {
+			test[0] |= 1 << c
+		} else {
+			test[1] |= 1 << (c - 64)
+		}
+		idxs[c] = byte(i)
+	}
+}
+
+// normalize lowercases s and strips the diacritics that fold knows about,
+// so that e.g. "Corazón" and "corazon" normalize to the same string.
+func normalize(s string) string {
+	return strings.Map(fold, s)
+}
+
+// fold lowercases c and maps precomposed Latin-1 letters with diacritics to
+// their plain letters.
+// TODO(zeph): letters outside Latin-1 and combining marks are not handled;
+// that needs Unicode decomposition, which the standard library lacks.
+func fold(c rune) rune {
+	switch c = unicode.ToLower(c); c {
+	case 'à', 'á', 'â', 'ã', 'ä', 'å':
+		return 'a'
+	case 'ç':
+		return 'c'
+	case 'è', 'é', 'ê', 'ë':
+		return 'e'
+	case 'ì', 'í', 'î', 'ï':
+		return 'i'
+	case 'ñ':
+		return 'n'
+	case 'ò', 'ó', 'ô', 'õ', 'ö':
+		return 'o'
+	case 'ù', 'ú', 'û', 'ü':
+		return 'u'
+	case 'ý', 'ÿ':
+		return 'y'
+	}
+	return c
+}
+
+// filter returns the mask of the careset characters that occur in s.
+func filter(s string) (r uint64) {
+	for _, c := range s {
+		if c > 0x7f {
+			// Skip non-ASCII characters.
+			continue
+		}
+		if test[byte(c)/64]>>(byte(c)%64)&1 == 0 {
+			// Not in the care set.
+			continue
+		}
+		r |= 1 << idxs[byte(c)]
+	}
+	return r
+}
+
+// Add associates a value with a key in the autocomplete set.
+// Keys are compared after normalization.
+// If the key already has a value, the new value replaces it.
+func (s *Set) Add(key, val string) {
+	key = normalize(key)
+	m := filter(key)
+	i, ok := slices.BinarySearch(s.mask, m)
+	if !ok {
+		s.mask = slices.Insert(s.mask, i, m)
+		s.keys = slices.Insert(s.keys, i, nil)
+		s.vals = slices.Insert(s.vals, i, nil)
+	}
+	j, ok := slices.BinarySearch(s.keys[i], key)
+	if ok {
+		// Replace in place; a second entry would make Find report the
+		// key twice.
+		s.vals[i][j] = val
+		return
+	}
+	s.keys[i] = slices.Insert(s.keys[i], j, key)
+	s.vals[i] = slices.Insert(s.vals[i], j, val)
+}
+
+// Find appends to r all values in the set with keys that key matches.
+// A key matches when the characters of key, after normalization, appear in
+// it in the same order; they need not be adjacent.
+func (s *Set) Find(r []string, key string) []string {
+	key = normalize(key)
+	m := filter(key)
+	// A mask that has every bit of m is numerically no smaller than m, and
+	// s.mask is sorted, so groups before m's position cannot match.
+	lo, _ := slices.BinarySearch(s.mask, m)
+	for i := lo; i < len(s.mask); i++ {
+		if m&s.mask[i] != m {
+			continue
+		}
+		for j, k := range s.keys[i] {
+			if inorder(key, k) {
+				r = append(r, s.vals[i][j])
+			}
+		}
+	}
+	return r
+}
+
+// inorder checks whether each character in a appears in the same relative order in b.
+func inorder(a, b string) bool {
+	for _, c := range a {
+		k := strings.IndexRune(b, c)
+		if k < 0 {
+			return false
+		}
+		// Continue the search after the matched character.
+		_, l := utf8.DecodeRuneInString(b[k:])
+		b = b[k+l:]
+	}
+	return true
+}
diff --git a/autocomplete/autocomplete_test.go b/autocomplete/autocomplete_test.go
new file mode 100644
index 0000000..c4a4756
--- /dev/null
+++ b/autocomplete/autocomplete_test.go
@@ -0,0 +1,93 @@
+package autocomplete_test
+
+import (
+	"slices"
+	"testing"
+
+	"git.sunturtle.xyz/zephyr/horsebot/autocomplete"
+)
+
+func these(s ...string) []string { return s }
+
+func TestAutocomplete(t *testing.T) {
+	cases := []struct {
+		name   string
+		add    []string
+		search string
+		want   []string
+	}{
+		{
+			name:   "empty",
+			add:    nil,
+			search: "",
+			want:   nil,
+		},
+		{
+			name:   "exact",
+			add:    these("bocchi"),
+			search: "bocchi",
+			want:   these("bocchi"),
+		},
+		{
+			name:   "extra",
+			add:    these("bocchi", "ryo", "nijika", "kita"),
+			search: "bocchi",
+			want:   these("bocchi"),
+		},
+		{
+			name:   "short",
+			add:    these("bocchi", "ryo", "nijika", "kita"),
+			search: "o",
+			want:   these("bocchi", "ryo"),
+		},
+		{
+			name:   "subsequence",
+			add:    these("bocchi", "ryo", "nijika", "kita"),
+			search: "bci",
+			want:   these("bocchi"),
+		},
+		{
+			name:   "order",
+			add:    these("bocchi", "ryo", "nijika", "kita"),
+			search: "ob",
+			want:   nil,
+		},
+		{
+			name:   "map",
+			add:    these("Corazón ☆ Ardiente"),
+			search: "corazo",
+			want:   these("Corazón ☆ Ardiente"),
+		},
+		{
+			name:   "fold",
+			add:    these("Café Über"),
+			search: "cafe u",
+			want:   these("Café Über"),
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			var set autocomplete.Set
+			for _, s := range c.add {
+				set.Add(s, s)
+			}
+			got := set.Find(nil, c.search)
+			slices.Sort(c.want)
+			slices.Sort(got)
+			if !slices.Equal(c.want, got) {
+				t.Errorf("wrong results: want %q, got %q", c.want, got)
+			}
+		})
+	}
+}
+
+func TestAddReplace(t *testing.T) {
+	var set autocomplete.Set
+	set.Add("bocchi", "old")
+	set.Add("Bocchi", "new")
+	want := these("new")
+	got := set.Find(nil, "bocchi")
+	if !slices.Equal(want, got) {
+		t.Errorf("wrong results: want %q, got %q", want, got)
+	}
+}
diff --git a/go.sum b/go.sum
index edb8ae5..870d1b1 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,3 @@
-git.sunturtle.xyz/zephyr/horse v0.0.0-20260118202043-d147d71519e4 h1:3m5C5/9TX4BcW8z8gQAC0+zbsb+jLtJ+VlJk3UibY2s=
-git.sunturtle.xyz/zephyr/horse v0.0.0-20260118202043-d147d71519e4/go.mod h1:qGXO/93EfCOI1oGSLqrRkPDF/EAdsgLNZJjRKx+i4Lk=
 git.sunturtle.xyz/zephyr/horse v0.0.0-20260122143238-1ae654c266b7 h1:fneQyGAkeefCemRtuvTO32MIwkcYm3ajeIS6hfXrAno=
 git.sunturtle.xyz/zephyr/horse v0.0.0-20260122143238-1ae654c266b7/go.mod h1:qGXO/93EfCOI1oGSLqrRkPDF/EAdsgLNZJjRKx+i4Lk=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=