From 293031fe38cfaa6ebf5aa85090910e64fd32dc1d Mon Sep 17 00:00:00 2001
From: Branden J Brown
Date: Sun, 6 Apr 2025 14:20:47 -0400
Subject: [PATCH] emote: add emote parsing engine

---
 emote/emote.go      |  65 ++++++++++++++++++++++++++
 emote/emote_test.go | 111 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100644 emote/emote.go
 create mode 100644 emote/emote_test.go

diff --git a/emote/emote.go b/emote/emote.go
new file mode 100644
index 0000000..2fb229b
--- /dev/null
+++ b/emote/emote.go
@@ -0,0 +1,65 @@
+package emote
+
+import (
+	"strings"
+	"unicode"
+)
+
+// Emote is the information Kaiyan needs about an emote.
+type Emote struct {
+	// ID is the emote ID per the source.
+	ID string
+	// Name is the text of the emote as would be parsed from message text.
+	Name string
+	// Source is the name of the emote source, e.g. "7TV", "Twitch:cirno_tv", &c.
+	Source string
+	// Link is a hyperlink to manage the emote.
+	Link string
+	// Image is a hyperlink to the emote image of any size.
+	Image string
+}
+
+// Parser finds emotes in a message.
+//
+// Parser assumes that emotes are bound by whitespace.
+type Parser struct {
+	m map[string]string
+	// TODO(branden): more efficient data structure; trie?
+}
+
+// NewParser creates a Parser for the given list of emotes.
+func NewParser(emotes ...Emote) Parser {
+	m := make(map[string]string, len(emotes))
+	for _, e := range emotes {
+		m[e.Name] = e.ID
+	}
+	return Parser{m}
+}
+
+// Next parses the next emote instance from the message and returns the
+// remainder of the message text following it.
+// If there is no emote in the message the returned emote is the empty string.
+func (p Parser) Next(text string) (name, id, following string) {
+	for text != "" {
+		// First trim any existing space.
+		text = strings.TrimSpace(text)
+		// Then look for the next space.
+		// If there is none, this is the last word of the message; we still
+		// need to look it up.
+		k := strings.IndexFunc(text, unicode.IsSpace)
+		word := text
+		if k >= 0 {
+			word = text[:k]
+			following = text[k:]
+		} else {
+			following = ""
+		}
+		id = p.m[word]
+		if id != "" {
+			return word, id, following
+		}
+		text = following
+	}
+	// No emote found.
+	return "", "", ""
+}
diff --git a/emote/emote_test.go b/emote/emote_test.go
new file mode 100644
index 0000000..4fd4077
--- /dev/null
+++ b/emote/emote_test.go
@@ -0,0 +1,111 @@
+package emote_test
+
+import (
+	"strings"
+	"testing"
+
+	"git.sunturtle.xyz/zephyr/kaiyan/emote"
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestParser(t *testing.T) {
+	cases := []struct {
+		name   string
+		emotes []string
+		text   string
+		want   []string
+	}{
+		{
+			name:   "empty",
+			emotes: strings.Fields("bocchi ryō nijika kita"),
+			text:   "",
+			want:   nil,
+		},
+		{
+			name:   "none",
+			emotes: nil,
+			text:   "bocchi ryō nijika kita",
+			want:   nil,
+		},
+		{
+			name:   "all",
+			emotes: strings.Fields("bocchi ryō nijika kita"),
+			text:   "bocchi ryō nijika kita",
+			want:   strings.Fields("bocchi ryō nijika kita"),
+		},
+		{
+			name:   "some",
+			emotes: strings.Fields("bocchi nijika"),
+			text:   "bocchi ryō nijika kita",
+			want:   strings.Fields("bocchi nijika"),
+		},
+		{
+			name:   "others",
+			emotes: strings.Fields("ryō kita"),
+			text:   "bocchi ryō nijika kita",
+			want:   strings.Fields("ryō kita"),
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			// Convert the emote names to emotes proper.
+			e := make([]emote.Emote, len(c.emotes))
+			for i, w := range c.emotes {
+				e[i] = emote.Emote{ID: w, Name: w, Source: c.name}
+			}
+			p := emote.NewParser(e...)
+			var got []string
+			text := c.text
+			for {
+				name, id, rest := p.Next(text)
+				if id == "" {
+					break
+				}
+				if name != id {
+					// Not normally the case, but this test is constructed
+					// so that it is.
+					t.Errorf("wrong id %q for name %q", id, name)
+				}
+				got = append(got, name)
+				text = rest
+			}
+			if diff := cmp.Diff(c.want, got); diff != "" {
+				t.Errorf("wrong emotes (-want/+got):\n%s", diff)
+			}
+		})
+	}
+}
+
+func BenchmarkParser(b *testing.B) {
+	e := []emote.Emote{
+		{ID: "bocchi", Name: "bocchi"},
+		{ID: "ryō", Name: "ryō"},
+		{ID: "nijika", Name: "nijika"},
+		{ID: "kita", Name: "kita"},
+		{ID: "seika", Name: "seika"},
+		{ID: "kikuri", Name: "kikuri"},
+		{ID: "eliza", Name: "eliza"},
+		{ID: "shima", Name: "shima"},
+	}
+	p := emote.NewParser(e...)
+	texts := []struct {
+		name string
+		text string
+	}{
+		{"none-short", "none"},
+		{"none-long", strings.TrimSpace(strings.Repeat("none ", 100))},
+		{"single", "bocchi"},
+		{"natural", strings.TrimSpace(strings.Repeat("kessoku band is bocchi ryō nijika kita starry is seika PA-san sickhack is kikuri eliza shima ", 5))},
+	}
+	for _, c := range texts {
+		b.Run(c.name, func(b *testing.B) {
+			b.SetBytes(int64(len(c.text)))
+			for b.Loop() {
+				text := c.text
+				for text != "" {
+					_, _, text = p.Next(text)
+				}
+			}
+		})
+	}
+}