emote: add emote parsing engine

This commit is contained in:
Branden J Brown 2025-04-06 14:20:47 -04:00
parent 91be2bf466
commit 293031fe38
2 changed files with 176 additions and 0 deletions

65
emote/emote.go Normal file
View File

@ -0,0 +1,65 @@
package emote
import (
"strings"
"unicode"
)
// Emote is the information Kaiyan needs about an emote.
type Emote struct {
	// ID is the emote ID per the source.
	ID string
	// Name is the text of the emote as would be parsed from message text.
	Name string
	// Source is the name of the emote source, e.g. "7TV", "Twitch:cirno_tv", &c.
	Source string
	// Link is a hyperlink to manage the emote.
	Link string
	// Image is a hyperlink to the emote image of any size.
	Image string
}

// Parser finds emotes in a message.
//
// Parser assumes that emotes are bound by whitespace: a word is only
// recognized as an emote when the entire word equals an emote name.
type Parser struct {
	// m maps emote names to emote IDs.
	m map[string]string
	// TODO(branden): more efficient data structure; trie?
}

// NewParser creates a Parser for the given list of emotes.
// If several emotes share a name, the ID of the last one is kept.
func NewParser(emotes ...Emote) Parser {
	m := make(map[string]string, len(emotes))
	for _, e := range emotes {
		m[e.Name] = e.ID
	}
	return Parser{m}
}

// Next parses the next emote instance from the message and returns the
// remainder of the message text following it.
//
// The message is split into words on Unicode whitespace. The first word that
// names a known emote with a non-empty ID is returned as name along with its
// id. following is the text after that word, beginning with the whitespace
// that terminated it, or the empty string if the emote was the last word.
//
// If there is no emote in the message, name, id, and following are all the
// empty string.
func (p Parser) Next(text string) (name, id, following string) {
	for {
		// Trim surrounding space so that the next word starts at index 0.
		text = strings.TrimSpace(text)
		if text == "" {
			// Nothing but whitespace remains. Stop here instead of looking
			// up the empty word, so that an emote registered with an empty
			// name can never be reported for blank text.
			return "", "", ""
		}
		// Look for the space ending the word. If there is none, this is the
		// last word of the message; we still need to look it up.
		word, rest := text, ""
		if k := strings.IndexFunc(text, unicode.IsSpace); k >= 0 {
			word, rest = text[:k], text[k:]
		}
		if id = p.m[word]; id != "" {
			return word, id, rest
		}
		text = rest
	}
}

111
emote/emote_test.go Normal file
View File

@ -0,0 +1,111 @@
package emote_test
import (
"strings"
"testing"
"git.sunturtle.xyz/zephyr/kaiyan/emote"
"github.com/google/go-cmp/cmp"
)
// TestParser checks that repeatedly calling Parser.Next yields exactly the
// known emotes of a message, in order of appearance.
func TestParser(t *testing.T) {
	cases := []struct {
		// name is the subtest name; it is also used as the emote source.
		name string
		// emotes lists the emote names known to the parser.
		emotes []string
		// text is the message to parse.
		text string
		// want lists the emote names expected from successive Next calls.
		want []string
	}{
		{
			name:   "empty",
			emotes: strings.Fields("bocchi ryō nijika kita"),
			text:   "",
			want:   nil,
		},
		{
			name:   "none",
			emotes: nil,
			text:   "bocchi ryō nijika kita",
			want:   nil,
		},
		{
			name:   "all",
			emotes: strings.Fields("bocchi ryō nijika kita"),
			text:   "bocchi ryō nijika kita",
			want:   strings.Fields("bocchi ryō nijika kita"),
		},
		{
			name:   "some",
			emotes: strings.Fields("bocchi nijika"),
			text:   "bocchi ryō nijika kita",
			want:   strings.Fields("bocchi nijika"),
		},
		{
			name:   "others",
			emotes: strings.Fields("ryō kita"),
			text:   "bocchi ryō nijika kita",
			want:   strings.Fields("ryō kita"),
		},
		{
			// Leading, trailing, repeated, and non-space whitespace must all
			// act as word boundaries.
			name:   "whitespace",
			emotes: strings.Fields("bocchi ryō nijika kita"),
			text:   "  bocchi\t\tryō\nnijika  kita \n",
			want:   strings.Fields("bocchi ryō nijika kita"),
		},
		{
			name:   "blank",
			emotes: strings.Fields("bocchi ryō nijika kita"),
			text:   " \t\n",
			want:   nil,
		},
		{
			// Emotes are bound by whitespace, so a name embedded in a longer
			// word is not an emote.
			name:   "substring",
			emotes: strings.Fields("bocchi kita"),
			text:   "bocchis abocchi kita! kitakita",
			want:   nil,
		},
		{
			name:   "repeated",
			emotes: strings.Fields("bocchi"),
			text:   "bocchi bocchi ryō bocchi",
			want:   strings.Fields("bocchi bocchi bocchi"),
		},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Convert the emote names to emotes proper.
			e := make([]emote.Emote, len(c.emotes))
			for i, w := range c.emotes {
				e[i] = emote.Emote{ID: w, Name: w, Source: c.name}
			}
			p := emote.NewParser(e...)
			var got []string
			text := c.text
			for {
				name, id, rest := p.Next(text)
				if id == "" {
					// An empty ID signals that no further emote was found.
					break
				}
				if name != id {
					// Not normally the case, but this test is constructed
					// so that it is.
					t.Errorf("wrong id %q for name %q", id, name)
				}
				got = append(got, name)
				text = rest
			}
			if diff := cmp.Diff(c.want, got); diff != "" {
				t.Errorf("wrong emotes (-want/+got):\n%s", diff)
			}
		})
	}
}
// BenchmarkParser measures how fast a Parser with a fixed set of eight emotes
// consumes entire messages, from text without any emotes to a longer
// natural-looking message with emotes scattered through it.
func BenchmarkParser(b *testing.B) {
	// Every emote uses its name as its ID.
	names := []string{"bocchi", "ryō", "nijika", "kita", "seika", "kikuri", "eliza", "shima"}
	emotes := make([]emote.Emote, 0, len(names))
	for _, n := range names {
		emotes = append(emotes, emote.Emote{ID: n, Name: n})
	}
	parser := emote.NewParser(emotes...)

	const sentence = "kessoku band is bocchi ryō nijika kita starry is seika PA-san sickhack is kikuri eliza shima "
	inputs := []struct {
		name, text string
	}{
		{name: "none-short", text: "none"},
		{name: "none-long", text: strings.TrimSpace(strings.Repeat("none ", 100))},
		{name: "single", text: "bocchi"},
		{name: "natural", text: strings.TrimSpace(strings.Repeat(sentence, 5))},
	}
	for _, in := range inputs {
		b.Run(in.name, func(b *testing.B) {
			// Report throughput in terms of message bytes per iteration.
			b.SetBytes(int64(len(in.text)))
			for b.Loop() {
				// Keep parsing until the whole message has been consumed.
				for rest := in.text; rest != ""; {
					_, _, rest = parser.Next(rest)
				}
			}
		})
	}
}