emote: add emote parsing engine
This commit is contained in:
parent
91be2bf466
commit
293031fe38
65
emote/emote.go
Normal file
65
emote/emote.go
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
package emote
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Emote describes a single emote with the details Kaiyan needs about it.
type Emote struct {
	// ID identifies the emote according to its source.
	ID string
	// Name is the emote's text form, i.e. what appears in message text and
	// is matched when parsing.
	Name string
	// Source names where the emote comes from, e.g. "7TV" or
	// "Twitch:cirno_tv".
	Source string
	// Link is a hyperlink to a page for managing the emote.
	Link string
	// Image is a hyperlink to an image of the emote; any size is acceptable.
	Image string
}
|
||||||
|
|
||||||
|
// Parser locates emotes within message text.
//
// It assumes that every emote is delimited by whitespace.
type Parser struct {
	// m maps each emote name to its ID.
	//
	// TODO(branden): more efficient data structure; trie?
	m map[string]string
}
|
||||||
|
|
||||||
|
// NewParser creates a Parser for the given list of emotes.
|
||||||
|
func NewParser(emotes ...Emote) Parser {
|
||||||
|
m := make(map[string]string, len(emotes))
|
||||||
|
for _, e := range emotes {
|
||||||
|
m[e.Name] = e.ID
|
||||||
|
}
|
||||||
|
return Parser{m}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next parses the next emote instance from the message and returns the
|
||||||
|
// remainder of the message text following it.
|
||||||
|
// If there is no emote in the message the returned emote is the empty string.
|
||||||
|
func (p Parser) Next(text string) (name, id, following string) {
|
||||||
|
for text != "" {
|
||||||
|
// First trim any existing space.
|
||||||
|
text = strings.TrimSpace(text)
|
||||||
|
// Then look for the next space.
|
||||||
|
// If there is none, this is the last word of the message; we still
|
||||||
|
// need to look it up.
|
||||||
|
k := strings.IndexFunc(text, unicode.IsSpace)
|
||||||
|
word := text
|
||||||
|
if k >= 0 {
|
||||||
|
word = text[:k]
|
||||||
|
following = text[k:]
|
||||||
|
} else {
|
||||||
|
following = ""
|
||||||
|
}
|
||||||
|
id = p.m[word]
|
||||||
|
if id != "" {
|
||||||
|
return word, id, following
|
||||||
|
}
|
||||||
|
text = following
|
||||||
|
}
|
||||||
|
// No emote found.
|
||||||
|
return "", "", ""
|
||||||
|
}
|
111
emote/emote_test.go
Normal file
111
emote/emote_test.go
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
package emote_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.sunturtle.xyz/zephyr/kaiyan/emote"
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParser(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
emotes []string
|
||||||
|
text string
|
||||||
|
want []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty",
|
||||||
|
emotes: strings.Fields("bocchi ryō nijika kita"),
|
||||||
|
text: "",
|
||||||
|
want: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "none",
|
||||||
|
emotes: nil,
|
||||||
|
text: "bocchi ryō nijika kita",
|
||||||
|
want: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "all",
|
||||||
|
emotes: strings.Fields("bocchi ryō nijika kita"),
|
||||||
|
text: "bocchi ryō nijika kita",
|
||||||
|
want: strings.Fields("bocchi ryō nijika kita"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "some",
|
||||||
|
emotes: strings.Fields("bocchi nijika"),
|
||||||
|
text: "bocchi ryō nijika kita",
|
||||||
|
want: strings.Fields("bocchi nijika"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "others",
|
||||||
|
emotes: strings.Fields("ryō kita"),
|
||||||
|
text: "bocchi ryō nijika kita",
|
||||||
|
want: strings.Fields("ryō kita"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
// Convert the emote names to emotes proper.
|
||||||
|
e := make([]emote.Emote, len(c.emotes))
|
||||||
|
for i, w := range c.emotes {
|
||||||
|
e[i] = emote.Emote{ID: w, Name: w, Source: c.name}
|
||||||
|
}
|
||||||
|
p := emote.NewParser(e...)
|
||||||
|
var got []string
|
||||||
|
text := c.text
|
||||||
|
for {
|
||||||
|
name, id, rest := p.Next(text)
|
||||||
|
if id == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if name != id {
|
||||||
|
// Not normally the case, but this test is constructed
|
||||||
|
// so that it is.
|
||||||
|
t.Errorf("wrong id %q for name %q", id, name)
|
||||||
|
}
|
||||||
|
got = append(got, name)
|
||||||
|
text = rest
|
||||||
|
}
|
||||||
|
if diff := cmp.Diff(c.want, got); diff != "" {
|
||||||
|
t.Errorf("wrong emotes (-want/+got):\n%s", diff)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkParser(b *testing.B) {
|
||||||
|
e := []emote.Emote{
|
||||||
|
{ID: "bocchi", Name: "bocchi"},
|
||||||
|
{ID: "ryō", Name: "ryō"},
|
||||||
|
{ID: "nijika", Name: "nijika"},
|
||||||
|
{ID: "kita", Name: "kita"},
|
||||||
|
{ID: "seika", Name: "seika"},
|
||||||
|
{ID: "kikuri", Name: "kikuri"},
|
||||||
|
{ID: "eliza", Name: "eliza"},
|
||||||
|
{ID: "shima", Name: "shima"},
|
||||||
|
}
|
||||||
|
p := emote.NewParser(e...)
|
||||||
|
texts := []struct {
|
||||||
|
name string
|
||||||
|
text string
|
||||||
|
}{
|
||||||
|
{"none-short", "none"},
|
||||||
|
{"none-long", strings.TrimSpace(strings.Repeat("none ", 100))},
|
||||||
|
{"single", "bocchi"},
|
||||||
|
{"natural", strings.TrimSpace(strings.Repeat("kessoku band is bocchi ryō nijika kita starry is seika PA-san sickhack is kikuri eliza shima ", 5))},
|
||||||
|
}
|
||||||
|
for _, c := range texts {
|
||||||
|
b.Run(c.name, func(b *testing.B) {
|
||||||
|
b.SetBytes(int64(len(c.text)))
|
||||||
|
for b.Loop() {
|
||||||
|
text := c.text
|
||||||
|
for text != "" {
|
||||||
|
_, _, text = p.Next(text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user