emote: add emote parsing engine
This commit is contained in:
		
							
								
								
									
										65
									
								
								emote/emote.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								emote/emote.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,65 @@
 | 
				
			|||||||
 | 
					package emote
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"strings"
 | 
				
			||||||
 | 
						"unicode"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Emote carries the per-emote details that Kaiyan works with.
					type Emote struct {
						// ID is the identifier the emote's source assigns to it.
						ID string
						// Name is the emote's text, i.e. the word that appears in a message
						// when the emote is used.
						Name string
						// Source names where the emote comes from, e.g. "7TV",
						// "Twitch:cirno_tv", &c.
						Source string
						// Link is a hyperlink to the page where the emote can be managed.
						Link string
						// Image is a hyperlink to an image of the emote; the size is
						// unspecified.
						Image string
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Parser locates emotes within message text.
					//
					// It only recognizes an emote when the emote stands alone as a word, i.e.
					// when it is delimited by whitespace (or by the ends of the message).
					type Parser struct {
						// TODO(branden): more efficient data structure; trie?

						// m maps an emote's name to its ID.
						m map[string]string
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// NewParser creates a Parser for the given list of emotes.
 | 
				
			||||||
 | 
					func NewParser(emotes ...Emote) Parser {
 | 
				
			||||||
 | 
						m := make(map[string]string, len(emotes))
 | 
				
			||||||
 | 
						for _, e := range emotes {
 | 
				
			||||||
 | 
							m[e.Name] = e.ID
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return Parser{m}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Next parses the next emote instance from the message and returns the
 | 
				
			||||||
 | 
					// remainder of the message text following it.
 | 
				
			||||||
 | 
					// If there is no emote in the message the returned emote is the empty string.
 | 
				
			||||||
 | 
					func (p Parser) Next(text string) (name, id, following string) {
 | 
				
			||||||
 | 
						for text != "" {
 | 
				
			||||||
 | 
							// First trim any existing space.
 | 
				
			||||||
 | 
							text = strings.TrimSpace(text)
 | 
				
			||||||
 | 
							// Then look for the next space.
 | 
				
			||||||
 | 
							// If there is none, this is the last word of the message; we still
 | 
				
			||||||
 | 
							// need to look it up.
 | 
				
			||||||
 | 
							k := strings.IndexFunc(text, unicode.IsSpace)
 | 
				
			||||||
 | 
							word := text
 | 
				
			||||||
 | 
							if k >= 0 {
 | 
				
			||||||
 | 
								word = text[:k]
 | 
				
			||||||
 | 
								following = text[k:]
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								following = ""
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							id = p.m[word]
 | 
				
			||||||
 | 
							if id != "" {
 | 
				
			||||||
 | 
								return word, id, following
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							text = following
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						// No emote found.
 | 
				
			||||||
 | 
						return "", "", ""
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										111
									
								
								emote/emote_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								emote/emote_test.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,111 @@
 | 
				
			|||||||
 | 
					package emote_test
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"strings"
 | 
				
			||||||
 | 
						"testing"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"git.sunturtle.xyz/zephyr/kaiyan/emote"
 | 
				
			||||||
 | 
						"github.com/google/go-cmp/cmp"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestParser(t *testing.T) {
 | 
				
			||||||
 | 
						cases := []struct {
 | 
				
			||||||
 | 
							name   string
 | 
				
			||||||
 | 
							emotes []string
 | 
				
			||||||
 | 
							text   string
 | 
				
			||||||
 | 
							want   []string
 | 
				
			||||||
 | 
						}{
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								name:   "empty",
 | 
				
			||||||
 | 
								emotes: strings.Fields("bocchi ryō nijika kita"),
 | 
				
			||||||
 | 
								text:   "",
 | 
				
			||||||
 | 
								want:   nil,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								name:   "none",
 | 
				
			||||||
 | 
								emotes: nil,
 | 
				
			||||||
 | 
								text:   "bocchi ryō nijika kita",
 | 
				
			||||||
 | 
								want:   nil,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								name:   "all",
 | 
				
			||||||
 | 
								emotes: strings.Fields("bocchi ryō nijika kita"),
 | 
				
			||||||
 | 
								text:   "bocchi ryō nijika kita",
 | 
				
			||||||
 | 
								want:   strings.Fields("bocchi ryō nijika kita"),
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								name:   "some",
 | 
				
			||||||
 | 
								emotes: strings.Fields("bocchi nijika"),
 | 
				
			||||||
 | 
								text:   "bocchi ryō nijika kita",
 | 
				
			||||||
 | 
								want:   strings.Fields("bocchi nijika"),
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								name:   "others",
 | 
				
			||||||
 | 
								emotes: strings.Fields("ryō kita"),
 | 
				
			||||||
 | 
								text:   "bocchi ryō nijika kita",
 | 
				
			||||||
 | 
								want:   strings.Fields("ryō kita"),
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						for _, c := range cases {
 | 
				
			||||||
 | 
							t.Run(c.name, func(t *testing.T) {
 | 
				
			||||||
 | 
								// Convert the emote names to emotes proper.
 | 
				
			||||||
 | 
								e := make([]emote.Emote, len(c.emotes))
 | 
				
			||||||
 | 
								for i, w := range c.emotes {
 | 
				
			||||||
 | 
									e[i] = emote.Emote{ID: w, Name: w, Source: c.name}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								p := emote.NewParser(e...)
 | 
				
			||||||
 | 
								var got []string
 | 
				
			||||||
 | 
								text := c.text
 | 
				
			||||||
 | 
								for {
 | 
				
			||||||
 | 
									name, id, rest := p.Next(text)
 | 
				
			||||||
 | 
									if id == "" {
 | 
				
			||||||
 | 
										break
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									if name != id {
 | 
				
			||||||
 | 
										// Not normally the case, but this test is constructed
 | 
				
			||||||
 | 
										// so that it is.
 | 
				
			||||||
 | 
										t.Errorf("wrong id %q for name %q", id, name)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									got = append(got, name)
 | 
				
			||||||
 | 
									text = rest
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								if diff := cmp.Diff(c.want, got); diff != "" {
 | 
				
			||||||
 | 
									t.Errorf("wrong emotes (-want/+got):\n%s", diff)
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							})
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func BenchmarkParser(b *testing.B) {
 | 
				
			||||||
 | 
						e := []emote.Emote{
 | 
				
			||||||
 | 
							{ID: "bocchi", Name: "bocchi"},
 | 
				
			||||||
 | 
							{ID: "ryō", Name: "ryō"},
 | 
				
			||||||
 | 
							{ID: "nijika", Name: "nijika"},
 | 
				
			||||||
 | 
							{ID: "kita", Name: "kita"},
 | 
				
			||||||
 | 
							{ID: "seika", Name: "seika"},
 | 
				
			||||||
 | 
							{ID: "kikuri", Name: "kikuri"},
 | 
				
			||||||
 | 
							{ID: "eliza", Name: "eliza"},
 | 
				
			||||||
 | 
							{ID: "shima", Name: "shima"},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						p := emote.NewParser(e...)
 | 
				
			||||||
 | 
						texts := []struct {
 | 
				
			||||||
 | 
							name string
 | 
				
			||||||
 | 
							text string
 | 
				
			||||||
 | 
						}{
 | 
				
			||||||
 | 
							{"none-short", "none"},
 | 
				
			||||||
 | 
							{"none-long", strings.TrimSpace(strings.Repeat("none ", 100))},
 | 
				
			||||||
 | 
							{"single", "bocchi"},
 | 
				
			||||||
 | 
							{"natural", strings.TrimSpace(strings.Repeat("kessoku band is bocchi ryō nijika kita starry is seika PA-san sickhack is kikuri eliza shima ", 5))},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						for _, c := range texts {
 | 
				
			||||||
 | 
							b.Run(c.name, func(b *testing.B) {
 | 
				
			||||||
 | 
								b.SetBytes(int64(len(c.text)))
 | 
				
			||||||
 | 
								for b.Loop() {
 | 
				
			||||||
 | 
									text := c.text
 | 
				
			||||||
 | 
									for text != "" {
 | 
				
			||||||
 | 
										_, _, text = p.Next(text)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							})
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user