Compare commits

...

3 Commits

Author SHA1 Message Date
Branden J Brown
91be2bf466 queue: add comparative benchmarks for sender 2025-04-06 11:01:55 -04:00
Branden J Brown
0bf52b3fac queue: add types for ingest -> indexer 2025-04-06 10:48:15 -04:00
Branden J Brown
844ad98142 ingest: add wire format for incoming messages 2025-04-06 09:20:32 -04:00
12 changed files with 483 additions and 0 deletions

5
go.mod
View File

@ -1,3 +1,8 @@
module git.sunturtle.xyz/zephyr/kaiyan
go 1.24.1
require (
github.com/go-json-experiment/json v0.0.0-20250223041408-d3c622f1b874
github.com/google/go-cmp v0.7.0
)

4
go.sum Normal file
View File

@ -0,0 +1,4 @@
github.com/go-json-experiment/json v0.0.0-20250223041408-d3c622f1b874 h1:F8d1AJ6M9UQCavhwmO6ZsrYLfG8zVFWfEfMS2MXPkSY=
github.com/go-json-experiment/json v0.0.0-20250223041408-d3c622f1b874/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=

24
ingest/message.go Normal file
View File

@ -0,0 +1,24 @@
package ingest
// Twitch is the shape required of Twitch EventSub chat message events.
// Fields that Kaiyan does not need are dropped for efficiency.
type Twitch struct {
// Broadcaster is the broadcaster user ID.
Broadcaster string `json:"broadcaster_user_id"`
// Chatter is the user ID of the user who sent the message.
Chatter string `json:"chatter_user_id"`
// ID is the message ID.
ID string `json:"message_id"`
// Message is the message content.
Message TwitchContent `json:"message"`
// SourceBroadcaster is the user ID of the broadcaster whose chat was the
// one to which this message was sent originally, in the case of shared chat.
// If it is not a shared chat message, then this is the empty string.
SourceBroadcaster NullableString `json:"source_broadcaster_user_id"`
}
// TwitchContent is the content of a Twitch message event.
// Fields that Kaiyan does not need are dropped for efficiency.
type TwitchContent struct {
Text string `json:"text"`
}

60
ingest/message_test.go Normal file
View File

@ -0,0 +1,60 @@
package ingest_test
import (
_ "embed"
"testing"
"github.com/go-json-experiment/json"
"github.com/google/go-cmp/cmp"
"git.sunturtle.xyz/zephyr/kaiyan/ingest"
"git.sunturtle.xyz/zephyr/kaiyan/twitch"
)
func TestTwitchDecode(t *testing.T) {
cases := []struct {
name string
json string
want ingest.Twitch
}{
{
name: "message",
json: twitchMessage,
want: ingest.Twitch{
Broadcaster: "1971641",
Chatter: "4145994",
ID: "cc106a89-1814-919d-454c-f4f2f970aae7",
Message: ingest.TwitchContent{Text: "Hi chat"},
},
},
{
name: "shared",
json: twitchMessageShared,
want: ingest.Twitch{
Broadcaster: "1971641",
Chatter: "4145994",
ID: "cc106a89-1814-919d-454c-f4f2f970aae7",
Message: ingest.TwitchContent{Text: "Hi chat"},
SourceBroadcaster: "112233",
},
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
var got twitch.EventSub[ingest.Twitch]
err := json.Unmarshal([]byte(c.json), &got)
if err != nil {
t.Errorf("couldn't decode message: %v", err)
}
if diff := cmp.Diff(c.want, got.Event); diff != "" {
t.Errorf("wrong result (-want/+got):\n%s", diff)
}
})
}
}
//go:embed testdata/twitch-message.json
var twitchMessage string
//go:embed testdata/twitch-message-shared.json
var twitchMessageShared string

View File

@ -0,0 +1,72 @@
{
"subscription": {
"id": "0b7f3361-672b-4d39-b307-dd5b576c9b27",
"status": "enabled",
"type": "channel.chat.message",
"version": "1",
"condition": {
"broadcaster_user_id": "1971641",
"user_id": "2914196"
},
"transport": {
"method": "websocket",
"session_id": "AgoQHR3s6Mb4T8GFB1l3DlPfiRIGY2VsbC1h"
},
"created_at": "2023-11-06T18:11:47.492253549Z",
"cost": 0
},
"event": {
"broadcaster_user_id": "1971641",
"broadcaster_user_login": "streamer",
"broadcaster_user_name": "streamer",
"chatter_user_id": "4145994",
"chatter_user_login": "viewer32",
"chatter_user_name": "viewer32",
"message_id": "cc106a89-1814-919d-454c-f4f2f970aae7",
"message": {
"text": "Hi chat",
"fragments": [
{
"type": "text",
"text": "Hi chat",
"cheermote": null,
"emote": null,
"mention": null
}
]
},
"color": "#00FF7F",
"badges": [
{
"set_id": "moderator",
"id": "1",
"info": ""
},
{
"set_id": "subscriber",
"id": "12",
"info": "16"
},
{
"set_id": "sub-gifter",
"id": "1",
"info": ""
}
],
"message_type": "text",
"cheer": null,
"reply": null,
"channel_points_custom_reward_id": null,
"source_broadcaster_user_id": "112233",
"source_broadcaster_user_login": "streamer33",
"source_broadcaster_user_name": "streamer33",
"source_message_id": "e03f6d5d-8ec8-4c63-b473-9e5fe61e289b",
"source_badges": [
{
"set_id": "subscriber",
"id": "3",
"info": "3"
}
]
}
}

66
ingest/testdata/twitch-message.json vendored Normal file
View File

@ -0,0 +1,66 @@
{
"subscription": {
"id": "0b7f3361-672b-4d39-b307-dd5b576c9b27",
"status": "enabled",
"type": "channel.chat.message",
"version": "1",
"condition": {
"broadcaster_user_id": "1971641",
"user_id": "2914196"
},
"transport": {
"method": "websocket",
"session_id": "AgoQHR3s6Mb4T8GFB1l3DlPfiRIGY2VsbC1h"
},
"created_at": "2023-11-06T18:11:47.492253549Z",
"cost": 0
},
"event": {
"broadcaster_user_id": "1971641",
"broadcaster_user_login": "streamer",
"broadcaster_user_name": "streamer",
"chatter_user_id": "4145994",
"chatter_user_login": "viewer32",
"chatter_user_name": "viewer32",
"message_id": "cc106a89-1814-919d-454c-f4f2f970aae7",
"message": {
"text": "Hi chat",
"fragments": [
{
"type": "text",
"text": "Hi chat",
"cheermote": null,
"emote": null,
"mention": null
}
]
},
"color": "#00FF7F",
"badges": [
{
"set_id": "moderator",
"id": "1",
"info": ""
},
{
"set_id": "subscriber",
"id": "12",
"info": "16"
},
{
"set_id": "sub-gifter",
"id": "1",
"info": ""
}
],
"message_type": "text",
"cheer": null,
"reply": null,
"channel_points_custom_reward_id": null,
"source_broadcaster_user_id": null,
"source_broadcaster_user_login": null,
"source_broadcaster_user_name": null,
"source_message_id": null,
"source_badges": null
}
}

31
ingest/wire.go Normal file
View File

@ -0,0 +1,31 @@
package ingest
import (
"fmt"
"io"
"github.com/go-json-experiment/json/jsontext"
)
// NullableString is a string that decodes JSON null as the empty string.
type NullableString string
func (n *NullableString) UnmarshalJSONFrom(d *jsontext.Decoder) error {
t, err := d.ReadToken()
switch err {
case nil: // do nothing
case io.EOF:
return io.ErrUnexpectedEOF
default:
return err
}
switch t.Kind() {
case 'n':
*n = ""
case '"':
*n = NullableString(t.String())
default:
return fmt.Errorf("invalid token for nullable string %q", t.String())
}
return nil
}

45
ingest/wire_test.go Normal file
View File

@ -0,0 +1,45 @@
package ingest_test
import (
"testing"
"github.com/go-json-experiment/json"
"git.sunturtle.xyz/zephyr/kaiyan/ingest"
)
func TestNullableString(t *testing.T) {
cases := []struct {
name string
json string
want ingest.NullableString
}{
{
name: "null",
json: "null",
want: "",
},
{
name: "simple",
json: `"bocchi"`,
want: "bocchi",
},
{
name: "quotes",
json: `"\"bocchi\""`,
want: `"bocchi"`,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
var got ingest.NullableString
err := json.Unmarshal([]byte(c.json), &got)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if got != c.want {
t.Errorf("wrong result: want %q, got %q", c.want, got)
}
})
}
}

13
queue/message.go Normal file
View File

@ -0,0 +1,13 @@
package queue
// Message is the shape of messages handed from ingest to indexers.
type Message struct {
// ID is the message ID.
ID string `json:"id"`
// Channel is an identifier for the chat channel where the message was sent.
Channel string `json:"ch"`
// Sender is an obfuscated identifier for the sending user.
Sender sender `json:"u"`
// Text is the message content.
Text string `json:"t"`
}

64
queue/sender.go Normal file
View File

@ -0,0 +1,64 @@
package queue
import (
"crypto/sha3"
"encoding/base64"
"fmt"
"io"
"github.com/go-json-experiment/json/jsontext"
)
// sender is an obfuscated sender ID.
//
// It acts approximately as a quotient type: since it is unexported, the only
// way to obtain values of type sender without unusual effort is to use the
// constructors provided by this package.
// This helps to prevent misuse that exposes actual user IDs.
type sender [28]byte
// Sender constructs an obfuscated sender ID.
// The salt must be shared by all ingesters.
// While the salt may be of any length, a length between 16 and 64 bytes is suggested.
func Sender(salt []byte, channel, user string) sender {
b := make([]byte, 128)
b = append(b, channel...)
b = append(b, 0)
b = append(b, salt...)
b = append(b, 0)
b = append(b, user...)
return sender(sha3.Sum224(b))
}
func (s sender) String() string {
b := base64.RawStdEncoding.AppendEncode(make([]byte, 0, 38), s[:])
return string(b)
}
func (s sender) MarshalJSONTo(e *jsontext.Encoder) error {
return e.WriteToken(jsontext.String(s.String()))
}
func (s *sender) UnmarshalJSONFrom(d *jsontext.Decoder) error {
t, err := d.ReadToken()
switch err {
case nil: // do nothing
case io.EOF:
return io.ErrUnexpectedEOF
default:
return err
}
e := t.String()
if t.Kind() != '"' {
return fmt.Errorf("invalid token for sender %q", e)
}
if len(e) != 38 {
return fmt.Errorf("invalid string for sender %q", e)
}
b, err := base64.RawStdEncoding.AppendDecode(make([]byte, 0, 28), []byte(e))
if err != nil {
return err
}
*s = sender(b)
return nil
}

86
queue/sender_test.go Normal file
View File

@ -0,0 +1,86 @@
package queue
import (
"io"
"math/rand/v2"
"testing"
"github.com/go-json-experiment/json"
"github.com/go-json-experiment/json/jsontext"
)
func TestSenderUnique(t *testing.T) {
// Salt, channel, and user should all produce differences in the sender ID.
base := Sender([]byte("nijika"), "kessoku", "bocchi")
salt := Sender([]byte("kita"), "kessoku", "bocchi")
channel := Sender([]byte("nijika"), "sickhack", "bocchi")
user := Sender([]byte("nijika"), "kessoku", "ryō")
m := map[sender]struct{}{
base: {},
salt: {},
channel: {},
user: {},
}
if len(m) != 4 {
t.Errorf("collision:\nbase: %v\nsalt: %v\nchan: %v\nuser: %v", base, salt, channel, user)
}
}
func TestSenderRoundTrip(t *testing.T) {
want := Sender([]byte("nijika"), "kessoku", "bocchi")
b, err := json.Marshal(want)
if err != nil {
t.Errorf("encode failed: %v", err)
}
var got sender
if err := json.Unmarshal(b, &got); err != nil {
t.Errorf("decode failed: %v", err)
}
if want != got {
t.Errorf("round-trip failed:\nwant %v\ngot %v", want, got)
}
}
func BenchmarkSenderEncode(b *testing.B) {
enc := jsontext.NewEncoder(io.Discard)
opts := []*sender{ // pointers to avoid allocations
ref(Sender([]byte("nijika"), "kessoku", "bocchi")),
ref(Sender([]byte("kita"), "kessoku", "bocchi")),
ref(Sender([]byte("nijika"), "sickhack", "bocchi")),
ref(Sender([]byte("nijika"), "kessoku", "ryō")),
ref(Sender([]byte("nijika"), "kessoku", "bocchi2")),
ref(Sender([]byte("kita"), "kessoku", "bocchi2")),
ref(Sender([]byte("nijika"), "sickhack", "bocchi2")),
ref(Sender([]byte("nijika"), "kessoku", "ryō2")),
}
b.ReportAllocs()
for b.Loop() {
json.MarshalEncode(enc, opts[rand.IntN(len(opts))])
}
}
func BenchmarkSenderDecode(b *testing.B) {
opts := [][]byte{
must(json.Marshal(Sender([]byte("nijika"), "kessoku", "bocchi"))),
must(json.Marshal(Sender([]byte("kita"), "kessoku", "bocchi"))),
must(json.Marshal(Sender([]byte("nijika"), "sickhack", "bocchi"))),
must(json.Marshal(Sender([]byte("nijika"), "kessoku", "ryō"))),
must(json.Marshal(Sender([]byte("nijika"), "kessoku", "bocchi2"))),
must(json.Marshal(Sender([]byte("kita"), "kessoku", "bocchi2"))),
must(json.Marshal(Sender([]byte("nijika"), "sickhack", "bocchi2"))),
must(json.Marshal(Sender([]byte("nijika"), "kessoku", "ryō2"))),
}
b.ReportAllocs()
var v sender
for b.Loop() {
json.Unmarshal(opts[rand.IntN(len(opts))], &v)
}
}
func ref[T any](x T) *T { return &x }
func must[T any](x T, err error) T {
if err != nil {
panic(err)
}
return x
}

13
twitch/eventsub.go Normal file
View File

@ -0,0 +1,13 @@
package twitch
// EventSub is the wire format of incoming payloads from Twitch EventSub.
type EventSub[Event any] struct {
// Subscription is fields relating to the EventSub subscription itself.
Subscription Subscription `json:"subscription"`
// Event is the event payload.
Event Event `json:"event"`
}
// Subscription is the fields of an EventSub subscription
// which are relevant to Kaiyan.
type Subscription struct{}