chord/vendor/github.com/go-json-experiment/json/internal/jsonwire/wire.go
2025-03-15 20:42:37 -04:00

216 lines
6.3 KiB
Go

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package jsonwire implements stateless functionality for handling JSON text.
package jsonwire
import (
"cmp"
"errors"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
)
// TrimSuffixWhitespace trims JSON whitespace from the end of b.
// JSON whitespace is a space, horizontal tab, carriage return, or newline.
// The result is a prefix of b that shares its backing array.
func TrimSuffixWhitespace(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	end := len(b)
	for end > 0 && (b[end-1] == ' ' || b[end-1] == '\t' || b[end-1] == '\r' || b[end-1] == '\n') {
		end--
	}
	return b[:end]
}
// TrimSuffixString removes the JSON string literal that terminates b,
// including both of its surrounding double quotes.
// The behavior is undefined if b does not end with a valid JSON string.
func TrimSuffixString(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	n := len(b)
	// Step over the closing quote.
	if n > 0 && b[n-1] == '"' {
		n--
	}
	// Walk backwards until b[:n] ends in a quote that is not preceded by
	// a backslash; in a valid JSON string that is the opening quote.
	for n >= 2 && (b[n-1] != '"' || b[n-2] == '\\') {
		n--
	}
	// Step over the opening quote.
	if n > 0 && b[n-1] == '"' {
		n--
	}
	return b[:n]
}
// HasSuffixByte reports whether the last byte of b is c.
// It reports false when b is empty.
func HasSuffixByte(b []byte, c byte) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) == 0 {
		return false
	}
	return b[len(b)-1] == c
}
// TrimSuffixByte returns b without its final byte if that byte is c.
// At most one byte is removed; otherwise b is returned unchanged.
func TrimSuffixByte(b []byte, c byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if last := len(b) - 1; last >= 0 && b[last] == c {
		return b[:last]
	}
	return b
}
// QuoteRune returns a single-quoted representation of the first rune in b.
//
// If b begins with a byte that is not valid UTF-8, the result is that byte
// rendered as a hexadecimal escape of the form '\xNN'.
// Otherwise the result is strconv.QuoteRune applied to the decoded rune.
func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string {
	r, size := utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
	if r != utf8.RuneError || size != 1 {
		// Either a valid rune, or empty input (size == 0).
		return strconv.QuoteRune(r)
	}
	// Invalid encoding: report the offending byte itself.
	return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'`
}
// CompareUTF16 lexicographically compares x to y according
// to the UTF-16 code units of the UTF-8 encoded input strings.
// This implements the ordering specified in RFC 8785, section 3.2.3.
// The result is -1 if x sorts before y, +1 if x sorts after y,
// and 0 if they compare equal.
func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
	// NOTE: This is an optimized, mostly allocation-free implementation
	// of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the
	// two implementations agree on the result of comparing any two strings.

	// fitsOneUnit reports whether r is encoded in UTF-16 as a single
	// code unit (as opposed to a surrogate pair).
	fitsOneUnit := func(r rune) bool {
		switch {
		case 0x0000 <= r && r <= 0xD7FF:
			return true
		case 0xE000 <= r && r <= 0xFFFF:
			return true
		}
		return false
	}

	for len(x) > 0 && len(y) > 0 {
		bx, by := x[0], y[0]

		// ASCII fast-path: if either side starts with an ASCII byte,
		// comparing the leading bytes directly is sufficient.
		if bx < utf8.RuneSelf || by < utf8.RuneSelf {
			if bx != by {
				return cmp.Compare(bx, by)
			}
			x, y = x[1:], y[1:]
			continue
		}

		// Decode next pair of runes as UTF-8.
		rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x)))
		ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y)))

		// When exactly one of the runes needs a surrogate pair, compare the
		// other rune against the leading surrogate of that pair.
		if oneX, oneY := fitsOneUnit(rx), fitsOneUnit(ry); oneX != oneY {
			if oneX {
				ry, _ = utf16.EncodeRune(ry)
			} else {
				rx, _ = utf16.EncodeRune(rx)
			}
		}
		if rx != ry {
			return cmp.Compare(rx, ry)
		}

		// Check for invalid UTF-8, in which case,
		// we just perform a byte-for-byte comparison.
		if (isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny)) && bx != by {
			return cmp.Compare(bx, by)
		}
		x, y = x[nx:], y[ny:]
	}

	// At least one input is exhausted; the shorter one sorts first.
	return cmp.Compare(len(x), len(y))
}
// truncateMaxUTF8 truncates b to at most utf8.UTFMax bytes, which is
// short enough to be cheap to convert, yet long enough that the first
// rune of b is still fully present.
//
// The utf8 package currently lacks generic variants, which complicates
// generic functions that operate on either []byte or string.
// As a hack, we always call the utf8 function operating on strings,
// but always truncate the input such that the result is identical.
//
// Example usage:
//
//	utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
//
// Converting a []byte to a string is stack allocated since
// truncateMaxUTF8 guarantees that the []byte is short.
func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
	// TODO(https://go.dev/issue/56948): Remove this function and
	// instead directly call generic utf8 functions wherever used.
	if len(b) <= utf8.UTFMax {
		return b
	}
	return b[:utf8.UTFMax]
}
// ErrInvalidUTF8 is a sentinel error whose message is "invalid UTF-8".
// NOTE(review): presumably returned or wrapped by callers that detect
// malformed UTF-8 in JSON text; its uses are not visible in this file.
//
// TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead.
var ErrInvalidUTF8 = errors.New("invalid UTF-8")
// NewInvalidCharacterError returns an error reporting that the first rune
// of prefix is not permitted. The rune is rendered with QuoteRune and
// where is appended verbatim to describe the context, producing a message
// of the form "invalid character <rune> <where>".
func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error {
	return errors.New("invalid character " + QuoteRune(prefix) + " " + where)
}
// NewInvalidEscapeSequenceError returns an error reporting that what is
// not a valid escape sequence within a JSON string.
//
// Inputs longer than 6 bytes (the length of a single \uXXXX escape) are
// described as a "surrogate pair"; everything else as an "escape sequence".
// The offending text is shown verbatim between backticks, unless it contains
// a backtick, whitespace, a non-printable rune, or U+FFFD (including invalid
// UTF-8), in which case it is rendered with strconv.Quote instead.
func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
	seq := string(what)

	label := "escape sequence"
	if len(what) > 6 {
		label = "surrogate pair"
	}

	// needsQuoting reports whether r cannot be shown safely between backticks.
	needsQuoting := func(r rune) bool {
		return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r)
	}
	if strings.ContainsFunc(seq, needsQuoting) {
		return errors.New("invalid " + label + " " + strconv.Quote(seq) + " in string")
	}
	return errors.New("invalid " + label + " `" + seq + "` in string")
}
// TruncatePointer shortens the JSON pointer s when it is longer than n
// bytes by replacing its middle portion with an ellipsis marker, so that
// the result is roughly no longer than n. If s already fits within n bytes
// it is returned unchanged.
//
// Roughly n/2 bytes are kept from each end of s. The cut points are then
// moved outward to '/' boundaries where possible, so that names are elided
// whole, and are never left in the middle of a UTF-8 rune.
func TruncatePointer(s string, n int) string {
	if len(s) <= n {
		return s
	}
	half := n / 2
	head, tail := half, len(s)-half // the elided region is s[head:tail]

	// Avoid truncating a name if there are multiple names present.
	if slash := strings.LastIndexByte(s[:head], '/'); slash > 0 {
		head = slash
	}
	if slash := strings.IndexByte(s[tail:], '/'); slash >= 0 {
		tail += slash + len("/")
	}

	// Avoid truncation in the middle of a UTF-8 rune.
	for head > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:head])) {
		head--
	}
	for tail < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[tail:])) {
		tail++
	}

	// Determine the right middle fragment to use, based on how many
	// name separators are being elided.
	elided := s[head:tail]
	middle := "…/…/…"
	switch strings.Count(elided, "/") {
	case 0:
		// No separator is elided, so there is nothing to trim below.
		return s[:head] + "…" + s[tail:]
	case 1:
		middle = "…/…"
	}

	// If the elided region begins or ends exactly on a separator,
	// no partial name is hidden on that side, so drop that ellipsis.
	if strings.HasPrefix(elided, "/") {
		middle = strings.TrimPrefix(middle, "…")
	}
	if strings.HasSuffix(elided, "/") {
		middle = strings.TrimSuffix(middle, "…")
	}
	return s[:head] + middle + s[tail:]
}
// isInvalidUTF8 reports whether the pair (r, rn), as returned by the
// decoding functions of the utf8 package, denotes an invalid encoding:
// utf8.RuneError with a width of exactly one byte. A width other than one
// (e.g. an explicitly encoded U+FFFD, or empty input) is not invalid.
func isInvalidUTF8(r rune, rn int) bool {
	if rn != 1 {
		return false
	}
	return r == utf8.RuneError
}