vendor dependencies

This commit is contained in:
2025-03-15 20:42:37 -04:00
parent dd693fb000
commit af65c66317
84 changed files with 19478 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
import "errors"
// NotForPublicUse is a marker type that an API is for internal use only.
// It does not perfectly prevent usage of that API, but helps to restrict usage.
// Anything with this marker is not covered by the Go compatibility agreement.
//
// It is an empty struct and therefore carries no data.
type NotForPublicUse struct{}
// AllowInternalUse is passed from "json" to "jsontext" to authenticate
// that the caller can have access to internal functionality.
// It is simply the zero value of NotForPublicUse.
var AllowInternalUse NotForPublicUse
// Sentinel error values internally shared between jsonv1 and jsonv2.
// Each is created exactly once with errors.New, so it can be matched
// by identity (e.g., with errors.Is).
var (
// ErrCycle carries the message "encountered a cycle".
ErrCycle = errors.New("encountered a cycle")
// ErrNonNilReference carries the message
// "value must be passed as a non-nil pointer reference".
ErrNonNilReference = errors.New("value must be passed as a non-nil pointer reference")
)
// Function hooks through which v1-specific behavior is reached.
// They are declared without an initial value, so each is nil until assigned.
// NOTE(review): presumably the jsonv1 package assigns these during
// initialization; the assignment is not visible here — confirm.
var (
// TransformMarshalError converts a v2 error into a v1 error.
// It is called only at the top-level of a Marshal function.
TransformMarshalError func(any, error) error
// NewMarshalerError constructs a jsonv1.MarshalerError.
// It is called after a user-defined Marshal method/function fails.
NewMarshalerError func(any, error, string) error
// TransformUnmarshalError converts a v2 error into a v1 error.
// It is called only at the top-level of an Unmarshal function.
TransformUnmarshalError func(any, error) error
// NewRawNumber returns new(jsonv1.Number).
NewRawNumber func() any
// RawNumberOf returns jsonv1.Number(b).
RawNumberOf func(b []byte) any
)

View File

@@ -0,0 +1,203 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package jsonflags implements all the optional boolean flags.
// These flags are shared across the "json", "jsontext", and "jsonopts" packages.
package jsonflags
import "github.com/go-json-experiment/json/internal"
// Bools represents zero or more boolean flags, all set to true or false.
// The least-significant bit is the boolean value of all flags in the set.
// The remaining bits identify which particular flags.
//
// In common usage, this is OR'd with 0 or 1. For example:
// - (AllowInvalidUTF8 | 0) means "AllowInvalidUTF8 is false"
// - (Multiline | Indent | 1) means "Multiline and Indent are true"
type Bools uint64
// JSONOptions is a marker method with an empty body.
// Its parameter type, internal.NotForPublicUse, is declared in an
// internal package.
func (Bools) JSONOptions(internal.NotForPublicUse) {}
// Named sets of flags, each formed by combining individual flag bits.
const (
// AllFlags is the set of all flags.
AllFlags = AllCoderFlags | AllArshalV2Flags | AllArshalV1Flags
// AllCoderFlags is the set of all encoder/decoder flags.
// (maxCoderFlag - 1) selects every bit below maxCoderFlag;
// subtracting initFlag removes the value bit itself.
AllCoderFlags = (maxCoderFlag - 1) - initFlag
// AllArshalV2Flags is the set of all v2 marshal/unmarshal flags,
// i.e., every bit below maxArshalV2Flag that is not below maxCoderFlag.
AllArshalV2Flags = (maxArshalV2Flag - 1) - (maxCoderFlag - 1)
// AllArshalV1Flags is the set of all v1 marshal/unmarshal flags,
// i.e., every bit below maxArshalV1Flag that is not below maxArshalV2Flag.
AllArshalV1Flags = (maxArshalV1Flag - 1) - (maxArshalV2Flag - 1)
// NonBooleanFlags is the set of non-boolean flags,
// where the value is some other concrete Go type.
// The value of the flag is stored within jsonopts.Struct.
NonBooleanFlags = 0 |
Indent |
IndentPrefix |
ByteLimit |
DepthLimit |
Marshalers |
Unmarshalers
// DefaultV1Flags is the set of boolean flags that default to true under
// v1 semantics. None of the non-boolean flags differ between v1 and v2.
DefaultV1Flags = 0 |
AllowDuplicateNames |
AllowInvalidUTF8 |
EscapeForHTML |
EscapeForJS |
EscapeInvalidUTF8 |
PreserveRawStrings |
Deterministic |
FormatNilMapAsNull |
FormatNilSliceAsNull |
MatchCaseInsensitiveNames |
CallMethodsWithLegacySemantics |
FormatBytesWithLegacySemantics |
FormatTimeWithLegacySemantics |
MatchCaseSensitiveDelimiter |
MergeWithLegacySemantics |
OmitEmptyWithLegacyDefinition |
ReportErrorsWithLegacySemantics |
StringifyWithLegacySemantics |
UnmarshalArrayFromAnyLength
// AnyWhitespace is the set of flags that can cause the encoded output
// to contain whitespace.
AnyWhitespace = Multiline | SpaceAfterColon | SpaceAfterComma
// WhitespaceFlags is the set of flags related to whitespace formatting.
// In contrast to AnyWhitespace, this includes Indent and IndentPrefix
// as those settings take no effect if Multiline is false.
WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix
// AnyEscape is the set of flags related to escaping in a JSON string.
AnyEscape = EscapeForHTML | EscapeForJS | EscapeInvalidUTF8
// CanonicalizeNumbers is the set of flags related to raw number canonicalization.
CanonicalizeNumbers = CanonicalizeRawInts | CanonicalizeRawFloats
)
// Encoder and decoder flags.
// Each constant is a distinct single bit (1 << iota), starting with
// initFlag at bit 0. maxCoderFlag is the first bit after the last coder flag
// and is used only to compute masks and to seed the next group of flags.
const (
initFlag Bools = 1 << iota // reserved for the boolean value itself
AllowDuplicateNames // encode or decode
AllowInvalidUTF8 // encode or decode
WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal
OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite
PreserveRawStrings // encode only
CanonicalizeRawInts // encode only
CanonicalizeRawFloats // encode only
ReorderRawObjects // encode only
EscapeForHTML // encode only
EscapeForJS // encode only
EscapeInvalidUTF8 // encode only; only exposed in v1
Multiline // encode only
SpaceAfterColon // encode only
SpaceAfterComma // encode only
Indent // encode only; non-boolean flag
IndentPrefix // encode only; non-boolean flag
ByteLimit // encode or decode; non-boolean flag
DepthLimit // encode or decode; non-boolean flag
maxCoderFlag
)
// Marshal and Unmarshal flags (for v2).
// The blank identifier takes the value (maxCoderFlag >> 1), i.e., the bit of
// the last coder flag, so that the first named flag here lands exactly on
// maxCoderFlag and the bit sequence continues without gaps or overlap.
const (
_ Bools = (maxCoderFlag >> 1) << iota
StringifyNumbers // marshal or unmarshal
Deterministic // marshal only
FormatNilMapAsNull // marshal only
FormatNilSliceAsNull // marshal only
OmitZeroStructFields // marshal only
MatchCaseInsensitiveNames // marshal or unmarshal
DiscardUnknownMembers // marshal only
RejectUnknownMembers // unmarshal only
Marshalers // marshal only; non-boolean flag
Unmarshalers // unmarshal only; non-boolean flag
maxArshalV2Flag
)
// Marshal and Unmarshal flags (for v1).
// The blank identifier takes the value (maxArshalV2Flag >> 1), i.e., the bit
// of the last v2 flag, so that the first named flag here lands exactly on
// maxArshalV2Flag and the bit sequence continues without gaps or overlap.
const (
_ Bools = (maxArshalV2Flag >> 1) << iota
CallMethodsWithLegacySemantics // marshal or unmarshal
FormatBytesWithLegacySemantics // marshal or unmarshal
FormatTimeWithLegacySemantics // marshal or unmarshal
MatchCaseSensitiveDelimiter // marshal or unmarshal
MergeWithLegacySemantics // unmarshal
OmitEmptyWithLegacyDefinition // marshal
ReportErrorsWithLegacySemantics // marshal or unmarshal
StringifyWithLegacySemantics // marshal or unmarshal
StringifyBoolsAndStrings // marshal or unmarshal; for internal use by jsonv2.makeStructArshaler
UnmarshalAnyWithRawNumber // unmarshal; for internal use by jsonv1.Decoder.UseNumber
UnmarshalArrayFromAnyLength // unmarshal
maxArshalV1Flag
)
// Flags is a set of boolean flags where every flag is tracked individually.
// A bit in Presence marks the corresponding flag as set; the same bit in
// Values holds that flag's boolean value. A bit may only be one in Values
// if it is also one in Presence.
// The least-significant bit of both fields is always zero.
//
// This differs from Bools, which describes a group of flags that all share
// one boolean value: in Flags each flag may be true or false on its own.
type Flags struct{ Presence, Values uint64 }

// Join merges src into dst. Wherever both specify a flag, src wins.
func (dst *Flags) Join(src Flags) {
	// Anything present in src is now also present in dst.
	dst.Presence |= src.Presence

	// For the flags that src specifies, discard whatever value dst held
	// (AND-NOT with the source presence) and adopt the source value (OR).
	//
	// e.g., dst := Flags{Presence: 0b_1100_0110, Values: 0b_1000_0010}
	// e.g., src := Flags{Presence: 0b_0101_1010, Values: 0b_0001_0010}
	// Presence: 0b_1100_0110 | 0b_0101_1010                  -> 0b_1101_1110
	// Values:   (0b_1000_0010 &^ 0b_0101_1010) | 0b_0001_0010 -> 0b_1001_0010
	dst.Values = (dst.Values &^ src.Presence) | src.Values
}
// Set marks every flag identified by f as present and gives each of them
// the boolean value stored in the least-significant bit of f.
func (fs *Flags) Set(f Bools) {
	// Split f into its value bit (the LSB) and its identifier bits (the rest).
	// e.g., fs := Flags{Presence: 0b_0101_0010, Values: 0b_0001_0010}
	// e.g., f := 0b_1001_0001
	bits := uint64(f)
	value := bits & 1 // e.g., 1
	ids := bits &^ 1  // e.g., 0b_1001_0000

	// Record the identifiers as present.
	fs.Presence |= ids // e.g., 0b_0101_0010 | 0b_1001_0000 -> 0b_1101_0010

	// Clear the old values of those identifiers, then set them all again
	// if and only if the value bit is one (value*ids is either 0 or ids).
	// e.g., (0b_0001_0010 &^ 0b_1001_0000) | 0b_1001_0000 -> 0b_1001_0010
	fs.Values = (fs.Values &^ ids) | value*ids
}
// Get reports whether the flag identified by f is true. If f identifies
// several flags, it reports whether at least one of them is true.
// It is normally called with a single flag.
// The value bit of f (i.e., the LSB) has no influence on the result,
// since the least-significant bit of Values is always zero.
func (fs Flags) Get(f Bools) bool {
	return fs.Values&uint64(f) != 0
}
// Has reports whether the flag identified by f has been set. If f identifies
// several flags, it reports whether at least one of them has been set.
// The value bit of f (i.e., the LSB) has no influence on the result,
// since the least-significant bit of Presence is always zero.
func (fs Flags) Has(f Bools) bool {
	return fs.Presence&uint64(f) != 0
}
// Clear removes the flag or flags identified by f, resetting both their
// presence and their value.
// The value bit of f (i.e., the LSB) is ignored.
func (fs *Flags) Clear(f Bools) {
	// AND-NOT drops every bit of f from both fields.
	// e.g., fs := Flags{Presence: 0b_0101_0010, Values: 0b_0001_0010}
	// e.g., f := 0b_0001_1000
	drop := uint64(f)
	fs.Presence &^= drop // e.g., 0b_0101_0010 &^ 0b_0001_1000 -> 0b_0100_0010
	fs.Values &^= drop   // e.g., 0b_0001_0010 &^ 0b_0001_1000 -> 0b_0000_0010
}

View File

@@ -0,0 +1,200 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsonopts
import (
"github.com/go-json-experiment/json/internal"
"github.com/go-json-experiment/json/internal/jsonflags"
)
// Options is the common options type shared across json packages.
// Any type with a JSONOptions method of this signature satisfies it.
type Options interface {
// JSONOptions is exported so related json packages can implement Options.
// Its parameter is the internal.NotForPublicUse marker type.
JSONOptions(internal.NotForPublicUse)
}
// Struct is the combination of all options in struct form.
// This is efficient to pass down the call stack and to query.
//
// Flags records which options are set and the value of the boolean ones;
// the embedded CoderValues and ArshalValues hold the non-boolean values.
type Struct struct {
Flags jsonflags.Flags
CoderValues
ArshalValues
}
// CoderValues holds the values of the non-boolean encoder/decoder options.
// The trailing comment on each field names the jsonflags bit that marks
// the field as set.
type CoderValues struct {
Indent string // jsonflags.Indent
IndentPrefix string // jsonflags.IndentPrefix
ByteLimit int64 // jsonflags.ByteLimit
DepthLimit int // jsonflags.DepthLimit
}
// ArshalValues holds the values of the non-boolean marshal/unmarshal options.
type ArshalValues struct {
// The Marshalers and Unmarshalers fields use the any type to avoid a
// concrete dependency on *json.Marshalers and *json.Unmarshalers,
// which would in turn create a dependency on the "reflect" package.
Marshalers any // jsonflags.Marshalers
Unmarshalers any // jsonflags.Unmarshalers
// NOTE(review): Format and FormatDepth have no associated jsonflags bit
// here and are not touched by Struct.Join; how they are used is not
// visible from this file.
Format string
FormatDepth int
}
// DefaultOptionsV2 is the set of all options that define default v2 behavior.
// Every flag except the whitespace-related ones is marked as present,
// and every value bit is zero, so each present boolean flag is false.
var DefaultOptionsV2 = Struct{
Flags: jsonflags.Flags{
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
Values: uint64(0),
},
}
// DefaultOptionsV1 is the set of all options that define default v1 behavior.
// Every flag except the whitespace-related ones is marked as present.
// The flags in jsonflags.DefaultV1Flags are true; all others are false.
var DefaultOptionsV1 = Struct{
Flags: jsonflags.Flags{
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
Values: uint64(jsonflags.DefaultV1Flags),
},
}
// JSONOptions is a marker method with an empty body;
// it makes *Struct satisfy the Options interface.
func (*Struct) JSONOptions(internal.NotForPublicUse) {}
// GetUnknownOption is injected by the "json" package to handle Options
// declared in that package so that "jsonopts" can handle them.
// Until it is replaced, the default implementation panics with
// "unknown option".
var GetUnknownOption = func(*Struct, Options) (any, bool) { panic("unknown option") }
// GetOption looks up a single option within opts.
// The option to look up is identified by setter: it is invoked with the zero
// value of T and the dynamic type of the Options it returns selects the
// option. GetOption returns the option's value and whether it is set.
// Options not declared in this package are delegated to GetUnknownOption.
func GetOption[T any](opts Options, setter func(T) Options) (T, bool) {
	var zero T

	// Work on a *Struct in all cases: use opts directly if it already is one,
	// otherwise fold it into a fresh Struct.
	s, isStruct := opts.(*Struct)
	if !isStruct {
		s = new(Struct)
		s.Join(opts)
	}

	// The dynamic type produced by the setter tells us which option is meant.
	switch key := setter(zero).(type) {
	case jsonflags.Bools:
		value := s.Flags.Get(key)
		present := s.Flags.Has(key)
		return any(value).(T), present
	case Indent:
		if s.Flags.Has(jsonflags.Indent) {
			return any(s.Indent).(T), true
		}
	case IndentPrefix:
		if s.Flags.Has(jsonflags.IndentPrefix) {
			return any(s.IndentPrefix).(T), true
		}
	case ByteLimit:
		if s.Flags.Has(jsonflags.ByteLimit) {
			return any(s.ByteLimit).(T), true
		}
	case DepthLimit:
		if s.Flags.Has(jsonflags.DepthLimit) {
			return any(s.DepthLimit).(T), true
		}
	default:
		value, present := GetUnknownOption(s, key)
		return value.(T), present
	}

	// A known non-boolean option that has not been set.
	return zero, false
}
// JoinUnknownOption is injected by the "json" package to handle Options
// declared in that package so that "jsonopts" can handle them.
// Until it is replaced, the default implementation panics with
// "unknown option".
var JoinUnknownOption = func(*Struct, Options) { panic("unknown option") }
// Join merges each of srcs into dst, in order,
// including the encoder/decoder options.
func (dst *Struct) Join(srcs ...Options) {
dst.join(false, srcs...)
}
// JoinWithoutCoderOptions merges each of srcs into dst, in order,
// but skips the encoder/decoder options.
func (dst *Struct) JoinWithoutCoderOptions(srcs ...Options) {
dst.join(true, srcs...)
}
// join merges each of srcs into dst in order, so that a later option
// overrides an earlier one. When excludeCoderOptions is true, the
// encoder/decoder options (the bits in jsonflags.AllCoderFlags, which covers
// Indent, IndentPrefix, ByteLimit, and DepthLimit) are left untouched.
// A nil option is ignored. Option types not handled here are forwarded
// to JoinUnknownOption.
func (dst *Struct) join(excludeCoderOptions bool, srcs ...Options) {
for _, src := range srcs {
switch src := src.(type) {
case nil:
continue
case jsonflags.Bools:
if excludeCoderOptions {
// Drop the coder flag identifiers; the value bit is preserved
// since AllCoderFlags does not include it.
src &= ^jsonflags.AllCoderFlags
}
dst.Flags.Set(src)
case Indent:
if excludeCoderOptions {
continue
}
// Specifying an indent also turns Multiline on.
dst.Flags.Set(jsonflags.Multiline | jsonflags.Indent | 1)
dst.Indent = string(src)
case IndentPrefix:
if excludeCoderOptions {
continue
}
// Specifying an indent prefix also turns Multiline on.
dst.Flags.Set(jsonflags.Multiline | jsonflags.IndentPrefix | 1)
dst.IndentPrefix = string(src)
case ByteLimit:
if excludeCoderOptions {
continue
}
dst.Flags.Set(jsonflags.ByteLimit | 1)
dst.ByteLimit = int64(src)
case DepthLimit:
if excludeCoderOptions {
continue
}
dst.Flags.Set(jsonflags.DepthLimit | 1)
dst.DepthLimit = int(src)
case *Struct:
srcFlags := src.Flags // shallow copy the flags
if excludeCoderOptions {
srcFlags.Clear(jsonflags.AllCoderFlags)
}
dst.Flags.Join(srcFlags)
// Copy the non-boolean values only for flags still present in
// srcFlags; the outer check skips all of them at once when none is.
if srcFlags.Has(jsonflags.NonBooleanFlags) {
if srcFlags.Has(jsonflags.Indent) {
dst.Indent = src.Indent
}
if srcFlags.Has(jsonflags.IndentPrefix) {
dst.IndentPrefix = src.IndentPrefix
}
if srcFlags.Has(jsonflags.ByteLimit) {
dst.ByteLimit = src.ByteLimit
}
if srcFlags.Has(jsonflags.DepthLimit) {
dst.DepthLimit = src.DepthLimit
}
if srcFlags.Has(jsonflags.Marshalers) {
dst.Marshalers = src.Marshalers
}
if srcFlags.Has(jsonflags.Unmarshalers) {
dst.Unmarshalers = src.Unmarshalers
}
}
default:
JoinUnknownOption(dst, src)
}
}
}
// Option types for the non-boolean encoder/decoder options.
// The trailing comment on each names the jsontext function associated with it.
type (
Indent string // jsontext.WithIndent
IndentPrefix string // jsontext.WithIndentPrefix
ByteLimit int64 // jsontext.WithByteLimit
DepthLimit int // jsontext.WithDepthLimit
// type for jsonflags.Marshalers declared in "json" package
// type for jsonflags.Unmarshalers declared in "json" package
)
// The JSONOptions methods below have empty bodies;
// they make each option type satisfy the Options interface.
func (Indent) JSONOptions(internal.NotForPublicUse) {}
func (IndentPrefix) JSONOptions(internal.NotForPublicUse) {}
func (ByteLimit) JSONOptions(internal.NotForPublicUse) {}
func (DepthLimit) JSONOptions(internal.NotForPublicUse) {}

View File

@@ -0,0 +1,627 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsonwire
import (
"io"
"math"
"slices"
"strconv"
"unicode/utf16"
"unicode/utf8"
)
// ValueFlags is a bit set of properties observed while consuming a JSON value.
// The zero value means that none of the properties below were observed.
type ValueFlags uint

const (
	// stringNonVerbatim: string cannot be naively treated as valid UTF-8.
	stringNonVerbatim ValueFlags = 1 << iota
	// stringNonCanonical: string not formatted according to RFC 8785, section 3.2.2.2.
	stringNonCanonical

	// TODO: Track whether a number is a non-integer?
)

// Join adds every flag in f2 to f.
func (f *ValueFlags) Join(f2 ValueFlags) {
	*f = *f | f2
}

// IsVerbatim reports whether stringNonVerbatim is absent from f.
func (f ValueFlags) IsVerbatim() bool {
	return f&stringNonVerbatim != stringNonVerbatim
}

// IsCanonical reports whether stringNonCanonical is absent from f.
func (f ValueFlags) IsCanonical() bool {
	return f&stringNonCanonical != stringNonCanonical
}
// ConsumeWhitespace reports the length of the run of JSON whitespace
// (space, tab, carriage return, line feed; per RFC 7159, section 2)
// at the start of b.
func ConsumeWhitespace(b []byte) (n int) {
	// NOTE: The logic is deliberately kept simple so that it may be inlined.
	for ; n < len(b); n++ {
		if c := b[n]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
	}
	return n
}
// ConsumeNull consumes the JSON null literal (RFC 7159, section 3)
// at the start of b and returns its length.
// It returns 0 if b does not start with the literal,
// in which case ConsumeLiteral should be used to obtain a precise error.
func ConsumeNull(b []byte) int {
	// NOTE: The logic is deliberately kept simple so that it may be inlined.
	const literal = "null"
	if len(b) < len(literal) {
		return 0
	}
	if string(b[:len(literal)]) != literal {
		return 0
	}
	return len(literal)
}
// ConsumeFalse consumes the JSON false literal (RFC 7159, section 3)
// at the start of b and returns its length.
// It returns 0 if b does not start with the literal,
// in which case ConsumeLiteral should be used to obtain a precise error.
func ConsumeFalse(b []byte) int {
	// NOTE: The logic is deliberately kept simple so that it may be inlined.
	const literal = "false"
	if len(b) < len(literal) {
		return 0
	}
	if string(b[:len(literal)]) != literal {
		return 0
	}
	return len(literal)
}
// ConsumeTrue consumes the JSON true literal (RFC 7159, section 3)
// at the start of b and returns its length.
// It returns 0 if b does not start with the literal,
// in which case ConsumeLiteral should be used to obtain a precise error.
func ConsumeTrue(b []byte) int {
	// NOTE: The logic is deliberately kept simple so that it may be inlined.
	const literal = "true"
	if len(b) < len(literal) {
		return 0
	}
	if string(b[:len(literal)]) != literal {
		return 0
	}
	return len(literal)
}
// ConsumeLiteral consumes the JSON literal lit (RFC 7159, section 3)
// from the start of b.
// On the first mismatching byte it returns that byte's offset together with
// an invalid-character error. If b matches lit as far as it goes but is
// shorter than lit, it returns len(b) and io.ErrUnexpectedEOF.
func ConsumeLiteral(b []byte, lit string) (n int, err error) {
	// Only the overlap of b and lit can be compared.
	overlap := len(lit)
	if len(b) < overlap {
		overlap = len(b)
	}
	for i, c := range b[:overlap] {
		if c != lit[i] {
			return i, NewInvalidCharacterError(b[i:], "in literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
		}
	}
	if overlap < len(lit) {
		// Everything so far matched, but the input ended early.
		return len(b), io.ErrUnexpectedEOF
	}
	return len(lit), nil
}
// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7,
// restricted to ASCII strings that contain no escape sequences.
// It returns 0 if the input is invalid or not such a simple string,
// in which case ConsumeString should be called instead.
//
// Bytes are accepted according to the escapeASCII table. That table rejects
// '<', '>', and '&' for compatibility reasons, since the v1 implementation
// always escaped them. A non-zero result therefore means that the string
// would be encoded identically under v1 and v2 escape semantics.
func ConsumeSimpleString(b []byte) (n int) {
	// NOTE: The logic is deliberately kept simple so that it may be inlined.
	if len(b) == 0 || b[0] != '"' {
		return 0
	}
	n = 1 // opening quote
	for n < len(b) && b[n] < utf8.RuneSelf && escapeASCII[b[n]] == 0 {
		n++
	}
	if n >= len(b) || b[n] != '"' {
		return 0
	}
	return n + 1 // closing quote
}
// ConsumeString consumes the next JSON string per RFC 7159, section 7.
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
// characters within the string itself.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// It is ConsumeStringResumable started from offset zero; properties of the
// string observed while consuming it are joined into flags.
func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) {
return ConsumeStringResumable(flags, b, 0, validateUTF8)
}
// ConsumeStringResumable is identical to ConsumeString but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// A resumeOffset greater than zero is used as the position in b at which to
// continue, and the leading double quote is assumed to be consumed already.
// Properties observed along the way (escape sequences, non-canonical
// formatting, invalid UTF-8) are joined into flags.
// When the input ends in the middle of an escape sequence, the returned
// offset is the start of that escape sequence, so that resuming from it
// re-reads the sequence in full.
func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
// Consume the leading double quote.
switch {
case resumeOffset > 0:
n = resumeOffset // already handled the leading quote
case uint(len(b)) == 0:
return n, io.ErrUnexpectedEOF
case b[0] == '"':
n++
default:
return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
}
// Consume every character in the string.
for uint(len(b)) > uint(n) {
// Optimize for long sequences of unescaped characters.
noEscape := func(c byte) bool {
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
}
for uint(len(b)) > uint(n) && noEscape(b[n]) {
n++
}
if uint(len(b)) <= uint(n) {
return n, io.ErrUnexpectedEOF
}
// Check for terminating double quote.
if b[n] == '"' {
n++
return n, nil
}
switch r, rn := utf8.DecodeRune(b[n:]); {
// Handle UTF-8 encoded byte sequence.
// Due to specialized handling of ASCII above, we know that
// all normal sequences at this point must be 2 bytes or larger.
case rn > 1:
n += rn
// Handle escape sequence.
case r == '\\':
flags.Join(stringNonVerbatim)
// Remember where the escape sequence starts in case it is truncated.
resumeOffset = n
if uint(len(b)) < uint(n+2) {
return resumeOffset, io.ErrUnexpectedEOF
}
switch r := b[n+1]; r {
case '/':
// Forward slash is the only character with 3 representations.
// Per RFC 8785, section 3.2.2.2., this must not be escaped.
flags.Join(stringNonCanonical)
n += 2
case '"', '\\', 'b', 'f', 'n', 'r', 't':
n += 2
case 'u':
if uint(len(b)) < uint(n+6) {
// Too short for a full \uXXXX: either truncated or malformed.
if hasEscapedUTF16Prefix(b[n:], false) {
return resumeOffset, io.ErrUnexpectedEOF
}
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n:])
}
v1, ok := parseHexUint16(b[n+2 : n+6])
if !ok {
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n : n+6])
}
// Only certain control characters can use the \uFFFF notation
// for canonical formatting (per RFC 8785, section 3.2.2.2.).
switch v1 {
// \uFFFF notation not permitted for these characters.
case '\b', '\f', '\n', '\r', '\t':
flags.Join(stringNonCanonical)
default:
// \uFFFF notation only permitted for control characters.
if v1 >= ' ' {
flags.Join(stringNonCanonical)
} else {
// \uFFFF notation must be lower case.
for _, c := range b[n+2 : n+6] {
if 'A' <= c && c <= 'F' {
flags.Join(stringNonCanonical)
}
}
}
}
n += 6
r := rune(v1)
// Surrogate halves are only checked for a valid pairing
// when UTF-8 validation is requested.
if validateUTF8 && utf16.IsSurrogate(r) {
if uint(len(b)) < uint(n+6) {
if hasEscapedUTF16Prefix(b[n:], true) {
return resumeOffset, io.ErrUnexpectedEOF
}
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6:])
} else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok {
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
} else {
n += 6
}
}
default:
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n : n+2])
}
// Handle invalid UTF-8.
case r == utf8.RuneError:
if !utf8.FullRune(b[n:]) {
return n, io.ErrUnexpectedEOF
}
flags.Join(stringNonVerbatim | stringNonCanonical)
if validateUTF8 {
return n, ErrInvalidUTF8
}
// Invalid UTF-8 is tolerated: skip a single byte and carry on.
n++
// Handle invalid control characters.
case r < ' ':
flags.Join(stringNonVerbatim | stringNonCanonical)
return n, NewInvalidCharacterError(b[n:], "in string (expecting non-control character)")
default:
panic("BUG: unhandled character " + QuoteRune(b[n:]))
}
}
return n, io.ErrUnexpectedEOF
}
// AppendUnquote appends the unescaped form of a JSON string in src to dst.
// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,
// but the error will be specified as having encountered such an error.
// The input must be an entire JSON string with no surrounding whitespace.
//
// On error, the returned slice still holds whatever was appended before the
// error was detected. Trailing bytes after the closing quote are reported as
// an invalid-character error. Truncated input yields io.ErrUnexpectedEOF.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) {
dst = slices.Grow(dst, len(src))
// Consume the leading double quote.
// Throughout, n is the read position in src and i is the start of the
// pending run of bytes src[i:n] that can be copied to dst unchanged.
var i, n int
switch {
case uint(len(src)) == 0:
return dst, io.ErrUnexpectedEOF
case src[0] == '"':
i, n = 1, 1
default:
return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`)
}
// Consume every character in the string.
for uint(len(src)) > uint(n) {
// Optimize for long sequences of unescaped characters.
noEscape := func(c byte) bool {
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
}
for uint(len(src)) > uint(n) && noEscape(src[n]) {
n++
}
if uint(len(src)) <= uint(n) {
dst = append(dst, src[i:n]...)
return dst, io.ErrUnexpectedEOF
}
// Check for terminating double quote.
if src[n] == '"' {
dst = append(dst, src[i:n]...)
n++
if n < len(src) {
err = NewInvalidCharacterError(src[n:], "after string value")
}
return dst, err
}
// NOTE(review): truncateMaxUTF8 is defined elsewhere; presumably it caps
// the input at the maximum length of one UTF-8 sequence — confirm.
switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
// Handle UTF-8 encoded byte sequence.
// Due to specialized handling of ASCII above, we know that
// all normal sequences at this point must be 2 bytes or larger.
case rn > 1:
n += rn
// Handle escape sequence.
case r == '\\':
// Flush the pending verbatim run before emitting the unescaped form.
dst = append(dst, src[i:n]...)
// Handle escape sequence.
if uint(len(src)) < uint(n+2) {
return dst, io.ErrUnexpectedEOF
}
switch r := src[n+1]; r {
case '"', '\\', '/':
dst = append(dst, r)
n += 2
case 'b':
dst = append(dst, '\b')
n += 2
case 'f':
dst = append(dst, '\f')
n += 2
case 'n':
dst = append(dst, '\n')
n += 2
case 'r':
dst = append(dst, '\r')
n += 2
case 't':
dst = append(dst, '\t')
n += 2
case 'u':
if uint(len(src)) < uint(n+6) {
if hasEscapedUTF16Prefix(src[n:], false) {
return dst, io.ErrUnexpectedEOF
}
return dst, NewInvalidEscapeSequenceError(src[n:])
}
v1, ok := parseHexUint16(src[n+2 : n+6])
if !ok {
return dst, NewInvalidEscapeSequenceError(src[n : n+6])
}
n += 6
// Check whether this is a surrogate half.
r := rune(v1)
if utf16.IsSurrogate(r) {
r = utf8.RuneError // assume failure unless the following succeeds
if uint(len(src)) < uint(n+6) {
if hasEscapedUTF16Prefix(src[n:], true) {
return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF
}
err = NewInvalidEscapeSequenceError(src[n-6:])
} else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok {
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
} else {
n += 6
}
}
// An unpaired surrogate is recorded in err and emitted as
// utf8.RuneError; processing continues with the rest of src.
dst = utf8.AppendRune(dst, r)
default:
return dst, NewInvalidEscapeSequenceError(src[n : n+2])
}
// The next verbatim run starts after the escape sequence.
i = n
// Handle invalid UTF-8.
case r == utf8.RuneError:
dst = append(dst, src[i:n]...)
if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) {
return dst, io.ErrUnexpectedEOF
}
// NOTE: An unescaped string may be longer than the escaped string
// because invalid UTF-8 bytes are being replaced.
dst = append(dst, "\uFFFD"...)
n += rn
i = n
err = ErrInvalidUTF8
// Handle invalid control characters.
case r < ' ':
dst = append(dst, src[i:n]...)
return dst, NewInvalidCharacterError(src[n:], "in string (expecting non-control character)")
default:
panic("BUG: unhandled character " + QuoteRune(src[n:]))
}
}
dst = append(dst, src[i:n]...)
return dst, io.ErrUnexpectedEOF
}
// hasEscapedUTF16Prefix reports whether b could be the truncated beginning
// of a \uFFFF escape sequence. Only the first six bytes of b are examined.
// If lowerSurrogateHalf is true, the hexadecimal digits seen so far must
// additionally be compatible with a value within ['\uDC00':'\uDFFF'].
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
	isHex := func(c byte) bool {
		return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
	}
	for i := 0; i < len(b) && i < 6; i++ {
		c := b[i]
		var valid bool
		switch i {
		case 0:
			valid = c == '\\'
		case 1:
			valid = c == 'u'
		case 2:
			if lowerSurrogateHalf {
				valid = c == 'd' || c == 'D' // first digit of ['\uDC00':'\uDFFF']
			} else {
				valid = isHex(c)
			}
		case 3:
			if lowerSurrogateHalf {
				// second digit of ['\uDC00':'\uDFFF']
				valid = ('c' <= c && c <= 'f') || ('C' <= c && c <= 'F')
			} else {
				valid = isHex(c)
			}
		default: // i == 4 || i == 5
			valid = isHex(c)
		}
		if !valid {
			return false
		}
	}
	return true
}
// UnquoteMayCopy returns the unescaped form of b.
// If there are no escaped characters, the output is simply a subslice of
// the input with the surrounding quotes removed.
// Otherwise, a new buffer is allocated for the output.
// It assumes the input is valid.
//
// The caller indicates via isVerbatim whether b is free of escapes.
// Any error from AppendUnquote is discarded, in line with the assumption
// that the input is valid.
func UnquoteMayCopy(b []byte, isVerbatim bool) []byte {
// NOTE: The arguments and logic are kept simple to keep this inlinable.
if isVerbatim {
return b[len(`"`) : len(b)-len(`"`)]
}
b, _ = AppendUnquote(nil, b)
return b
}
// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6,
// restricted to non-negative integers: either a single '0' or a digit
// sequence starting with '1'-'9', not followed by '.', 'e', or 'E'.
// It returns 0 if the input is invalid or not such a simple integer,
// in which case ConsumeNumber should be called instead.
func ConsumeSimpleNumber(b []byte) (n int) {
	// NOTE: The logic is deliberately kept simple so that it may be inlined.
	if len(b) == 0 {
		return 0
	}
	switch c := b[0]; {
	case c == '0':
		n = 1 // a leading zero stands alone
	case '1' <= c && c <= '9':
		for n = 1; n < len(b) && '0' <= b[n] && b[n] <= '9'; n++ {
		}
	default:
		return 0
	}
	// A fraction or exponent makes this more than a simple integer.
	if n < len(b) && (b[n] == '.' || b[n] == 'e' || b[n] == 'E') {
		return 0
	}
	return n
}
// ConsumeNumberState identifies how far ConsumeNumberResumable has
// progressed through a JSON number.
type ConsumeNumberState uint
// The order of these states matters: each "withinX" state is immediately
// followed by the "beforeY" state for the component after X.
// ConsumeNumberResumable relies on this when it advances a state by
// incrementing it.
const (
consumeNumberInit ConsumeNumberState = iota
beforeIntegerDigits
withinIntegerDigits
beforeFractionalDigits
withinFractionalDigits
beforeExponentDigits
withinExponentDigits
)
// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// Note that JSON numbers are not self-terminating.
// If the entire input is consumed, then the caller needs to consider whether
// there may be subsequent unread data that may still be part of this number.
//
// It is ConsumeNumberResumable started from offset zero in the initial
// state, with the resulting state discarded.
func ConsumeNumber(b []byte) (n int, err error) {
n, _, err = ConsumeNumberResumable(b, 0, consumeNumberInit)
return n, err
}
// ConsumeNumberResumable is identical to ConsumeNumber but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// For a fresh parse, resumeOffset is 0 and state is the initial state (as
// ConsumeNumber passes them); when resuming, they are the offset and state
// returned by the previous call. The returned state describes which part of
// the number was being consumed when the call returned.
func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) {
// Jump to the right state when resuming from a partial consumption.
n = resumeOffset
if state > consumeNumberInit {
switch state {
case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
// Consume leading digits.
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
if uint(len(b)) <= uint(n) {
return n, state, nil // still within the same state
}
state++ // switches "withinX" to "beforeY" where Y is the state after X
}
switch state {
case beforeIntegerDigits:
goto beforeInteger
case beforeFractionalDigits:
goto beforeFractional
case beforeExponentDigits:
goto beforeExponent
default:
// No further component can follow; the number ends here.
return n, state, nil
}
}
// Consume required integer component (with optional minus sign).
beforeInteger:
resumeOffset = n
if uint(len(b)) > 0 && b[0] == '-' {
n++
}
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
case b[n] == '0':
// A leading zero cannot be followed by more integer digits.
n++
state = beforeFractionalDigits
case '1' <= b[n] && b[n] <= '9':
n++
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinIntegerDigits
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
// Consume optional fractional component.
beforeFractional:
if uint(len(b)) > uint(n) && b[n] == '.' {
// Remember the position of the '.' in case no digit follows yet.
resumeOffset = n
n++
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
case '0' <= b[n] && b[n] <= '9':
n++
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinFractionalDigits
}
// Consume optional exponent component.
beforeExponent:
if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') {
// Remember the position of the 'e'/'E' in case no digit follows yet.
resumeOffset = n
n++
if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') {
n++
}
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
case '0' <= b[n] && b[n] <= '9':
n++
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinExponentDigits
}
return n, state, nil
}
// parseHexUint16 parses b as exactly four hexadecimal digits (upper or lower
// case) and returns the resulting 16-bit value.
// It is similar to strconv.ParseUint, but works on both []byte and string
// input and is specialized for base-16.
// It returns (0, false) if b is not four bytes long or contains a non-hex byte.
// See https://go.dev/issue/42429.
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for i := 0; i < 4; i++ {
		var digit byte
		switch c := b[i]; {
		case '0' <= c && c <= '9':
			digit = c - '0'
		case 'a' <= c && c <= 'f':
			digit = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			digit = c - 'A' + 10
		default:
			return 0, false
		}
		// Shift in the next nibble; four nibbles always fit in 16 bits.
		v = v<<4 | uint16(digit)
	}
	return v, true
}
// ParseUint parses b as a decimal unsigned integer according to
// a strict subset of the JSON number grammar: one or more decimal digits
// with no sign and no leading zero (other than "0" itself).
// It returns the value and true if b is valid and fits in a uint64.
// It returns (0, false) if there is a syntax error and
// returns (math.MaxUint64, false) if there is an overflow.
func ParseUint(b []byte) (v uint64, ok bool) {
	const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64)))

	// Accumulate the leading run of digits; v may wrap around on overflow,
	// which is detected afterwards.
	n := 0
	for n < len(b) && '0' <= b[n] && b[n] <= '9' {
		v = v*10 + uint64(b[n]-'0')
		n++
	}

	// Syntax: non-empty, digits only, no superfluous leading zero.
	if n == 0 || n != len(b) {
		return 0, false
	}
	if b[0] == '0' && n > 1 {
		return 0, false
	}

	// Overflow: more than 20 digits never fits. With exactly 20 digits the
	// value fits only if it starts with '1' and did not wrap around
	// (a wrapped value is necessarily below 1e19).
	if n > unsafeWidth {
		return math.MaxUint64, false
	}
	if n == unsafeWidth && (b[0] != '1' || v < 1e19) {
		return math.MaxUint64, false
	}
	return v, true
}
// ParseFloat parses a floating point number according to the Go float grammar
// by delegating to strconv.ParseFloat with the given bit size.
// Note that the JSON number grammar is a strict subset.
//
// If the number overflows the finite representation of a float,
// the result is clamped to the largest finite magnitude for that bit size
// (keeping the sign), since any finite value is infinitely more accurate at
// representing another finite value than an infinite value.
// The ok result reports whether strconv.ParseFloat returned no error,
// so it is false for a clamped, out-of-range input.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	v, err := strconv.ParseFloat(string(b), bits)
	if math.IsInf(v, 0) {
		// Replace ±Inf with ±Max for the requested precision.
		// Other bit sizes leave the infinity untouched.
		switch bits {
		case 32:
			v = math.Copysign(math.MaxFloat32, v)
		case 64:
			v = math.Copysign(math.MaxFloat64, v)
		}
	}
	return v, err == nil
}

View File

@@ -0,0 +1,292 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsonwire
import (
"math"
"slices"
"strconv"
"unicode/utf16"
"unicode/utf8"
"github.com/go-json-experiment/json/internal/jsonflags"
)
// escapeASCII reports whether the ASCII character needs to be escaped.
// It is a lookup table indexed by the byte value (0x00 through 0x7f),
// laid out 16 entries per row; a non-zero entry marks a character that
// may need escaping, while a zero entry never needs it.
// It conservatively assumes EscapeForHTML, so '<', '>', and '&' are marked
// here and callers decide from their flags whether to actually escape them.
var escapeASCII = [...]uint8{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // escape '"' and '&'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, // escape '<' and '>'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // escape '\\'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
// NeedEscape reports whether any character in src might need escaping
// when written as a JSON string.
// It conservatively assumes EscapeForHTML and EscapeForJS, so '<', '>', '&',
// U+2028, and U+2029 all count as needing an escape.
// It reports true for inputs with invalid UTF-8
// (and for any rune that decodes as utf8.RuneError).
func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
	for pos := 0; pos < len(src); {
		c := src[pos]
		if c < utf8.RuneSelf {
			// Single-byte ASCII: consult the lookup table.
			if escapeASCII[c] != 0 {
				return true
			}
			pos++
			continue
		}
		// Multi-byte sequence: decode one rune from the next few bytes.
		switch r, size := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[pos:]))); r {
		case utf8.RuneError, '\u2028', '\u2029':
			return true
		default:
			pos += size
		}
	}
	return false
}
// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
//
// It takes in flags and respects the following:
// - EscapeForHTML escapes '<', '>', and '&'.
// - EscapeForJS escapes '\u2028' and '\u2029'.
// - EscapeInvalidUTF8 writes the replacement for an invalid byte as the
// escape sequence `\ufffd` rather than as the raw replacement character.
// - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
//
// Regardless of whether AllowInvalidUTF8 is specified,
// invalid bytes are replaced with the Unicode replacement character ('\ufffd').
// If no escape flags are set, then the shortest representable form is used,
// which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
//
// It returns the extended buffer, which always holds the complete quoted
// string. The error is ErrInvalidUTF8 if src contained invalid UTF-8 and
// AllowInvalidUTF8 is not set, and nil otherwise.
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
// i is the start of the run of src bytes that has been scanned but not yet
// copied to dst; n is the read cursor. Bytes needing no escape are copied
// in bulk as src[i:...] whenever an escape is emitted and once at the end.
var i, n int
var hasInvalidUTF8 bool
// Reserve room for the common case of no escapes: src plus two quotes.
dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
dst = append(dst, '"')
for uint(len(src)) > uint(n) {
if c := src[n]; c < utf8.RuneSelf {
// Handle single-byte ASCII.
n++
if escapeASCII[c] == 0 {
continue // no escaping possibly needed
}
// Handle escaping of single-byte ASCII.
// The HTML characters are escaped only under EscapeForHTML;
// every other marked character is always escaped.
if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
dst = append(dst, src[i:n-1]...)
dst = appendEscapedASCII(dst, c)
i = n
}
} else {
// Handle multi-byte Unicode.
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
n += rn
if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
continue // no escaping possibly needed
}
// Handle escaping of multi-byte Unicode.
// A validly encoded U+FFFD matches neither case below
// and so stays in the pending run to be copied verbatim.
switch {
case isInvalidUTF8(r, rn):
hasInvalidUTF8 = true
dst = append(dst, src[i:n-rn]...)
if flags.Get(jsonflags.EscapeInvalidUTF8) {
dst = append(dst, `\ufffd`...)
} else {
dst = append(dst, "\ufffd"...)
}
i = n
case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
dst = append(dst, src[i:n-rn]...)
dst = appendEscapedUnicode(dst, r)
i = n
}
}
}
// Flush the final pending run and close the string.
dst = append(dst, src[i:n]...)
dst = append(dst, '"')
if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
return dst, ErrInvalidUTF8
}
return dst, nil
}
// appendEscapedASCII appends the JSON escape sequence for the ASCII
// character c to dst. Characters with a two-character escape
// (\" \\ \b \f \n \r \t) use it; every other character is written
// as a \u00XX escape.
func appendEscapedASCII(dst []byte, c byte) []byte {
	var short byte // character that follows the backslash in the short form
	switch c {
	case '"', '\\':
		short = c
	case '\b':
		short = 'b'
	case '\f':
		short = 'f'
	case '\n':
		short = 'n'
	case '\r':
		short = 'r'
	case '\t':
		short = 't'
	default:
		// No short form exists: fall back to the \uXXXX notation.
		return appendEscapedUTF16(dst, uint16(c))
	}
	return append(dst, '\\', short)
}
// appendEscapedUnicode appends r to dst in \uXXXX notation.
// A rune that utf16.EncodeRune splits into a surrogate pair is written
// as two consecutive escapes; any other rune is written as a single
// escape of its low 16 bits.
func appendEscapedUnicode(dst []byte, r rune) []byte {
	// utf16.EncodeRune returns U+FFFD for both halves
	// when r does not need (or cannot have) a surrogate pair.
	const replacement = '\ufffd'
	hi, lo := utf16.EncodeRune(r)
	if hi == replacement || lo == replacement {
		return appendEscapedUTF16(dst, uint16(r))
	}
	dst = appendEscapedUTF16(dst, uint16(hi))
	return appendEscapedUTF16(dst, uint16(lo))
}
// appendEscapedUTF16 appends the six-byte escape \uXXXX for the UTF-16
// code unit x to dst, using lowercase hexadecimal digits.
func appendEscapedUTF16(dst []byte, x uint16) []byte {
	const digits = "0123456789abcdef"
	esc := [...]byte{
		'\\', 'u',
		digits[(x>>12)&0xf],
		digits[(x>>8)&0xf],
		digits[(x>>4)&0xf],
		digits[x&0xf],
	}
	return append(dst, esc[:]...)
}
// ReformatString consumes a JSON string from src and appends it to dst,
// reformatting it if necessary according to the specified flags.
// It returns the appended output and the number of consumed input bytes.
// If the string cannot be consumed, dst is returned unmodified together
// with the error from ConsumeString.
func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
	// TODO: Should this update ValueFlags as input?
	var valFlags ValueFlags
	n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
	if err != nil {
		return dst, n, err
	}

	// Fast path: when no escaping is requested and the input is either
	// already canonical or must be preserved verbatim,
	// the string is copied to the output unchanged.
	if !flags.Get(jsonflags.AnyEscape) {
		if valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings) {
			return append(dst, src[:n]...), n, nil
		}
	}

	// Under [jsonflags.PreserveRawStrings], sequences that are already
	// escaped stay escaped, but the [jsonflags.EscapeForHTML] and
	// [jsonflags.EscapeForJS] options must still be honored for
	// characters that appear unescaped.
	if flags.Get(jsonflags.PreserveRawStrings) {
		pending := 0 // start of the input run not yet copied to dst
		for pos := 0; pos < n; {
			c := src[pos]
			if c < utf8.RuneSelf {
				switch c {
				case '<', '>', '&':
					if flags.Get(jsonflags.EscapeForHTML) {
						dst = append(dst, src[pending:pos]...)
						dst = appendEscapedASCII(dst, c)
						pending = pos + 1
					}
				}
				pos++
				continue
			}
			r, size := utf8.DecodeRune(truncateMaxUTF8(src[pos:]))
			if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
				dst = append(dst, src[pending:pos]...)
				dst = appendEscapedUnicode(dst, r)
				pending = pos + size
			}
			pos += size
		}
		return append(dst, src[pending:n]...), n, nil
	}

	// Slow path: the input contains characters that might need escaping,
	// unnecessary escape sequences, or invalid UTF-8.
	// Unquote and re-quote it so that every sequence is
	// reformatted according to the current flags.
	// Errors from both steps are deliberately discarded here.
	b, _ := AppendUnquote(nil, src[:n])
	dst, _ = AppendQuote(dst, b, flags)
	return dst, n, nil
}
// AppendFloat appends src to dst as a JSON number per RFC 7159, section 6.
// It formats numbers similar to the ES6 number-to-string conversion.
// See https://go.dev/issue/14135.
//
// The output is identical to ECMA-262, 6th edition, section 7.1.12.1 and with
// RFC 8785, section 3.2.2.3 for 64-bit floating-point numbers except for -0,
// which is formatted as -0 instead of just 0.
//
// For 32-bit floating-point numbers (bits == 32), src is first rounded to
// float32 and the output is a 32-bit equivalent of the algorithm.
// Note that ECMA-262 specifies no algorithm for 32-bit numbers.
func AppendFloat(dst []byte, src float64, bits int) []byte {
	if bits == 32 {
		src = float64(float32(src))
	}

	// Non-zero magnitudes below 1e-6 or at/above 1e21 use exponent notation;
	// everything else is written in plain decimal form.
	format := byte('f')
	if mag := math.Abs(src); mag != 0 {
		var exponential bool
		switch bits {
		case 64:
			exponential = mag < 1e-6 || mag >= 1e21
		case 32:
			exponential = float32(mag) < 1e-6 || float32(mag) >= 1e21
		}
		if exponential {
			format = 'e'
		}
	}

	dst = strconv.AppendFloat(dst, src, format, -1, bits)
	if format != 'e' {
		return dst
	}
	// Clean up e-09 to e-9: drop the padding zero of a
	// single-digit negative exponent.
	if n := len(dst); n >= 4 && string(dst[n-4:n-1]) == "e-0" {
		dst = append(dst[:n-2], dst[n-1])
	}
	return dst
}
// ReformatNumber consumes a JSON number from src and appends it to dst,
// canonicalizing it if specified.
// It returns the appended output and the number of consumed input bytes.
// If the number cannot be consumed, dst is returned unmodified together
// with the error from ConsumeNumber.
func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
	n, err := ConsumeNumber(src)
	if err != nil {
		return dst, n, err
	}
	num := src[:n]
	if !flags.Get(jsonflags.CanonicalizeNumbers) {
		return append(dst, num...), n, nil // copy the number verbatim
	}

	// Identify the kind of number:
	// a fraction or exponent marks it as a float.
	isFloat := false
scan:
	for _, c := range num {
		switch c {
		case '.', 'e', 'E':
			isFloat = true
			break scan
		}
	}

	// Decide whether this kind of number may be copied verbatim.
	var verbatim bool
	switch {
	case string(num) == "-0":
		// Always canonicalize -0 as 0 regardless of kind.
	case isFloat:
		verbatim = !flags.Get(jsonflags.CanonicalizeRawFloats)
	default:
		// As an optimization, integers below 2⁵³ are copied verbatim
		// since their canonical form is always identical.
		const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
		verbatim = !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits
	}
	if verbatim {
		return append(dst, num...), n, nil
	}

	// Parse and reformat the number (which uses a canonical format).
	// A parse error (such as an out-of-range value) is ignored;
	// an infinite result is clamped to the largest finite float64.
	fv, _ := strconv.ParseFloat(string(num), 64)
	if fv == 0 {
		fv = 0 // normalize negative zero as just zero
	} else if math.IsInf(fv, 0) {
		fv = math.Copysign(math.MaxFloat64, fv)
	}
	return AppendFloat(dst, fv, 64), n, nil
}

View File

@@ -0,0 +1,215 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package jsonwire implements stateless functionality for handling JSON text.
package jsonwire
import (
"cmp"
"errors"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
)
// TrimSuffixWhitespace trims JSON whitespace
// (space, tab, carriage return, and newline) from the end of b.
func TrimSuffixWhitespace(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	end := len(b) // exclusive end of the retained prefix
	for end > 0 && (b[end-1] == ' ' || b[end-1] == '\t' || b[end-1] == '\r' || b[end-1] == '\n') {
		end--
	}
	return b[:end]
}
// TrimSuffixString trims a valid JSON string at the end of b.
// The behavior is undefined if there is not a valid JSON string present.
func TrimSuffixString(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	n := len(b) // exclusive end of the retained prefix
	if n > 0 && b[n-1] == '"' {
		n-- // drop the closing quote
	}
	// Walk backwards until the retained prefix ends in a quote that is
	// not preceded by a backslash, which is taken to be the opening quote.
	for n >= 2 && (b[n-1] != '"' || b[n-2] == '\\') {
		n--
	}
	if n > 0 && b[n-1] == '"' {
		n-- // drop the opening quote
	}
	return b[:n]
}
// HasSuffixByte reports whether b is non-empty and its last byte is c.
func HasSuffixByte(b []byte, c byte) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	n := len(b)
	return n != 0 && b[n-1] == c
}
// TrimSuffixByte removes a single trailing c from b if it is present;
// otherwise b is returned unchanged.
func TrimSuffixByte(b []byte, c byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if n := len(b); n != 0 && b[n-1] == c {
		b = b[:n-1]
	}
	return b
}
// QuoteRune quotes the first rune in the input as a Go character literal.
// A leading byte that is not valid UTF-8 is rendered in hexadecimal,
// in the form '\xNN'.
func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string {
	first := string(truncateMaxUTF8(b))
	r, size := utf8.DecodeRuneInString(first)
	if size != 1 || r != utf8.RuneError {
		return strconv.QuoteRune(r)
	}
	// A one-byte decoding error: show the offending byte itself.
	return `'\x` + strconv.FormatUint(uint64(first[0]), 16) + `'`
}
// CompareUTF16 lexicographically compares x to y according
// to the UTF-16 codepoints of the UTF-8 encoded input strings.
// This implements the ordering specified in RFC 8785, section 3.2.3.
// The result is -1, 0, or +1 as with cmp.Compare.
func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
	// NOTE: This is an optimized, mostly allocation-free implementation
	// of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the
	// two implementations agree on the result of comparing any two strings.

	// singleUnit reports whether r lies in one of the two ranges
	// (below the surrogates, or from U+E000 to U+FFFF) that are
	// treated as a single UTF-16 code unit.
	singleUnit := func(r rune) bool {
		return (0 <= r && r <= 0xD7FF) || (0xE000 <= r && r <= 0xFFFF)
	}

	for len(x) > 0 && len(y) > 0 {
		bx, by := x[0], y[0]

		// ASCII fast-path.
		if bx < utf8.RuneSelf || by < utf8.RuneSelf {
			if bx != by {
				return cmp.Compare(bx, by)
			}
			x, y = x[1:], y[1:]
			continue
		}

		// Decode next pair of runes as UTF-8.
		rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x)))
		ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y)))

		// When exactly one side is a single UTF-16 code unit and the other
		// is a surrogate pair, compare against the pair's first surrogate.
		if sx, sy := singleUnit(rx), singleUnit(ry); sx && !sy {
			ry, _ = utf16.EncodeRune(ry)
		} else if sy && !sx {
			rx, _ = utf16.EncodeRune(rx)
		}
		if rx != ry {
			return cmp.Compare(rx, ry)
		}

		// Check for invalid UTF-8, in which case,
		// we just perform a byte-for-byte comparison.
		if bx != by && (isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny)) {
			return cmp.Compare(bx, by)
		}
		x, y = x[nx:], y[ny:]
	}
	// At least one input is exhausted: the shorter one sorts first.
	return cmp.Compare(len(x), len(y))
}
// truncateMaxUTF8 truncates b to at most utf8.UTFMax bytes,
// which is always enough to still contain the first rune of b.
//
// The utf8 package currently lacks generic variants, which complicates
// generic functions that operate on either []byte or string.
// As a hack, we always call the utf8 function operating on strings,
// but always truncate the input such that the result is identical.
//
// Example usage:
//
//	utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
//
// Converting a []byte to a string is stack allocated since
// truncateMaxUTF8 guarantees that the []byte is short.
func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
	// TODO(https://go.dev/issue/56948): Remove this function and
	// instead directly call generic utf8 functions wherever used.
	limit := len(b)
	if limit > utf8.UTFMax {
		limit = utf8.UTFMax
	}
	return b[:limit]
}
// ErrInvalidUTF8 is the sentinel error reported for input
// that contains invalid UTF-8.
//
// TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead.
var ErrInvalidUTF8 = errors.New("invalid UTF-8")
// NewInvalidCharacterError returns an error reporting that the first rune
// of prefix is an invalid character. The where argument describes the
// context and is appended verbatim, producing a message of the form
// "invalid character 'x' <where>".
func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error {
	msg := "invalid character " + QuoteRune(prefix) + " " + where
	return errors.New(msg)
}
// NewInvalidEscapeSequenceError returns an error reporting that what is an
// invalid escape sequence within a JSON string. Inputs longer than six bytes
// are described as a surrogate pair instead.
// The offending text is shown between backquotes when that is unambiguous;
// if it contains a backquote, whitespace, a non-printable rune, or invalid
// UTF-8, it is shown as a Go-quoted string instead.
func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
	text := string(what)

	kind := "escape sequence"
	if len(text) > 6 {
		kind = "surrogate pair"
	}

	// Prefer the compact backquoted form unless some rune would make it
	// misleading or unreadable.
	shown := "`" + text + "`"
	unsafe := func(r rune) bool {
		return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r)
	}
	if strings.IndexFunc(text, unsafe) >= 0 {
		shown = strconv.Quote(text)
	}
	return errors.New("invalid " + kind + " " + shown + " in string")
}
// TruncatePointer optionally truncates the JSON pointer s,
// enforcing that the length roughly does not exceed n bytes.
//
// If s already fits within n bytes it is returned unchanged.
// Otherwise about n/2 bytes are kept from each end of s and the elided
// middle is replaced by an ellipsis marker. The cut points are adjusted so
// that a name is not split when multiple names are present and so that a
// multi-byte UTF-8 sequence is never cut in half.
// A negative n is treated as zero.
func TruncatePointer(s string, n int) string {
	if n < 0 {
		// A negative budget would otherwise make the prefix length below
		// negative and panic when slicing s.
		n = 0
	}
	if len(s) <= n {
		return s
	}
	i := n / 2          // exclusive end of the retained prefix
	j := len(s) - n/2   // start of the retained suffix
	// Avoid truncating a name if there are multiple names present.
	if k := strings.LastIndexByte(s[:i], '/'); k > 0 {
		i = k
	}
	if k := strings.IndexByte(s[j:], '/'); k >= 0 {
		j += k + len("/")
	}
	// Avoid truncation in the middle of a UTF-8 rune.
	for i > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:i])) {
		i--
	}
	for j < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[j:])) {
		j++
	}
	// Determine the right middle fragment to use,
	// based on how many separators fall within the elided part.
	var middle string
	switch strings.Count(s[i:j], "/") {
	case 0:
		middle = "…"
	case 1:
		middle = "…/…"
	default:
		middle = "…/…/…"
	}
	// If the elided part begins or ends on a separator, no name was cut
	// on that side, so drop the ellipsis that would stand in for it.
	if strings.HasPrefix(s[i:j], "/") && middle != "…" {
		middle = strings.TrimPrefix(middle, "…")
	}
	if strings.HasSuffix(s[i:j], "/") && middle != "…" {
		middle = strings.TrimSuffix(middle, "…")
	}
	return s[:i] + middle + s[j:]
}
// isInvalidUTF8 reports whether the result (r, rn) of decoding a rune
// indicates invalid UTF-8, which the utf8 decoders signal by returning
// utf8.RuneError with a width of one byte. A validly encoded U+FFFD
// has a larger width and is therefore not reported as invalid.
func isInvalidUTF8(r rune, rn int) bool {
	return rn == 1 && r == utf8.RuneError
}