vendor dependencies
This commit is contained in:
627
vendor/github.com/go-json-experiment/json/internal/jsonwire/decode.go
generated
vendored
Normal file
627
vendor/github.com/go-json-experiment/json/internal/jsonwire/decode.go
generated
vendored
Normal file
@@ -0,0 +1,627 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"io"
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ValueFlags is a bit set of properties observed while scanning a JSON value.
// The zero value has no flags set, which means the value is both
// verbatim and canonical.
type ValueFlags uint

const (
	// stringNonVerbatim marks a string that cannot be naively treated
	// as valid UTF-8.
	stringNonVerbatim ValueFlags = 1 << iota
	// stringNonCanonical marks a string that is not formatted according
	// to RFC 8785, section 3.2.2.2.
	stringNonCanonical

	// TODO: Track whether a number is a non-integer?
)

// Join sets in f every flag that is set in f2.
func (f *ValueFlags) Join(f2 ValueFlags) {
	*f = *f | f2
}

// IsVerbatim reports whether the stringNonVerbatim flag is unset.
func (f ValueFlags) IsVerbatim() bool {
	return f&stringNonVerbatim != stringNonVerbatim
}

// IsCanonical reports whether the stringNonCanonical flag is unset.
func (f ValueFlags) IsCanonical() bool {
	return f&stringNonCanonical != stringNonCanonical
}
|
||||
|
||||
// ConsumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2.
// It returns the number of leading bytes of b that are a space, horizontal
// tab, carriage return, or line feed.
func ConsumeWhitespace(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	for n < len(b) {
		if c := b[n]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
		n++
	}
	return n
}
|
||||
|
||||
// ConsumeNull consumes the next JSON null literal per RFC 7159, section 3.
// It returns the length of the literal if b starts with it, and 0 otherwise,
// in which case ConsumeLiteral should be used.
func ConsumeNull(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "null"
	if len(b) < len(literal) || string(b[:len(literal)]) != literal {
		return 0
	}
	return len(literal)
}
|
||||
|
||||
// ConsumeFalse consumes the next JSON false literal per RFC 7159, section 3.
// It returns the length of the literal if b starts with it, and 0 otherwise,
// in which case ConsumeLiteral should be used.
func ConsumeFalse(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "false"
	if len(b) < len(literal) || string(b[:len(literal)]) != literal {
		return 0
	}
	return len(literal)
}
|
||||
|
||||
// ConsumeTrue consumes the next JSON true literal per RFC 7159, section 3.
// It returns the length of the literal if b starts with it, and 0 otherwise,
// in which case ConsumeLiteral should be used.
func ConsumeTrue(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "true"
	if len(b) < len(literal) || string(b[:len(literal)]) != literal {
		return 0
	}
	return len(literal)
}
|
||||
|
||||
// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
func ConsumeLiteral(b []byte, lit string) (n int, err error) {
|
||||
for i := 0; i < len(b) && i < len(lit); i++ {
|
||||
if b[i] != lit[i] {
|
||||
return i, NewInvalidCharacterError(b[i:], "in literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
|
||||
}
|
||||
}
|
||||
if len(b) < len(lit) {
|
||||
return len(b), io.ErrUnexpectedEOF
|
||||
}
|
||||
return len(lit), nil
|
||||
}
|
||||
|
||||
// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7
|
||||
// but is limited to the grammar for an ASCII string without escape sequences.
|
||||
// It returns 0 if it is invalid or more complicated than a simple string,
|
||||
// in which case consumeString should be called.
|
||||
//
|
||||
// It rejects '<', '>', and '&' for compatibility reasons since these were
|
||||
// always escaped in the v1 implementation. Thus, if this function reports
|
||||
// non-zero then we know that the string would be encoded the same way
|
||||
// under both v1 or v2 escape semantics.
|
||||
func ConsumeSimpleString(b []byte) (n int) {
|
||||
// NOTE: The arguments and logic are kept simple to keep this inlinable.
|
||||
if len(b) > 0 && b[0] == '"' {
|
||||
n++
|
||||
for len(b) > n && b[n] < utf8.RuneSelf && escapeASCII[b[n]] == 0 {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) > uint(n) && b[n] == '"' {
|
||||
n++
|
||||
return n
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// ConsumeString consumes the next JSON string per RFC 7159, section 7.
|
||||
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
|
||||
// characters within the string itself.
|
||||
// It reports the number of bytes consumed and whether an error was encountered.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) {
|
||||
return ConsumeStringResumable(flags, b, 0, validateUTF8)
|
||||
}
|
||||
|
||||
// ConsumeStringResumable is identical to consumeString but supports resuming
|
||||
// from a previous call that returned io.ErrUnexpectedEOF.
|
||||
func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
|
||||
// Consume the leading double quote.
|
||||
switch {
|
||||
case resumeOffset > 0:
|
||||
n = resumeOffset // already handled the leading quote
|
||||
case uint(len(b)) == 0:
|
||||
return n, io.ErrUnexpectedEOF
|
||||
case b[0] == '"':
|
||||
n++
|
||||
default:
|
||||
return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
|
||||
}
|
||||
|
||||
// Consume every character in the string.
|
||||
for uint(len(b)) > uint(n) {
|
||||
// Optimize for long sequences of unescaped characters.
|
||||
noEscape := func(c byte) bool {
|
||||
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
|
||||
}
|
||||
for uint(len(b)) > uint(n) && noEscape(b[n]) {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) <= uint(n) {
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// Check for terminating double quote.
|
||||
if b[n] == '"' {
|
||||
n++
|
||||
return n, nil
|
||||
}
|
||||
|
||||
switch r, rn := utf8.DecodeRune(b[n:]); {
|
||||
// Handle UTF-8 encoded byte sequence.
|
||||
// Due to specialized handling of ASCII above, we know that
|
||||
// all normal sequences at this point must be 2 bytes or larger.
|
||||
case rn > 1:
|
||||
n += rn
|
||||
// Handle escape sequence.
|
||||
case r == '\\':
|
||||
flags.Join(stringNonVerbatim)
|
||||
resumeOffset = n
|
||||
if uint(len(b)) < uint(n+2) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch r := b[n+1]; r {
|
||||
case '/':
|
||||
// Forward slash is the only character with 3 representations.
|
||||
// Per RFC 8785, section 3.2.2.2., this must not be escaped.
|
||||
flags.Join(stringNonCanonical)
|
||||
n += 2
|
||||
case '"', '\\', 'b', 'f', 'n', 'r', 't':
|
||||
n += 2
|
||||
case 'u':
|
||||
if uint(len(b)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(b[n:], false) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n:])
|
||||
}
|
||||
v1, ok := parseHexUint16(b[n+2 : n+6])
|
||||
if !ok {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n : n+6])
|
||||
}
|
||||
// Only certain control characters can use the \uFFFF notation
|
||||
// for canonical formatting (per RFC 8785, section 3.2.2.2.).
|
||||
switch v1 {
|
||||
// \uFFFF notation not permitted for these characters.
|
||||
case '\b', '\f', '\n', '\r', '\t':
|
||||
flags.Join(stringNonCanonical)
|
||||
default:
|
||||
// \uFFFF notation only permitted for control characters.
|
||||
if v1 >= ' ' {
|
||||
flags.Join(stringNonCanonical)
|
||||
} else {
|
||||
// \uFFFF notation must be lower case.
|
||||
for _, c := range b[n+2 : n+6] {
|
||||
if 'A' <= c && c <= 'F' {
|
||||
flags.Join(stringNonCanonical)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
n += 6
|
||||
|
||||
r := rune(v1)
|
||||
if validateUTF8 && utf16.IsSurrogate(r) {
|
||||
if uint(len(b)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(b[n:], true) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6:])
|
||||
} else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
|
||||
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
|
||||
} else {
|
||||
n += 6
|
||||
}
|
||||
}
|
||||
default:
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n : n+2])
|
||||
}
|
||||
// Handle invalid UTF-8.
|
||||
case r == utf8.RuneError:
|
||||
if !utf8.FullRune(b[n:]) {
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonVerbatim | stringNonCanonical)
|
||||
if validateUTF8 {
|
||||
return n, ErrInvalidUTF8
|
||||
}
|
||||
n++
|
||||
// Handle invalid control characters.
|
||||
case r < ' ':
|
||||
flags.Join(stringNonVerbatim | stringNonCanonical)
|
||||
return n, NewInvalidCharacterError(b[n:], "in string (expecting non-control character)")
|
||||
default:
|
||||
panic("BUG: unhandled character " + QuoteRune(b[n:]))
|
||||
}
|
||||
}
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// AppendUnquote appends the unescaped form of a JSON string in src to dst.
|
||||
// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,
|
||||
// but the error will be specified as having encountered such an error.
|
||||
// The input must be an entire JSON string with no surrounding whitespace.
|
||||
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) {
|
||||
dst = slices.Grow(dst, len(src))
|
||||
|
||||
// Consume the leading double quote.
|
||||
var i, n int
|
||||
switch {
|
||||
case uint(len(src)) == 0:
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
case src[0] == '"':
|
||||
i, n = 1, 1
|
||||
default:
|
||||
return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`)
|
||||
}
|
||||
|
||||
// Consume every character in the string.
|
||||
for uint(len(src)) > uint(n) {
|
||||
// Optimize for long sequences of unescaped characters.
|
||||
noEscape := func(c byte) bool {
|
||||
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
|
||||
}
|
||||
for uint(len(src)) > uint(n) && noEscape(src[n]) {
|
||||
n++
|
||||
}
|
||||
if uint(len(src)) <= uint(n) {
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// Check for terminating double quote.
|
||||
if src[n] == '"' {
|
||||
dst = append(dst, src[i:n]...)
|
||||
n++
|
||||
if n < len(src) {
|
||||
err = NewInvalidCharacterError(src[n:], "after string value")
|
||||
}
|
||||
return dst, err
|
||||
}
|
||||
|
||||
switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
|
||||
// Handle UTF-8 encoded byte sequence.
|
||||
// Due to specialized handling of ASCII above, we know that
|
||||
// all normal sequences at this point must be 2 bytes or larger.
|
||||
case rn > 1:
|
||||
n += rn
|
||||
// Handle escape sequence.
|
||||
case r == '\\':
|
||||
dst = append(dst, src[i:n]...)
|
||||
|
||||
// Handle escape sequence.
|
||||
if uint(len(src)) < uint(n+2) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch r := src[n+1]; r {
|
||||
case '"', '\\', '/':
|
||||
dst = append(dst, r)
|
||||
n += 2
|
||||
case 'b':
|
||||
dst = append(dst, '\b')
|
||||
n += 2
|
||||
case 'f':
|
||||
dst = append(dst, '\f')
|
||||
n += 2
|
||||
case 'n':
|
||||
dst = append(dst, '\n')
|
||||
n += 2
|
||||
case 'r':
|
||||
dst = append(dst, '\r')
|
||||
n += 2
|
||||
case 't':
|
||||
dst = append(dst, '\t')
|
||||
n += 2
|
||||
case 'u':
|
||||
if uint(len(src)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(src[n:], false) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
return dst, NewInvalidEscapeSequenceError(src[n:])
|
||||
}
|
||||
v1, ok := parseHexUint16(src[n+2 : n+6])
|
||||
if !ok {
|
||||
return dst, NewInvalidEscapeSequenceError(src[n : n+6])
|
||||
}
|
||||
n += 6
|
||||
|
||||
// Check whether this is a surrogate half.
|
||||
r := rune(v1)
|
||||
if utf16.IsSurrogate(r) {
|
||||
r = utf8.RuneError // assume failure unless the following succeeds
|
||||
if uint(len(src)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(src[n:], true) {
|
||||
return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF
|
||||
}
|
||||
err = NewInvalidEscapeSequenceError(src[n-6:])
|
||||
} else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok {
|
||||
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
|
||||
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
|
||||
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
|
||||
} else {
|
||||
n += 6
|
||||
}
|
||||
}
|
||||
|
||||
dst = utf8.AppendRune(dst, r)
|
||||
default:
|
||||
return dst, NewInvalidEscapeSequenceError(src[n : n+2])
|
||||
}
|
||||
i = n
|
||||
// Handle invalid UTF-8.
|
||||
case r == utf8.RuneError:
|
||||
dst = append(dst, src[i:n]...)
|
||||
if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
// NOTE: An unescaped string may be longer than the escaped string
|
||||
// because invalid UTF-8 bytes are being replaced.
|
||||
dst = append(dst, "\uFFFD"...)
|
||||
n += rn
|
||||
i = n
|
||||
err = ErrInvalidUTF8
|
||||
// Handle invalid control characters.
|
||||
case r < ' ':
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, NewInvalidCharacterError(src[n:], "in string (expecting non-control character)")
|
||||
default:
|
||||
panic("BUG: unhandled character " + QuoteRune(src[n:]))
|
||||
}
|
||||
}
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// hasEscapedUTF16Prefix reports whether b is possibly
// the truncated prefix of a \uFFFF escape sequence.
//
// If lowerSurrogateHalf is set, the hexadecimal digits seen so far must also
// be compatible with a value within ['\uDC00':'\uDFFF'].
// Only the first six bytes of b are examined; an empty b reports true.
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
	for i := 0; i < len(b) && i < 6; i++ {
		c := b[i]
		switch i {
		case 0:
			if c != '\\' {
				return false
			}
			continue
		case 1:
			if c != 'u' {
				return false
			}
			continue
		case 2:
			if lowerSurrogateHalf && c != 'd' && c != 'D' {
				return false // not within ['\uDC00':'\uDFFF']
			}
		case 3:
			if lowerSurrogateHalf && (c < 'c' || 'f' < c) && (c < 'C' || 'F' < c) {
				return false // not within ['\uDC00':'\uDFFF']
			}
		}

		// Positions 2 through 5 must each hold a hexadecimal digit.
		isHex := ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
		if !isHex {
			return false
		}
	}
	return true
}
|
||||
|
||||
// UnquoteMayCopy returns the unescaped form of b.
|
||||
// If there are no escaped characters, the output is simply a subslice of
|
||||
// the input with the surrounding quotes removed.
|
||||
// Otherwise, a new buffer is allocated for the output.
|
||||
// It assumes the input is valid.
|
||||
func UnquoteMayCopy(b []byte, isVerbatim bool) []byte {
|
||||
// NOTE: The arguments and logic are kept simple to keep this inlinable.
|
||||
if isVerbatim {
|
||||
return b[len(`"`) : len(b)-len(`"`)]
|
||||
}
|
||||
b, _ = AppendUnquote(nil, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6
// but is limited to the grammar for a positive integer.
// It returns 0 if it is invalid or more complicated than a simple integer,
// in which case ConsumeNumber should be called.
func ConsumeSimpleNumber(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) == 0 {
		return 0
	}
	switch c := b[0]; {
	case c == '0':
		// A leading zero stands alone; it is never followed by more digits.
		n = 1
	case '1' <= c && c <= '9':
		n = 1
		for n < len(b) && '0' <= b[n] && b[n] <= '9' {
			n++
		}
	default:
		return 0
	}
	// A following fraction or exponent makes this more than a simple integer.
	if uint(n) < uint(len(b)) && (b[n] == '.' || b[n] == 'e' || b[n] == 'E') {
		return 0
	}
	return n
}
|
||||
|
||||
// ConsumeNumberState identifies a position within the JSON number grammar.
// The declaration order of the constants is significant: each "within" state
// is immediately followed by the "before" state of the next component.
type ConsumeNumberState uint

const (
	consumeNumberInit      = ConsumeNumberState(iota) // nothing consumed yet
	beforeIntegerDigits                               // expecting the integer component
	withinIntegerDigits                               // inside the integer digits
	beforeFractionalDigits                            // expecting an optional fraction
	withinFractionalDigits                            // inside the fractional digits
	beforeExponentDigits                              // expecting an optional exponent
	withinExponentDigits                              // inside the exponent digits
)
|
||||
|
||||
// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.
|
||||
// It reports the number of bytes consumed and whether an error was encountered.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
//
|
||||
// Note that JSON numbers are not self-terminating.
|
||||
// If the entire input is consumed, then the caller needs to consider whether
|
||||
// there may be subsequent unread data that may still be part of this number.
|
||||
func ConsumeNumber(b []byte) (n int, err error) {
|
||||
n, _, err = ConsumeNumberResumable(b, 0, consumeNumberInit)
|
||||
return n, err
|
||||
}
|
||||
|
||||
// ConsumeNumberResumable is identical to consumeNumber but supports resuming
|
||||
// from a previous call that returned io.ErrUnexpectedEOF.
|
||||
func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) {
|
||||
// Jump to the right state when resuming from a partial consumption.
|
||||
n = resumeOffset
|
||||
if state > consumeNumberInit {
|
||||
switch state {
|
||||
case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
|
||||
// Consume leading digits.
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) <= uint(n) {
|
||||
return n, state, nil // still within the same state
|
||||
}
|
||||
state++ // switches "withinX" to "beforeY" where Y is the state after X
|
||||
}
|
||||
switch state {
|
||||
case beforeIntegerDigits:
|
||||
goto beforeInteger
|
||||
case beforeFractionalDigits:
|
||||
goto beforeFractional
|
||||
case beforeExponentDigits:
|
||||
goto beforeExponent
|
||||
default:
|
||||
return n, state, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Consume required integer component (with optional minus sign).
|
||||
beforeInteger:
|
||||
resumeOffset = n
|
||||
if uint(len(b)) > 0 && b[0] == '-' {
|
||||
n++
|
||||
}
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
|
||||
case b[n] == '0':
|
||||
n++
|
||||
state = beforeFractionalDigits
|
||||
case '1' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinIntegerDigits
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
|
||||
// Consume optional fractional component.
|
||||
beforeFractional:
|
||||
if uint(len(b)) > uint(n) && b[n] == '.' {
|
||||
resumeOffset = n
|
||||
n++
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
|
||||
case '0' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinFractionalDigits
|
||||
}
|
||||
|
||||
// Consume optional exponent component.
|
||||
beforeExponent:
|
||||
if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') {
|
||||
resumeOffset = n
|
||||
n++
|
||||
if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') {
|
||||
n++
|
||||
}
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
|
||||
case '0' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinExponentDigits
|
||||
}
|
||||
|
||||
return n, state, nil
|
||||
}
|
||||
|
||||
// parseHexUint16 is similar to strconv.ParseUint,
// but operates directly on []byte and is optimized for base-16.
// See https://go.dev/issue/42429.
//
// The input must be exactly four hexadecimal digits (either case);
// anything else yields (0, false).
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for i := 0; i < 4; i++ {
		var nibble uint16
		switch c := b[i]; {
		case '0' <= c && c <= '9':
			nibble = uint16(c - '0')
		case 'a' <= c && c <= 'f':
			nibble = uint16(c-'a') + 10
		case 'A' <= c && c <= 'F':
			nibble = uint16(c-'A') + 10
		default:
			return 0, false
		}
		v = v<<4 | nibble
	}
	return v, true
}
|
||||
|
||||
// ParseUint parses b as a decimal unsigned integer according to
// a strict subset of the JSON number grammar, returning the value if valid.
// It returns (0, false) if there is a syntax error and
// returns (math.MaxUint64, false) if there is an overflow.
//
// An empty input, any non-digit byte, and a leading zero on anything other
// than "0" itself are all syntax errors.
func ParseUint(b []byte) (v uint64, ok bool) {
	const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64)))

	// Accumulate the leading run of digits; v may wrap around here,
	// which the overflow check below accounts for.
	digits := 0
	for digits < len(b) && b[digits] >= '0' && b[digits] <= '9' {
		v = v*10 + uint64(b[digits]-'0')
		digits++
	}

	if digits == 0 || digits != len(b) {
		return 0, false
	}
	if b[0] == '0' && string(b) != "0" {
		return 0, false
	}
	// With exactly unsafeWidth digits, the value fits only if it starts with
	// '1' and the accumulated value did not wrap below 1e19.
	if digits >= unsafeWidth && (b[0] != '1' || v < 1e19 || digits > unsafeWidth) {
		return math.MaxUint64, false
	}
	return v, true
}
|
||||
|
||||
// ParseFloat parses a floating point number according to the Go float grammar.
// Note that the JSON number grammar is a strict subset.
//
// If the number overflows the finite representation of a float,
// then we return MaxFloat since any finite value will always be infinitely
// more accurate at representing another finite value than an infinite value.
//
// The clamp to the largest finite magnitude keeps the sign and is applied
// only for bits equal to 32 or 64. ok reports whether strconv.ParseFloat
// succeeded without error.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	parsed, err := strconv.ParseFloat(string(b), bits)
	switch {
	case !math.IsInf(parsed, 0):
		// Not infinite: nothing to clamp.
	case bits == 32:
		parsed = math.Copysign(math.MaxFloat32, parsed)
	case bits == 64:
		parsed = math.Copysign(math.MaxFloat64, parsed)
	}
	return parsed, err == nil
}
|
292
vendor/github.com/go-json-experiment/json/internal/jsonwire/encode.go
generated
vendored
Normal file
292
vendor/github.com/go-json-experiment/json/internal/jsonwire/encode.go
generated
vendored
Normal file
@@ -0,0 +1,292 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/go-json-experiment/json/internal/jsonflags"
|
||||
)
|
||||
|
||||
// escapeASCII reports whether the ASCII character needs to be escaped.
// It conservatively assumes EscapeForHTML.
//
// The table is indexed by byte value and has one entry per ASCII character;
// a non-zero entry means the character needs escaping.
var escapeASCII = [...]uint8{
	// Control characters (0x00 through 0x1f) are always escaped.
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

	// Printable characters that are escaped.
	'"':  1,
	'&':  1,
	'<':  1,
	'>':  1,
	'\\': 1,

	// Pins the table length to the full ASCII range.
	utf8.RuneSelf - 1: 0,
}
|
||||
|
||||
// NeedEscape reports whether src needs escaping of any characters.
|
||||
// It conservatively assumes EscapeForHTML and EscapeForJS.
|
||||
// It reports true for inputs with invalid UTF-8.
|
||||
func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
|
||||
var i int
|
||||
for uint(len(src)) > uint(i) {
|
||||
if c := src[i]; c < utf8.RuneSelf {
|
||||
if escapeASCII[c] > 0 {
|
||||
return true
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:])))
|
||||
if r == utf8.RuneError || r == '\u2028' || r == '\u2029' {
|
||||
return true
|
||||
}
|
||||
i += rn
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
|
||||
//
|
||||
// It takes in flags and respects the following:
|
||||
// - EscapeForHTML escapes '<', '>', and '&'.
|
||||
// - EscapeForJS escapes '\u2028' and '\u2029'.
|
||||
// - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
|
||||
//
|
||||
// Regardless of whether AllowInvalidUTF8 is specified,
|
||||
// invalid bytes are replaced with the Unicode replacement character ('\ufffd').
|
||||
// If no escape flags are set, then the shortest representable form is used,
|
||||
// which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
|
||||
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
|
||||
var i, n int
|
||||
var hasInvalidUTF8 bool
|
||||
dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
|
||||
dst = append(dst, '"')
|
||||
for uint(len(src)) > uint(n) {
|
||||
if c := src[n]; c < utf8.RuneSelf {
|
||||
// Handle single-byte ASCII.
|
||||
n++
|
||||
if escapeASCII[c] == 0 {
|
||||
continue // no escaping possibly needed
|
||||
}
|
||||
// Handle escaping of single-byte ASCII.
|
||||
if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
|
||||
dst = append(dst, src[i:n-1]...)
|
||||
dst = appendEscapedASCII(dst, c)
|
||||
i = n
|
||||
}
|
||||
} else {
|
||||
// Handle multi-byte Unicode.
|
||||
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
|
||||
n += rn
|
||||
if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
|
||||
continue // no escaping possibly needed
|
||||
}
|
||||
// Handle escaping of multi-byte Unicode.
|
||||
switch {
|
||||
case isInvalidUTF8(r, rn):
|
||||
hasInvalidUTF8 = true
|
||||
dst = append(dst, src[i:n-rn]...)
|
||||
if flags.Get(jsonflags.EscapeInvalidUTF8) {
|
||||
dst = append(dst, `\ufffd`...)
|
||||
} else {
|
||||
dst = append(dst, "\ufffd"...)
|
||||
}
|
||||
i = n
|
||||
case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
|
||||
dst = append(dst, src[i:n-rn]...)
|
||||
dst = appendEscapedUnicode(dst, r)
|
||||
i = n
|
||||
}
|
||||
}
|
||||
}
|
||||
dst = append(dst, src[i:n]...)
|
||||
dst = append(dst, '"')
|
||||
if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
|
||||
return dst, ErrInvalidUTF8
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
func appendEscapedASCII(dst []byte, c byte) []byte {
|
||||
switch c {
|
||||
case '"', '\\':
|
||||
dst = append(dst, '\\', c)
|
||||
case '\b':
|
||||
dst = append(dst, "\\b"...)
|
||||
case '\f':
|
||||
dst = append(dst, "\\f"...)
|
||||
case '\n':
|
||||
dst = append(dst, "\\n"...)
|
||||
case '\r':
|
||||
dst = append(dst, "\\r"...)
|
||||
case '\t':
|
||||
dst = append(dst, "\\t"...)
|
||||
default:
|
||||
dst = appendEscapedUTF16(dst, uint16(c))
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func appendEscapedUnicode(dst []byte, r rune) []byte {
|
||||
if r1, r2 := utf16.EncodeRune(r); r1 != '\ufffd' && r2 != '\ufffd' {
|
||||
dst = appendEscapedUTF16(dst, uint16(r1))
|
||||
dst = appendEscapedUTF16(dst, uint16(r2))
|
||||
} else {
|
||||
dst = appendEscapedUTF16(dst, uint16(r))
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// appendEscapedUTF16 appends x to dst as a \uXXXX escape sequence
// using four lower-case hexadecimal digits.
func appendEscapedUTF16(dst []byte, x uint16) []byte {
	const digits = "0123456789abcdef"
	escaped := [6]byte{
		'\\', 'u',
		digits[x>>12],
		digits[(x>>8)&0xf],
		digits[(x>>4)&0xf],
		digits[x&0xf],
	}
	return append(dst, escaped[:]...)
}
|
||||
|
||||
// ReformatString consumes a JSON string from src and appends it to dst,
|
||||
// reformatting it if necessary according to the specified flags.
|
||||
// It returns the appended output and the number of consumed input bytes.
|
||||
func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
|
||||
// TODO: Should this update ValueFlags as input?
|
||||
var valFlags ValueFlags
|
||||
n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
|
||||
if err != nil {
|
||||
return dst, n, err
|
||||
}
|
||||
|
||||
// If the output requires no special escapes, and the input
|
||||
// is already in canonical form or should be preserved verbatim,
|
||||
// then directly copy the input to the output.
|
||||
if !flags.Get(jsonflags.AnyEscape) &&
|
||||
(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
|
||||
dst = append(dst, src[:n]...) // copy the string verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
|
||||
// remain escaped, however we still need to respect the
|
||||
// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
|
||||
if flags.Get(jsonflags.PreserveRawStrings) {
|
||||
var i, lastAppendIndex int
|
||||
for i < n {
|
||||
if c := src[i]; c < utf8.RuneSelf {
|
||||
if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
|
||||
dst = append(dst, src[lastAppendIndex:i]...)
|
||||
dst = appendEscapedASCII(dst, c)
|
||||
lastAppendIndex = i + 1
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
|
||||
if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
|
||||
dst = append(dst, src[lastAppendIndex:i]...)
|
||||
dst = appendEscapedUnicode(dst, r)
|
||||
lastAppendIndex = i + rn
|
||||
}
|
||||
i += rn
|
||||
}
|
||||
}
|
||||
return append(dst, src[lastAppendIndex:n]...), n, nil
|
||||
}
|
||||
|
||||
// The input contains characters that might need escaping,
|
||||
// unnecessary escape sequences, or invalid UTF-8.
|
||||
// Perform a round-trip unquote and quote to properly reformat
|
||||
// these sequences according the current flags.
|
||||
b, _ := AppendUnquote(nil, src[:n])
|
||||
dst, _ = AppendQuote(dst, b, flags)
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// AppendFloat appends src to dst as a JSON number per RFC 7159, section 6.
// It formats numbers similar to the ES6 number-to-string conversion.
// See https://go.dev/issue/14135.
//
// The output is identical to ECMA-262, 6th edition, section 7.1.12.1 and with
// RFC 8785, section 3.2.2.3 for 64-bit floating-point numbers except for -0,
// which is formatted as -0 instead of just 0.
//
// For 32-bit floating-point numbers,
// the output is a 32-bit equivalent of the algorithm.
// Note that ECMA-262 specifies no algorithm for 32-bit numbers.
func AppendFloat(dst []byte, src float64, bits int) []byte {
	if bits == 32 {
		src = float64(float32(src))
	}

	// Non-zero magnitudes below 1e-6 or at least 1e21 use exponent
	// notation; everything else uses plain decimal notation.
	format := byte('f')
	switch mag := math.Abs(src); {
	case mag == 0:
		// Zero always uses decimal notation.
	case bits == 64 && (mag < 1e-6 || mag >= 1e21),
		bits == 32 && (float32(mag) < 1e-6 || float32(mag) >= 1e21):
		format = 'e'
	}

	dst = strconv.AppendFloat(dst, src, format, -1, bits)
	if format != 'e' {
		return dst
	}

	// Clean up e-09 to e-9.
	if end := len(dst); end >= 4 && dst[end-4] == 'e' && dst[end-3] == '-' && dst[end-2] == '0' {
		dst[end-2] = dst[end-1]
		dst = dst[:end-1]
	}
	return dst
}
|
||||
|
||||
// ReformatNumber consumes a JSON string from src and appends it to dst,
|
||||
// canonicalizing it if specified.
|
||||
// It returns the appended output and the number of consumed input bytes.
|
||||
func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
|
||||
n, err := ConsumeNumber(src)
|
||||
if err != nil {
|
||||
return dst, n, err
|
||||
}
|
||||
if !flags.Get(jsonflags.CanonicalizeNumbers) {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// Identify the kind of number.
|
||||
var isFloat bool
|
||||
for _, c := range src[:n] {
|
||||
if c == '.' || c == 'e' || c == 'E' {
|
||||
isFloat = true // has fraction or exponent
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Check if need to canonicalize this kind of number.
|
||||
switch {
|
||||
case string(src[:n]) == "-0":
|
||||
break // canonicalize -0 as 0 regardless of kind
|
||||
case isFloat:
|
||||
if !flags.Get(jsonflags.CanonicalizeRawFloats) {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
default:
|
||||
// As an optimization, we can copy integer numbers below 2⁵³ verbatim
|
||||
// since the canonical form is always identical.
|
||||
const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
|
||||
if !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Parse and reformat the number (which uses a canonical format).
|
||||
fv, _ := strconv.ParseFloat(string(src[:n]), 64)
|
||||
switch {
|
||||
case fv == 0:
|
||||
fv = 0 // normalize negative zero as just zero
|
||||
case math.IsInf(fv, +1):
|
||||
fv = +math.MaxFloat64
|
||||
case math.IsInf(fv, -1):
|
||||
fv = -math.MaxFloat64
|
||||
}
|
||||
return AppendFloat(dst, fv, 64), n, nil
|
||||
}
|
215
vendor/github.com/go-json-experiment/json/internal/jsonwire/wire.go
generated
vendored
Normal file
215
vendor/github.com/go-json-experiment/json/internal/jsonwire/wire.go
generated
vendored
Normal file
@@ -0,0 +1,215 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package jsonwire implements stateless functionality for handling JSON text.
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TrimSuffixWhitespace trims trailing JSON whitespace
// (space, tab, carriage return, and newline) from the end of b.
func TrimSuffixWhitespace(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	end := len(b)
	for end > 0 && (b[end-1] == ' ' || b[end-1] == '\t' || b[end-1] == '\r' || b[end-1] == '\n') {
		end--
	}
	return b[:end]
}
|
||||
|
||||
// TrimSuffixString trims a valid JSON string at the end of b.
// The behavior is undefined if there is not a valid JSON string present.
func TrimSuffixString(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.

	// Drop the closing quote.
	if n := len(b); n > 0 && b[n-1] == '"' {
		b = b[:n-1]
	}
	// Trim characters until b ends in a quote
	// that is not preceded by a backslash.
	for len(b) >= 2 && (b[len(b)-1] != '"' || b[len(b)-2] == '\\') {
		b = b[:len(b)-1]
	}
	// Drop the opening quote.
	if n := len(b); n > 0 && b[n-1] == '"' {
		b = b[:n-1]
	}
	return b
}
|
||||
|
||||
// HasSuffixByte reports whether b ends with c.
// It reports false when b is empty.
func HasSuffixByte(b []byte, c byte) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) == 0 {
		return false
	}
	return b[len(b)-1] == c
}
|
||||
|
||||
// TrimSuffixByte removes c from the end of b if it is present.
// At most one trailing byte is removed.
func TrimSuffixByte(b []byte, c byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if n := len(b); n > 0 && b[n-1] == c {
		b = b[:n-1]
	}
	return b
}
|
||||
|
||||
// QuoteRune quotes the first rune in the input.
|
||||
func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string {
|
||||
r, n := utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
|
||||
if r == utf8.RuneError && n == 1 {
|
||||
return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'`
|
||||
}
|
||||
return strconv.QuoteRune(r)
|
||||
}
|
||||
|
||||
// CompareUTF16 lexicographically compares x to y according
|
||||
// to the UTF-16 codepoints of the UTF-8 encoded input strings.
|
||||
// This implements the ordering specified in RFC 8785, section 3.2.3.
|
||||
func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
|
||||
// NOTE: This is an optimized, mostly allocation-free implementation
|
||||
// of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the
|
||||
// two implementations agree on the result of comparing any two strings.
|
||||
isUTF16Self := func(r rune) bool {
|
||||
return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF')
|
||||
}
|
||||
|
||||
for {
|
||||
if len(x) == 0 || len(y) == 0 {
|
||||
return cmp.Compare(len(x), len(y))
|
||||
}
|
||||
|
||||
// ASCII fast-path.
|
||||
if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf {
|
||||
if x[0] != y[0] {
|
||||
return cmp.Compare(x[0], y[0])
|
||||
}
|
||||
x, y = x[1:], y[1:]
|
||||
continue
|
||||
}
|
||||
|
||||
// Decode next pair of runes as UTF-8.
|
||||
rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x)))
|
||||
ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y)))
|
||||
|
||||
selfx := isUTF16Self(rx)
|
||||
selfy := isUTF16Self(ry)
|
||||
switch {
|
||||
// The x rune is a single UTF-16 codepoint, while
|
||||
// the y rune is a surrogate pair of UTF-16 codepoints.
|
||||
case selfx && !selfy:
|
||||
ry, _ = utf16.EncodeRune(ry)
|
||||
// The y rune is a single UTF-16 codepoint, while
|
||||
// the x rune is a surrogate pair of UTF-16 codepoints.
|
||||
case selfy && !selfx:
|
||||
rx, _ = utf16.EncodeRune(rx)
|
||||
}
|
||||
if rx != ry {
|
||||
return cmp.Compare(rx, ry)
|
||||
}
|
||||
|
||||
// Check for invalid UTF-8, in which case,
|
||||
// we just perform a byte-for-byte comparison.
|
||||
if isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny) {
|
||||
if x[0] != y[0] {
|
||||
return cmp.Compare(x[0], y[0])
|
||||
}
|
||||
}
|
||||
x, y = x[nx:], y[ny:]
|
||||
}
|
||||
}
|
||||
|
||||
// truncateMaxUTF8 truncates b to at most utf8.UTFMax bytes,
// which is still long enough to contain the first rune in full.
//
// The utf8 package currently lacks generic variants, which complicates
// generic functions that operate on either []byte or string.
// As a hack, we always call the utf8 function operating on strings,
// but always truncate the input such that the result is identical.
//
// Example usage:
//
//	utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
//
// Converting a []byte to a string is stack allocated since
// truncateMaxUTF8 guarantees that the []byte is short.
func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
	// TODO(https://go.dev/issue/56948): Remove this function and
	// instead directly call generic utf8 functions wherever used.
	if len(b) <= utf8.UTFMax {
		return b
	}
	return b[:utf8.UTFMax]
}
|
||||
|
||||
// ErrInvalidUTF8 is a sentinel error with the message "invalid UTF-8".
//
// TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead.
var ErrInvalidUTF8 = errors.New("invalid UTF-8")
|
||||
|
||||
func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error {
|
||||
what := QuoteRune(prefix)
|
||||
return errors.New("invalid character " + what + " " + where)
|
||||
}
|
||||
|
||||
// NewInvalidEscapeSequenceError returns an error that reports what as an
// invalid escape sequence within a string. An input longer than 6 bytes
// is labeled as a "surrogate pair" instead of an "escape sequence".
//
// The offending text is shown between backticks when it is safe to do so,
// otherwise it is quoted using strconv.Quote.
func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
	seq := string(what)

	label := "escape sequence"
	if len(seq) > 6 {
		label = "surrogate pair"
	}

	// Backtick quoting is only readable if the text has no backtick,
	// no invalid UTF-8, no whitespace, and no non-printable runes.
	var shown string
	if strings.IndexFunc(seq, func(r rune) bool {
		return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r)
	}) >= 0 {
		shown = strconv.Quote(seq)
	} else {
		shown = "`" + seq + "`"
	}
	return errors.New("invalid " + label + " " + shown + " in string")
}
|
||||
|
||||
// TruncatePointer optionally truncates the JSON pointer,
|
||||
// enforcing that the length roughly does not exceed n.
|
||||
func TruncatePointer(s string, n int) string {
|
||||
if len(s) <= n {
|
||||
return s
|
||||
}
|
||||
i := n / 2
|
||||
j := len(s) - n/2
|
||||
|
||||
// Avoid truncating a name if there are multiple names present.
|
||||
if k := strings.LastIndexByte(s[:i], '/'); k > 0 {
|
||||
i = k
|
||||
}
|
||||
if k := strings.IndexByte(s[j:], '/'); k >= 0 {
|
||||
j += k + len("/")
|
||||
}
|
||||
|
||||
// Avoid truncation in the middle of a UTF-8 rune.
|
||||
for i > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:i])) {
|
||||
i--
|
||||
}
|
||||
for j < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[j:])) {
|
||||
j++
|
||||
}
|
||||
|
||||
// Determine the right middle fragment to use.
|
||||
var middle string
|
||||
switch strings.Count(s[i:j], "/") {
|
||||
case 0:
|
||||
middle = "…"
|
||||
case 1:
|
||||
middle = "…/…"
|
||||
default:
|
||||
middle = "…/…/…"
|
||||
}
|
||||
if strings.HasPrefix(s[i:j], "/") && middle != "…" {
|
||||
middle = strings.TrimPrefix(middle, "…")
|
||||
}
|
||||
if strings.HasSuffix(s[i:j], "/") && middle != "…" {
|
||||
middle = strings.TrimSuffix(middle, "…")
|
||||
}
|
||||
return s[:i] + middle + s[j:]
|
||||
}
|
||||
|
||||
// isInvalidUTF8 reports whether the decoded rune r of width rn is
// utf8.RuneError with a width of exactly one byte, which is the result
// that the utf8 decoding functions produce for an invalid encoding.
func isInvalidUTF8(r rune, rn int) bool {
	return rn == 1 && r == utf8.RuneError
}
|
Reference in New Issue
Block a user