chord/vendor/github.com/go-json-experiment/json/jsontext/decode.go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package jsontext

import (
	"bytes"
	"errors"
	"io"

	"github.com/go-json-experiment/json/internal/jsonflags"
	"github.com/go-json-experiment/json/internal/jsonopts"
	"github.com/go-json-experiment/json/internal/jsonwire"
)

// NOTE: The logic for decoding is complicated by the fact that reading from
// an io.Reader into a temporary buffer means that the buffer may contain a
// truncated portion of some valid input, requiring the need to fetch more data.
//
// This file is structured in the following way:
//
// - consumeXXX functions parse an exact JSON token from a []byte.
// If the buffer appears truncated, then it returns io.ErrUnexpectedEOF.
// The consumeSimpleXXX functions are so named because they only handle
// a subset of the grammar for the JSON token being parsed.
// They do not handle the full grammar to keep these functions inlinable.
//
// - Decoder.consumeXXX methods parse the next JSON token from Decoder.buf,
// automatically fetching more input if necessary. These methods take
// a position relative to the start of Decoder.buf as an argument and
// return the end of the consumed JSON token as a position,
// also relative to the start of Decoder.buf.
//
// - In the event of I/O errors or state machine violations,
// the implementation avoids mutating the state of Decoder
// (aside from the book-keeping needed to implement Decoder.fetch).
// For this reason, only Decoder.ReadToken and Decoder.ReadValue are
// responsible for updating Decoder.prevStart and Decoder.prevEnd.
//
// - For performance, much of the implementation uses the pattern of calling
// the inlinable consumeXXX functions first, and if more work is necessary,
// then it calls the slower Decoder.consumeXXX methods.
// TODO: Revisit this pattern if the Go compiler provides finer control
// over exactly which calls are inlined or not.

// Decoder is a streaming decoder for raw JSON tokens and values.
// It is used to read a stream of top-level JSON values,
// each separated by optional whitespace characters.
//
// [Decoder.ReadToken] and [Decoder.ReadValue] calls may be interleaved.
// For example, the following JSON value:
//
// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be parsed with the following calls (ignoring errors for brevity):
//
// d.ReadToken() // {
// d.ReadToken() // "name"
// d.ReadToken() // "value"
// d.ReadValue() // "array"
// d.ReadToken() // [
// d.ReadToken() // null
// d.ReadToken() // false
// d.ReadValue() // true
// d.ReadToken() // 3.14159
// d.ReadToken() // ]
// d.ReadValue() // "object"
// d.ReadValue() // {"k":"v"}
// d.ReadToken() // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call [Decoder.ReadToken] to obtain a
// string token for object names.
type Decoder struct {
s decoderState
}
// decoderState is the low-level state of Decoder.
// It has exported fields and methods for use by the "json" package.
type decoderState struct {
state
decodeBuffer
jsonopts.Struct
StringCache *[256]string // only used when unmarshaling; identical to json.stringCache
}
// decodeBuffer is a buffer split into 4 segments:
//
// - buf[0:prevEnd] // already read portion of the buffer
// - buf[prevStart:prevEnd] // previously read value
// - buf[prevEnd:len(buf)] // unread portion of the buffer
// - buf[len(buf):cap(buf)] // unused portion of the buffer
//
// Invariants:
//
// 0 ≤ prevStart ≤ prevEnd ≤ len(buf) ≤ cap(buf)
type decodeBuffer struct {
peekPos int // non-zero if valid offset into buf for start of next token
peekErr error // implies peekPos is -1
buf []byte // may alias rd if it is a bytes.Buffer
prevStart int
prevEnd int
// baseOffset is added to prevStart and prevEnd to obtain
// the absolute offset relative to the start of the io.Reader stream.
baseOffset int64
rd io.Reader
}
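
// As a worked illustration of the segments above, suppose the stream
// `{"a":1} {"b":2}` has been fetched into buf and ReadValue just returned
// the first value:
//
//    buf[prevStart:prevEnd] == `{"a":1}`  // previously read value
//    buf[prevEnd:len(buf)]  == ` {"b":2}` // unread portion
//    buf[len(buf):cap(buf)]               // unused capacity, filled by fetch
//
// and baseOffset+prevEnd is the absolute stream offset reported by
// Decoder.InputOffset.
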
// NewDecoder constructs a new streaming decoder reading from r.
//
// If r is a [bytes.Buffer], then the decoder parses directly from the buffer
// without first copying the contents to an intermediate buffer.
// Additional writes to the buffer must not occur while the decoder is in use.
func NewDecoder(r io.Reader, opts ...Options) *Decoder {
d := new(Decoder)
d.Reset(r, opts...)
return d
}
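
// As a minimal usage sketch, a decoder might be constructed directly over an
// in-memory buffer (taking the zero-copy bytes.Buffer path described above)
// or over any other io.Reader:
//
//    d := NewDecoder(bytes.NewBuffer(data)) // parses data in place
//    d = NewDecoder(bufio.NewReader(conn))  // reads via an intermediate buffer
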
// Reset resets a decoder such that it is reading afresh from r and
// configured with the provided options. Reset must not be called on
// a Decoder passed to the [encoding/json/v2.UnmarshalerFrom.UnmarshalJSONFrom] method
// or the [encoding/json/v2.UnmarshalFromFunc] function.
func (d *Decoder) Reset(r io.Reader, opts ...Options) {
switch {
case d == nil:
panic("jsontext: invalid nil Decoder")
case r == nil:
panic("jsontext: invalid nil io.Reader")
case d.s.Flags.Get(jsonflags.WithinArshalCall):
panic("jsontext: cannot reset Decoder passed to json.UnmarshalerFrom")
}
d.s.reset(nil, r, opts...)
}
func (d *decoderState) reset(b []byte, r io.Reader, opts ...Options) {
d.state.reset()
d.decodeBuffer = decodeBuffer{buf: b, rd: r}
opts2 := jsonopts.Struct{} // avoid mutating d.Struct in case it is part of opts
opts2.Join(opts...)
d.Struct = opts2
}
// Options returns the options used to construct the decoder and
// may additionally contain semantic options passed to a
// [encoding/json/v2.UnmarshalDecode] call.
//
// If operating within
// a [encoding/json/v2.UnmarshalerFrom.UnmarshalJSONFrom] method call or
// a [encoding/json/v2.UnmarshalFromFunc] function call,
// then the returned options are only valid within the call.
func (d *Decoder) Options() Options {
return &d.s.Struct
}
var errBufferWriteAfterNext = errors.New("invalid bytes.Buffer.Write call after calling bytes.Buffer.Next")
// fetch reads at least 1 byte from the underlying io.Reader.
// It returns io.ErrUnexpectedEOF if zero bytes were read and io.EOF was seen.
func (d *decoderState) fetch() error {
if d.rd == nil {
return io.ErrUnexpectedEOF
}
// Inform objectNameStack that we are about to fetch new buffer content.
d.Names.copyQuotedBuffer(d.buf)
// Specialize bytes.Buffer for better performance.
if bb, ok := d.rd.(*bytes.Buffer); ok {
switch {
case bb.Len() == 0:
return io.ErrUnexpectedEOF
case len(d.buf) == 0:
d.buf = bb.Next(bb.Len()) // "read" all data in the buffer
return nil
default:
// This only occurs if a partially filled bytes.Buffer was provided
// and more data is written to it while Decoder is reading from it.
// This practice will lead to data corruption since future writes
// may overwrite the contents of the current buffer.
//
// The user is trying to use a bytes.Buffer as a pipe,
// but a bytes.Buffer is a poor implementation of a pipe;
// the purpose-built io.Pipe should be used instead.
return &ioError{action: "read", err: errBufferWriteAfterNext}
}
}
// Allocate initial buffer if empty.
if cap(d.buf) == 0 {
d.buf = make([]byte, 0, 64)
}
// Check whether to grow the buffer.
const maxBufferSize = 4 << 10
const growthSizeFactor = 2 // higher value is faster
const growthRateFactor = 2 // higher value is slower
// By default, grow if below the maximum buffer size.
grow := cap(d.buf) <= maxBufferSize/growthSizeFactor
// Growing can be expensive, so only grow
// if a sufficient number of bytes have been processed.
grow = grow && int64(cap(d.buf)) < d.previousOffsetEnd()/growthRateFactor
// If prevStart==0, then fetch was called in order to fetch more data
// to finish consuming a large JSON value contiguously.
// Grow if less than 25% of the remaining capacity is available.
// Note that this may cause the input buffer to exceed maxBufferSize.
grow = grow || (d.prevStart == 0 && len(d.buf) >= 3*cap(d.buf)/4)
if grow {
// Allocate a new buffer and copy the contents of the old buffer over.
// TODO: Provide a hard limit on the maximum internal buffer size?
buf := make([]byte, 0, cap(d.buf)*growthSizeFactor)
d.buf = append(buf, d.buf[d.prevStart:]...)
} else {
// Move unread portion of the data to the front.
n := copy(d.buf[:cap(d.buf)], d.buf[d.prevStart:])
d.buf = d.buf[:n]
}
d.baseOffset += int64(d.prevStart)
d.prevEnd -= d.prevStart
d.prevStart = 0
// Read more data into the internal buffer.
for {
n, err := d.rd.Read(d.buf[len(d.buf):cap(d.buf)])
switch {
case n > 0:
d.buf = d.buf[:len(d.buf)+n]
return nil // ignore errors if any bytes are read
case err == io.EOF:
return io.ErrUnexpectedEOF
case err != nil:
return &ioError{action: "read", err: err}
default:
continue // Read returned (0, nil)
}
}
}
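
// As a concrete trace of the growth policy above (with maxBufferSize = 4096,
// growthSizeFactor = 2, and growthRateFactor = 2): a buffer with capacity 512
// that has already processed 2048 bytes satisfies both 512 <= 4096/2 and
// 512 < 2048/2, so it is reallocated with capacity 1024. A buffer at the
// 4096-byte maximum never grows by the first rule, but may still grow while
// contiguously consuming a single large value (prevStart == 0 and the buffer
// is at least 75% full).
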
const invalidateBufferByte = '#' // invalid starting character for JSON grammar
// invalidatePreviousRead invalidates buffers returned by Peek and Read calls
// so that the first byte is an invalid character.
// This Hyrum-proofs the API against faulty application code that assumes
// values returned by ReadValue remain valid past subsequent Read calls.
func (d *decodeBuffer) invalidatePreviousRead() {
// Avoid mutating the buffer if d.rd is nil, which implies that d.buf
// was provided by user code that may not expect mutations.
isBytesBuffer := func(r io.Reader) bool {
_, ok := r.(*bytes.Buffer)
return ok
}
if d.rd != nil && !isBytesBuffer(d.rd) && d.prevStart < d.prevEnd && uint(d.prevStart) < uint(len(d.buf)) {
d.buf[d.prevStart] = invalidateBufferByte
d.prevStart = d.prevEnd
}
}
// needMore reports whether there are no more unread bytes.
func (d *decodeBuffer) needMore(pos int) bool {
// NOTE: The arguments and logic are kept simple to keep this inlinable.
return pos == len(d.buf)
}
func (d *decodeBuffer) offsetAt(pos int) int64 { return d.baseOffset + int64(pos) }
func (d *decodeBuffer) previousOffsetStart() int64 { return d.baseOffset + int64(d.prevStart) }
func (d *decodeBuffer) previousOffsetEnd() int64 { return d.baseOffset + int64(d.prevEnd) }
func (d *decodeBuffer) previousBuffer() []byte { return d.buf[d.prevStart:d.prevEnd] }
func (d *decodeBuffer) unreadBuffer() []byte { return d.buf[d.prevEnd:len(d.buf)] }
// PreviousTokenOrValue returns the previously read token or value
// unless it has been invalidated by a call to PeekKind.
// If a token is just a delimiter, then this returns a 1-byte buffer.
// This method is used for error reporting at the semantic layer.
func (d *decodeBuffer) PreviousTokenOrValue() []byte {
b := d.previousBuffer()
// If peek was called, then the previous token or buffer is invalidated.
if d.peekPos > 0 || len(b) > 0 && b[0] == invalidateBufferByte {
return nil
}
// ReadToken does not preserve the buffer for null, bools, or delimiters.
// Manually re-construct that buffer.
if len(b) == 0 {
b = d.buf[:d.prevEnd] // entirety of the previous buffer
for _, tok := range []string{"null", "false", "true", "{", "}", "[", "]"} {
if len(b) >= len(tok) && string(b[len(b)-len(tok):]) == tok {
return b[len(b)-len(tok):]
}
}
}
return b
}
// PeekKind retrieves the next token kind, but does not advance the read offset.
//
// It returns 0 if an error occurs. Any such error is cached until
// the next read call and it is the caller's responsibility to eventually
// follow up a PeekKind call with a read call.
func (d *Decoder) PeekKind() Kind {
return d.s.PeekKind()
}
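
// A caller might use PeekKind to dispatch on the upcoming token without
// consuming it, for example (error handling elided):
//
//    switch d.PeekKind() {
//    case '"':
//        tok, _ := d.ReadToken() // consume the string token
//        _ = tok.String()
//    case '{', '[':
//        _ = d.SkipValue() // discard the entire composite value
//    default:
//        v, _ := d.ReadValue() // consume any other value whole
//        _ = v
//    }
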
func (d *decoderState) PeekKind() Kind {
// Check whether we have a cached peek result.
if d.peekPos > 0 {
return Kind(d.buf[d.peekPos]).normalize()
}
var err error
d.invalidatePreviousRead()
pos := d.prevEnd
// Consume leading whitespace.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 {
err = io.EOF // report io.EOF when no tokens follow the top-level value
}
d.peekPos, d.peekErr = -1, wrapSyntacticError(d, err, pos, 0)
return invalidKind
}
}
// Consume colon or comma.
var delim byte
if c := d.buf[pos]; c == ':' || c == ',' {
delim = c
pos += 1
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
err = wrapSyntacticError(d, err, pos, 0)
d.peekPos, d.peekErr = -1, d.checkDelimBeforeIOError(delim, err)
return invalidKind
}
}
}
next := Kind(d.buf[pos]).normalize()
if d.Tokens.needDelim(next) != delim {
d.peekPos, d.peekErr = -1, d.checkDelim(delim, next)
return invalidKind
}
// This may set peekPos to zero, which is indistinguishable from
// the uninitialized state. While a small hit to performance, it is correct
// since ReadValue and ReadToken will disregard the cached result and
// recompute the next kind.
d.peekPos, d.peekErr = pos, nil
return next
}
// checkDelimBeforeIOError checks whether the delim is even valid
// before returning an IO error, which occurs after the delim.
func (d *decoderState) checkDelimBeforeIOError(delim byte, err error) error {
// Since an IO error occurred, we do not know what the next kind is.
// However, knowing the next kind is necessary to validate
// whether the current delim is at least potentially valid.
// Since a JSON string is always valid as the next token,
// conservatively assume that is the next kind for validation.
const next = Kind('"')
if d.Tokens.needDelim(next) != delim {
err = d.checkDelim(delim, next)
}
return err
}
// CountNextDelimWhitespace counts the number of upcoming bytes of
// delimiter or whitespace characters.
// This method is used for error reporting at the semantic layer.
func (d *decoderState) CountNextDelimWhitespace() int {
d.PeekKind() // populate unreadBuffer
return len(d.unreadBuffer()) - len(bytes.TrimLeft(d.unreadBuffer(), ",: \n\r\t"))
}
// checkDelim checks whether delim is valid for the given next kind.
func (d *decoderState) checkDelim(delim byte, next Kind) error {
where := "at start of value"
switch d.Tokens.needDelim(next) {
case delim:
return nil
case ':':
where = "after object name (expecting ':')"
case ',':
if d.Tokens.Last.isObject() {
where = "after object value (expecting ',' or '}')"
} else {
where = "after array element (expecting ',' or ']')"
}
}
pos := d.prevEnd // restore position to right after leading whitespace
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
err := jsonwire.NewInvalidCharacterError(d.buf[pos:], where)
return wrapSyntacticError(d, err, pos, 0)
}
// SkipValue is semantically equivalent to calling [Decoder.ReadValue] and discarding
// the result except that memory is not wasted trying to hold the entire result.
func (d *Decoder) SkipValue() error {
return d.s.SkipValue()
}
func (d *decoderState) SkipValue() error {
switch d.PeekKind() {
case '{', '[':
// For JSON objects and arrays, keep skipping all tokens
// until the depth matches the starting depth.
depth := d.Tokens.Depth()
for {
if _, err := d.ReadToken(); err != nil {
return err
}
if depth >= d.Tokens.Depth() {
return nil
}
}
default:
// Trying to skip a value when the next token is a '}' or ']'
// will result in an error being returned here.
var flags jsonwire.ValueFlags
if _, err := d.ReadValue(&flags); err != nil {
return err
}
return nil
}
}
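
// One plausible use of SkipValue is filtering a JSON object, decoding only
// the members of interest and discarding the rest (error handling elided):
//
//    d.ReadToken() // consume '{'
//    for d.PeekKind() != '}' {
//        name, _ := d.ReadToken() // object member name
//        if name.String() == "interesting" {
//            v, _ := d.ReadValue()
//            process(v) // hypothetical handler for the wanted member
//        } else {
//            d.SkipValue() // discard without buffering the whole value
//        }
//    }
//    d.ReadToken() // consume '}'
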
// SkipValueRemainder skips the remainder of a value
// after reading a '{' or '[' token.
func (d *decoderState) SkipValueRemainder() error {
if d.Tokens.Depth()-1 > 0 && d.Tokens.Last.Length() == 0 {
for n := d.Tokens.Depth(); d.Tokens.Depth() >= n; {
if _, err := d.ReadToken(); err != nil {
return err
}
}
}
return nil
}
// SkipUntil skips all tokens until the state machine
// is at or past the specified depth and length.
func (d *decoderState) SkipUntil(depth int, length int64) error {
for d.Tokens.Depth() > depth || (d.Tokens.Depth() == depth && d.Tokens.Last.Length() < length) {
if _, err := d.ReadToken(); err != nil {
return err
}
}
return nil
}
// ReadToken reads the next [Token], advancing the read offset.
// The returned token is only valid until the next Peek, Read, or Skip call.
// It returns [io.EOF] if there are no more tokens.
func (d *Decoder) ReadToken() (Token, error) {
return d.s.ReadToken()
}
func (d *decoderState) ReadToken() (Token, error) {
// Determine the next kind.
var err error
var next Kind
pos := d.peekPos
if pos != 0 {
// Use cached peek result.
if d.peekErr != nil {
err := d.peekErr
d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
return Token{}, err
}
next = Kind(d.buf[pos]).normalize()
d.peekPos = 0 // reset cache
} else {
d.invalidatePreviousRead()
pos = d.prevEnd
// Consume leading whitespace.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 {
err = io.EOF // report io.EOF when no tokens follow the top-level value
}
return Token{}, wrapSyntacticError(d, err, pos, 0)
}
}
// Consume colon or comma.
var delim byte
if c := d.buf[pos]; c == ':' || c == ',' {
delim = c
pos += 1
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
err = wrapSyntacticError(d, err, pos, 0)
return Token{}, d.checkDelimBeforeIOError(delim, err)
}
}
}
next = Kind(d.buf[pos]).normalize()
if d.Tokens.needDelim(next) != delim {
return Token{}, d.checkDelim(delim, next)
}
}
// Handle the next token.
var n int
switch next {
case 'n':
if jsonwire.ConsumeNull(d.buf[pos:]) == 0 {
pos, err = d.consumeLiteral(pos, "null")
if err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
} else {
pos += len("null")
}
if err = d.Tokens.appendLiteral(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos-len("null"), +1) // report position at start of literal
}
d.prevStart, d.prevEnd = pos, pos
return Null, nil
case 'f':
if jsonwire.ConsumeFalse(d.buf[pos:]) == 0 {
pos, err = d.consumeLiteral(pos, "false")
if err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
} else {
pos += len("false")
}
if err = d.Tokens.appendLiteral(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos-len("false"), +1) // report position at start of literal
}
d.prevStart, d.prevEnd = pos, pos
return False, nil
case 't':
if jsonwire.ConsumeTrue(d.buf[pos:]) == 0 {
pos, err = d.consumeLiteral(pos, "true")
if err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
} else {
pos += len("true")
}
if err = d.Tokens.appendLiteral(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos-len("true"), +1) // report position at start of literal
}
d.prevStart, d.prevEnd = pos, pos
return True, nil
case '"':
var flags jsonwire.ValueFlags // TODO: Preserve this in Token?
if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 {
oldAbsPos := d.baseOffset + int64(pos)
pos, err = d.consumeString(&flags, pos)
newAbsPos := d.baseOffset + int64(pos)
n = int(newAbsPos - oldAbsPos)
if err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
} else {
pos += n
}
if d.Tokens.Last.NeedObjectName() {
if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
if !d.Tokens.Last.isValidNamespace() {
return Token{}, wrapSyntacticError(d, errInvalidNamespace, pos-n, +1)
}
if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) {
err = wrapWithObjectName(ErrDuplicateName, d.buf[pos-n:pos])
return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string
}
}
d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds
}
if err = d.Tokens.appendString(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string
}
d.prevStart, d.prevEnd = pos-n, pos
return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil
case '0':
// NOTE: Since JSON numbers are not self-terminating,
// we need to make sure that the next byte is not part of a number.
if n = jsonwire.ConsumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
oldAbsPos := d.baseOffset + int64(pos)
pos, err = d.consumeNumber(pos)
newAbsPos := d.baseOffset + int64(pos)
n = int(newAbsPos - oldAbsPos)
if err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
} else {
pos += n
}
if err = d.Tokens.appendNumber(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of number
}
d.prevStart, d.prevEnd = pos-n, pos
return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil
case '{':
if err = d.Tokens.pushObject(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
d.Names.push()
if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
d.Namespaces.push()
}
pos += 1
d.prevStart, d.prevEnd = pos, pos
return BeginObject, nil
case '}':
if err = d.Tokens.popObject(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
d.Names.pop()
if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
d.Namespaces.pop()
}
pos += 1
d.prevStart, d.prevEnd = pos, pos
return EndObject, nil
case '[':
if err = d.Tokens.pushArray(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
pos += 1
d.prevStart, d.prevEnd = pos, pos
return BeginArray, nil
case ']':
if err = d.Tokens.popArray(); err != nil {
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
pos += 1
d.prevStart, d.prevEnd = pos, pos
return EndArray, nil
default:
err = jsonwire.NewInvalidCharacterError(d.buf[pos:], "at start of value")
return Token{}, wrapSyntacticError(d, err, pos, +1)
}
}
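
// Since ReadToken reports [io.EOF] once the input is exhausted, a stream of
// tokens might be drained as follows:
//
//    for {
//        tok, err := d.ReadToken()
//        if err == io.EOF {
//            break // cleanly reached the end of input
//        }
//        if err != nil {
//            return err // syntactic or I/O failure
//        }
//        fmt.Println(tok.Kind(), tok.String())
//    }
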
// ReadValue returns the next raw JSON value, advancing the read offset.
// The value is stripped of any leading or trailing whitespace and
// contains the exact bytes of the input, which may contain invalid UTF-8
// if [AllowInvalidUTF8] is specified.
//
// The returned value is only valid until the next Peek, Read, or Skip call and
// may not be mutated while the Decoder remains in use.
// If the decoder is currently at the end token for an object or array,
// then it reports a [SyntacticError] and the internal state remains unchanged.
// It returns [io.EOF] if there are no more values.
func (d *Decoder) ReadValue() (Value, error) {
var flags jsonwire.ValueFlags
return d.s.ReadValue(&flags)
}
func (d *decoderState) ReadValue(flags *jsonwire.ValueFlags) (Value, error) {
// Determine the next kind.
var err error
var next Kind
pos := d.peekPos
if pos != 0 {
// Use cached peek result.
if d.peekErr != nil {
err := d.peekErr
d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
return nil, err
}
next = Kind(d.buf[pos]).normalize()
d.peekPos = 0 // reset cache
} else {
d.invalidatePreviousRead()
pos = d.prevEnd
// Consume leading whitespace.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 {
err = io.EOF // report io.EOF when no tokens follow the top-level value
}
return nil, wrapSyntacticError(d, err, pos, 0)
}
}
// Consume colon or comma.
var delim byte
if c := d.buf[pos]; c == ':' || c == ',' {
delim = c
pos += 1
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
err = wrapSyntacticError(d, err, pos, 0)
return nil, d.checkDelimBeforeIOError(delim, err)
}
}
}
next = Kind(d.buf[pos]).normalize()
if d.Tokens.needDelim(next) != delim {
return nil, d.checkDelim(delim, next)
}
}
// Handle the next value.
oldAbsPos := d.baseOffset + int64(pos)
pos, err = d.consumeValue(flags, pos, d.Tokens.Depth())
newAbsPos := d.baseOffset + int64(pos)
n := int(newAbsPos - oldAbsPos)
if err != nil {
return nil, wrapSyntacticError(d, err, pos, +1)
}
switch next {
case 'n', 't', 'f':
err = d.Tokens.appendLiteral()
case '"':
if d.Tokens.Last.NeedObjectName() {
if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
if !d.Tokens.Last.isValidNamespace() {
err = errInvalidNamespace
break
}
if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) {
err = wrapWithObjectName(ErrDuplicateName, d.buf[pos-n:pos])
break
}
}
d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds
}
err = d.Tokens.appendString()
case '0':
err = d.Tokens.appendNumber()
case '{':
if err = d.Tokens.pushObject(); err != nil {
break
}
if err = d.Tokens.popObject(); err != nil {
panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
}
case '[':
if err = d.Tokens.pushArray(); err != nil {
break
}
if err = d.Tokens.popArray(); err != nil {
panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
}
}
if err != nil {
return nil, wrapSyntacticError(d, err, pos-n, +1) // report position at start of value
}
d.prevEnd = pos
d.prevStart = pos - n
return d.buf[pos-n : pos : pos], nil
}
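
// For example, a stream of concatenated or newline-delimited top-level values
// could be consumed one raw value at a time:
//
//    for {
//        v, err := d.ReadValue()
//        if err == io.EOF {
//            break
//        }
//        if err != nil {
//            return err
//        }
//        keep := append([]byte(nil), v...) // copy; v is invalidated by the next read
//        _ = keep
//    }
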
// CheckNextValue checks whether the next value is syntactically valid,
// but does not advance the read offset.
func (d *decoderState) CheckNextValue() error {
d.PeekKind() // populates d.peekPos and d.peekErr
pos, err := d.peekPos, d.peekErr
d.peekPos, d.peekErr = 0, nil
if err != nil {
return err
}
var flags jsonwire.ValueFlags
if pos, err := d.consumeValue(&flags, pos, d.Tokens.Depth()); err != nil {
return wrapSyntacticError(d, err, pos, +1)
}
return nil
}
// CheckEOF verifies that the input has no more data.
func (d *decoderState) CheckEOF() error {
switch pos, err := d.consumeWhitespace(d.prevEnd); err {
case nil:
err := jsonwire.NewInvalidCharacterError(d.buf[pos:], "after top-level value")
return wrapSyntacticError(d, err, pos, 0)
case io.ErrUnexpectedEOF:
return nil
default:
return err
}
}
// consumeWhitespace consumes all whitespace starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the last whitespace.
// If the returned error is nil, there is guaranteed to be at least one unread byte.
//
// The following pattern is common in this implementation:
//
// pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
// if d.needMore(pos) {
// if pos, err = d.consumeWhitespace(pos); err != nil {
// return ...
// }
// }
//
// It is difficult to simplify this without sacrificing performance since
// consumeWhitespace must be inlined. The body of the if statement is
// executed only in rare situations where we need to fetch more data.
// Since fetching may return an error, we also need to check the error.
func (d *decoderState) consumeWhitespace(pos int) (newPos int, err error) {
for {
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
absPos := d.baseOffset + int64(pos)
err = d.fetch() // will mutate d.buf and invalidate pos
pos = int(absPos - d.baseOffset)
if err != nil {
return pos, err
}
continue
}
return pos, nil
}
}
// consumeValue consumes a single JSON value starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the value.
func (d *decoderState) consumeValue(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) {
for {
var n int
var err error
switch next := Kind(d.buf[pos]).normalize(); next {
case 'n':
if n = jsonwire.ConsumeNull(d.buf[pos:]); n == 0 {
n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "null")
}
case 'f':
if n = jsonwire.ConsumeFalse(d.buf[pos:]); n == 0 {
n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "false")
}
case 't':
if n = jsonwire.ConsumeTrue(d.buf[pos:]); n == 0 {
n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "true")
}
case '"':
if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 {
return d.consumeString(flags, pos)
}
case '0':
// NOTE: Since JSON numbers are not self-terminating,
// we need to make sure that the next byte is not part of a number.
if n = jsonwire.ConsumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
return d.consumeNumber(pos)
}
case '{':
return d.consumeObject(flags, pos, depth)
case '[':
return d.consumeArray(flags, pos, depth)
default:
if (d.Tokens.Last.isObject() && next == ']') || (d.Tokens.Last.isArray() && next == '}') {
return pos, errMismatchDelim
}
return pos, jsonwire.NewInvalidCharacterError(d.buf[pos:], "at start of value")
}
if err == io.ErrUnexpectedEOF {
absPos := d.baseOffset + int64(pos)
err = d.fetch() // will mutate d.buf and invalidate pos
pos = int(absPos - d.baseOffset)
if err != nil {
return pos + n, err
}
continue
}
return pos + n, err
}
}
// consumeLiteral consumes a single JSON literal starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the literal.
func (d *decoderState) consumeLiteral(pos int, lit string) (newPos int, err error) {
for {
n, err := jsonwire.ConsumeLiteral(d.buf[pos:], lit)
if err == io.ErrUnexpectedEOF {
absPos := d.baseOffset + int64(pos)
err = d.fetch() // will mutate d.buf and invalidate pos
pos = int(absPos - d.baseOffset)
if err != nil {
return pos + n, err
}
continue
}
return pos + n, err
}
}
// consumeString consumes a single JSON string starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the string.
func (d *decoderState) consumeString(flags *jsonwire.ValueFlags, pos int) (newPos int, err error) {
var n int
for {
n, err = jsonwire.ConsumeStringResumable(flags, d.buf[pos:], n, !d.Flags.Get(jsonflags.AllowInvalidUTF8))
if err == io.ErrUnexpectedEOF {
absPos := d.baseOffset + int64(pos)
err = d.fetch() // will mutate d.buf and invalidate pos
pos = int(absPos - d.baseOffset)
if err != nil {
return pos + n, err
}
continue
}
return pos + n, err
}
}
// consumeNumber consumes a single JSON number starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the number.
func (d *decoderState) consumeNumber(pos int) (newPos int, err error) {
var n int
var state jsonwire.ConsumeNumberState
for {
n, state, err = jsonwire.ConsumeNumberResumable(d.buf[pos:], n, state)
// NOTE: Since JSON numbers are not self-terminating,
// we need to make sure that the next byte is not part of a number.
if err == io.ErrUnexpectedEOF || d.needMore(pos+n) {
mayTerminate := err == nil
absPos := d.baseOffset + int64(pos)
err = d.fetch() // will mutate d.buf and invalidate pos
pos = int(absPos - d.baseOffset)
if err != nil {
if mayTerminate && err == io.ErrUnexpectedEOF {
return pos + n, nil
}
return pos, err
}
continue
}
return pos + n, err
}
}
// consumeObject consumes a single JSON object starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the object.
func (d *decoderState) consumeObject(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) {
var n int
var names *objectNamespace
if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
d.Namespaces.push()
defer d.Namespaces.pop()
names = d.Namespaces.Last()
}
// Handle before start.
if uint(pos) >= uint(len(d.buf)) || d.buf[pos] != '{' {
panic("BUG: consumeObject must be called with a buffer that starts with '{'")
} else if depth == maxNestingDepth+1 {
return pos, errMaxDepth
}
pos++
// Handle after start.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
return pos, err
}
}
if d.buf[pos] == '}' {
pos++
return pos, nil
}
depth++
for {
// Handle before name.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
return pos, err
}
}
var flags2 jsonwire.ValueFlags
if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 {
oldAbsPos := d.baseOffset + int64(pos)
pos, err = d.consumeString(&flags2, pos)
newAbsPos := d.baseOffset + int64(pos)
n = int(newAbsPos - oldAbsPos)
flags.Join(flags2)
if err != nil {
return pos, err
}
} else {
pos += n
}
quotedName := d.buf[pos-n : pos]
if !d.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, flags2.IsVerbatim()) {
return pos - n, wrapWithObjectName(ErrDuplicateName, quotedName)
}
// Handle after name.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
return pos, wrapWithObjectName(err, quotedName)
}
}
if d.buf[pos] != ':' {
err := jsonwire.NewInvalidCharacterError(d.buf[pos:], "after object name (expecting ':')")
return pos, wrapWithObjectName(err, quotedName)
}
pos++
// Handle before value.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
return pos, wrapWithObjectName(err, quotedName)
}
}
pos, err = d.consumeValue(flags, pos, depth)
if err != nil {
return pos, wrapWithObjectName(err, quotedName)
}
// Handle after value.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
return pos, err
}
}
switch d.buf[pos] {
case ',':
pos++
continue
case '}':
pos++
return pos, nil
default:
return pos, jsonwire.NewInvalidCharacterError(d.buf[pos:], "after object value (expecting ',' or '}')")
}
}
}
// consumeArray consumes a single JSON array starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the array.
func (d *decoderState) consumeArray(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) {
// Handle before start.
if uint(pos) >= uint(len(d.buf)) || d.buf[pos] != '[' {
panic("BUG: consumeArray must be called with a buffer that starts with '['")
} else if depth == maxNestingDepth+1 {
return pos, errMaxDepth
}
pos++
// Handle after start.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
return pos, err
}
}
if d.buf[pos] == ']' {
pos++
return pos, nil
}
var idx int64
depth++
for {
// Handle before value.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
return pos, err
}
}
pos, err = d.consumeValue(flags, pos, depth)
if err != nil {
return pos, wrapWithArrayIndex(err, idx)
}
// Handle after value.
pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
if d.needMore(pos) {
if pos, err = d.consumeWhitespace(pos); err != nil {
return pos, err
}
}
switch d.buf[pos] {
case ',':
pos++
idx++
continue
case ']':
pos++
return pos, nil
default:
return pos, jsonwire.NewInvalidCharacterError(d.buf[pos:], "after array element (expecting ',' or ']')")
}
}
}
// InputOffset returns the current input byte offset. It gives the location
// of the next byte immediately after the most recently returned token or value.
// The number of bytes actually read from the underlying [io.Reader] may be more
// than this offset due to internal buffering effects.
func (d *Decoder) InputOffset() int64 {
return d.s.previousOffsetEnd()
}
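
// A caller might use InputOffset to locate each value within the stream:
//
//    v, _ := d.ReadValue()
//    end := d.InputOffset()       // stream offset just past v
//    start := end - int64(len(v)) // stream offset where v begins
//    _, _ = start, end
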
// UnreadBuffer returns the data remaining in the unread buffer,
// which may contain zero or more bytes.
// The returned buffer must not be mutated while Decoder continues to be used.
// The buffer contents are valid until the next Peek, Read, or Skip call.
func (d *Decoder) UnreadBuffer() []byte {
return d.s.unreadBuffer()
}
// StackDepth returns the depth of the state machine for read JSON data.
// Each level on the stack represents a nested JSON object or array.
// It is incremented whenever a [BeginObject] or [BeginArray] token is encountered
// and decremented whenever an [EndObject] or [EndArray] token is encountered.
// The depth is zero-indexed, where zero represents the top-level JSON value.
func (d *Decoder) StackDepth() int {
// NOTE: Keep in sync with Encoder.StackDepth.
return d.s.Tokens.Depth() - 1
}
// StackIndex returns information about the specified stack level.
// It must be a number between 0 and [Decoder.StackDepth], inclusive.
// For each level, it reports the kind:
//
// - 0 for a level of zero,
// - '{' for a level representing a JSON object, and
// - '[' for a level representing a JSON array.
//
// It also reports the length of that JSON object or array.
// Each name and value in a JSON object is counted separately,
// so the effective number of members would be half the length.
// A complete JSON object must have an even length.
func (d *Decoder) StackIndex(i int) (Kind, int64) {
// NOTE: Keep in sync with Encoder.StackIndex.
switch s := d.s.Tokens.index(i); {
case i > 0 && s.isObject():
return '{', s.Length()
case i > 0 && s.isArray():
return '[', s.Length()
default:
return 0, s.Length()
}
}
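
// Together with [Decoder.StackDepth], StackIndex can report where in the JSON
// structure the decoder currently is, e.g. for diagnostics:
//
//    for i := 0; i <= d.StackDepth(); i++ {
//        kind, length := d.StackIndex(i)
//        fmt.Printf("level %d: kind=%v length=%d\n", i, kind, length)
//    }
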
// StackPointer returns a JSON Pointer (RFC 6901) to the most recently read value.
func (d *Decoder) StackPointer() Pointer {
return Pointer(d.s.AppendStackPointer(nil, -1))
}
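
// For instance, after reading the input `{"a":[true,42]}` up through the
// token 42, StackPointer should report "/a/1".
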
func (d *decoderState) AppendStackPointer(b []byte, where int) []byte {
d.Names.copyQuotedBuffer(d.buf)
return d.state.appendStackPointer(b, where)
}