chord/vendor/github.com/go-json-experiment/json/fields.go
2025-03-15 20:42:37 -04:00

645 lines
24 KiB
Go

// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"cmp"
"errors"
"fmt"
"io"
"reflect"
"slices"
"strconv"
"strings"
"unicode"
"unicode/utf8"
"github.com/go-json-experiment/json/internal/jsonflags"
"github.com/go-json-experiment/json/internal/jsonwire"
)
// isZeroer is the method set of a type that can report whether
// its own value is zero through an IsZero method.
type isZeroer interface {
IsZero() bool
}
// isZeroerType is the reflect.Type of the isZeroer interface.
// It is used to test whether a Go type implements the IsZero method.
var isZeroerType = reflect.TypeFor[isZeroer]()
// structFields is the set of JSON serializable fields of a Go struct type
// as computed by makeStructFields.
type structFields struct {
flattened []structField // listed in depth-first ordering
// byActualName maps an exact JSON object name to its entry in flattened.
byActualName map[string]*structField
// byFoldedName maps a folded name (as produced by foldName) to all entries
// in flattened that share it, ordered by ascending id.
byFoldedName map[string][]*structField
// inlinedFallback is the dominant inlined fallback field
// (a Go map or jsontext.Value), or nil if no such field exists.
inlinedFallback *structField
}
// reindex splits the index path of every field into its leading element
// and the remainder so that the first index is available without
// a slice access (and its bounds check) at runtime.
//
// While [makeStructFields] constructs each [structField], the complete
// 0-indexed path is kept in index. Before makeStructFields returns,
// this method moves element 0 into index0 and keeps only the rest in index.
func (fs *structFields) reindex() {
	split := func(field *structField) {
		head, tail := field.index[0], field.index[1:]
		if len(tail) == 0 {
			tail = nil // avoid pinning the backing slice
		}
		field.index0 = head
		field.index = tail
	}

	for i := range fs.flattened {
		split(&fs.flattened[i])
	}
	if fallback := fs.inlinedFallback; fallback != nil {
		split(fallback)
	}
}
// lookupByFoldedName returns the fields registered under the folded form
// of name, which makes the match case-insensitive and blind to
// the presence of dashes and underscores.
func (fs *structFields) lookupByFoldedName(name []byte) []*structField {
	folded := foldName(name)
	return fs.byFoldedName[string(folded)]
}
// structField describes a single JSON serializable Go struct field
// as discovered by makeStructFields.
type structField struct {
id int // unique numeric ID in breadth-first ordering
index0 int // 0th index into a struct according to [reflect.Type.FieldByIndex]
index []int // 1st index and remainder according to [reflect.Type.FieldByIndex]
typ reflect.Type // Go type of the struct field
// fncs is the result of lookupArshaler for the field type
// (or for the map element type of an inlined map field).
// It is nil for an inlined jsontext.Value field.
fncs *arshaler
// isZero calls the IsZero method of the field value.
// It is nil if neither the field type nor a pointer to it implements isZeroer.
isZero func(addressableValue) bool
// isEmpty reports whether a string, map, array, or slice has zero length,
// or whether a pointer or interface is nil. It is nil for all other kinds.
isEmpty func(addressableValue) bool
fieldOptions // options parsed from the `json` struct tag
}
// errNoExportedFields is reported by makeStructFields for a Go struct that
// has at least one field, but no JSON serializable fields and no `json` tags.
var errNoExportedFields = errors.New("Go struct has no exported fields")
// makeStructFields computes the JSON serializable fields of the Go struct
// type root, including the fields reachable through inlined struct fields.
//
// The returned structFields is populated even when serr is non-nil.
// Only the first problem encountered is reported in serr;
// any later problems are dropped by the use of cmp.Or.
func makeStructFields(root reflect.Type) (fs structFields, serr *SemanticError) {
// orErrorf returns serr if it is already set; otherwise it returns
// a new SemanticError for type t with the formatted message.
orErrorf := func(serr *SemanticError, t reflect.Type, f string, a ...any) *SemanticError {
return cmp.Or(serr, &SemanticError{GoType: t, Err: fmt.Errorf(f, a...)})
}
// Set up a queue for a breadth-first search.
var queueIndex int
type queueEntry struct {
typ reflect.Type
index []int
visitChildren bool // whether to recursively visit inlined field in this struct
}
queue := []queueEntry{{root, nil, true}}
seen := map[reflect.Type]bool{root: true}
// Perform a breadth-first search over all reachable fields.
// This ensures that len(f.index) will be monotonically increasing.
var allFields, inlinedFallbacks []structField
for queueIndex < len(queue) {
qe := queue[queueIndex]
queueIndex++
t := qe.typ
inlinedFallbackIndex := -1 // index of last inlined fallback field in current struct
namesIndex := make(map[string]int) // index of each field with a given JSON object name in current struct
var hasAnyJSONTag bool // whether any Go struct field has a `json` tag
var hasAnyJSONField bool // whether any JSON serializable fields exist in current struct
for i := range t.NumField() {
sf := t.Field(i)
_, hasTag := sf.Tag.Lookup("json")
hasAnyJSONTag = hasAnyJSONTag || hasTag
options, ignored, err := parseFieldOptions(sf)
if err != nil {
serr = cmp.Or(serr, &SemanticError{GoType: t, Err: err})
}
if ignored {
continue
}
hasAnyJSONField = true
f := structField{
// Allocate a new slice (len=N+1) to hold both
// the parent index (len=N) and the current index (len=1).
// Do this to avoid clobbering the memory of the parent index.
index: append(append(make([]int, 0, len(qe.index)+1), qe.index...), i),
typ: sf.Type,
fieldOptions: options,
}
if sf.Anonymous && !f.hasName {
if indirectType(f.typ).Kind() != reflect.Struct {
serr = orErrorf(serr, t, "embedded Go struct field %s of non-struct type must be explicitly given a JSON name", sf.Name)
} else {
f.inline = true // implied by use of Go embedding without an explicit name
}
}
if f.inline || f.unknown {
// Handle an inlined field that serializes to/from
// zero or more JSON object members.
//
// The struct comparison below checks that no option other than
// `inline` and/or `unknown` is set on the field.
switch f.fieldOptions {
case fieldOptions{name: f.name, quotedName: f.quotedName, inline: true}:
case fieldOptions{name: f.name, quotedName: f.quotedName, unknown: true}:
case fieldOptions{name: f.name, quotedName: f.quotedName, inline: true, unknown: true}:
serr = orErrorf(serr, t, "Go struct field %s cannot have both `inline` and `unknown` specified", sf.Name)
f.inline = false // let `unknown` take precedence
default:
serr = orErrorf(serr, t, "Go struct field %s cannot have any options other than `inline` or `unknown` specified", sf.Name)
if f.hasName {
continue // invalid inlined field; treat as ignored
}
// Drop every option other than `inline` and `unknown`.
f.fieldOptions = fieldOptions{name: f.name, quotedName: f.quotedName, inline: f.inline, unknown: f.unknown}
if f.inline && f.unknown {
f.inline = false // let `unknown` take precedence
}
}
// Reject any types with custom serialization otherwise
// it becomes impossible to know what sub-fields to inline.
tf := indirectType(f.typ)
if implementsAny(tf, allMethodTypes...) && tf != jsontextValueType {
serr = orErrorf(serr, t, "inlined Go struct field %s of type %s must not implement marshal or unmarshal methods", sf.Name, tf)
}
// Handle an inlined field that serializes to/from
// a finite number of JSON object members backed by a Go struct.
if tf.Kind() == reflect.Struct {
if f.unknown {
serr = orErrorf(serr, t, "inlined Go struct field %s of type %s with `unknown` tag must be a Go map of string key or a jsontext.Value", sf.Name, tf)
continue // invalid inlined field; treat as ignored
}
if qe.visitChildren {
// Enqueue the struct type, but only descend into its own
// inlined fields if this type has not been seen before.
queue = append(queue, queueEntry{tf, f.index, !seen[tf]})
}
seen[tf] = true
continue
} else if !sf.IsExported() {
serr = orErrorf(serr, t, "inlined Go struct field %s is not exported", sf.Name)
continue // invalid inlined field; treat as ignored
}
// Handle an inlined field that serializes to/from any number of
// JSON object members backed by a Go map or jsontext.Value.
switch {
case tf == jsontextValueType:
f.fncs = nil // specially handled in arshal_inlined.go
case tf.Kind() == reflect.Map && tf.Key().Kind() == reflect.String:
if implementsAny(tf.Key(), allMethodTypes...) {
serr = orErrorf(serr, t, "inlined map field %s of type %s must have a string key that does not implement marshal or unmarshal methods", sf.Name, tf)
continue // invalid inlined field; treat as ignored
}
f.fncs = lookupArshaler(tf.Elem())
default:
serr = orErrorf(serr, t, "inlined Go struct field %s of type %s must be a Go struct, Go map of string key, or jsontext.Value", sf.Name, tf)
continue // invalid inlined field; treat as ignored
}
// Reject multiple inlined fallback fields within the same struct.
if inlinedFallbackIndex >= 0 {
serr = orErrorf(serr, t, "inlined Go struct fields %s and %s cannot both be a Go map or jsontext.Value", t.Field(inlinedFallbackIndex).Name, sf.Name)
// Still append f to inlinedFallbacks as there is still a
// check for a dominant inlined fallback before returning.
}
inlinedFallbackIndex = i
inlinedFallbacks = append(inlinedFallbacks, f)
} else {
// Handle normal Go struct field that serializes to/from
// a single JSON object member.
// Unexported fields cannot be serialized except for
// embedded fields of a struct type,
// which might promote exported fields of their own.
if !sf.IsExported() {
tf := indirectType(f.typ)
if !(sf.Anonymous && tf.Kind() == reflect.Struct) {
serr = orErrorf(serr, t, "Go struct field %s is not exported", sf.Name)
continue
}
// Unfortunately, methods on the unexported field
// still cannot be called.
if implementsAny(tf, allMethodTypes...) ||
(f.omitzero && implementsAny(tf, isZeroerType)) {
serr = orErrorf(serr, t, "Go struct field %s is not exported for method calls", sf.Name)
continue
}
}
// Provide a function that uses a type's IsZero method.
// The cases are ordered so that nil interfaces and nil pointers
// are handled before a direct method call is attempted.
switch {
case sf.Type.Kind() == reflect.Interface && sf.Type.Implements(isZeroerType):
f.isZero = func(va addressableValue) bool {
// Avoid panics calling IsZero on a nil interface or
// non-nil interface with nil pointer.
return va.IsNil() || (va.Elem().Kind() == reflect.Pointer && va.Elem().IsNil()) || va.Interface().(isZeroer).IsZero()
}
case sf.Type.Kind() == reflect.Pointer && sf.Type.Implements(isZeroerType):
f.isZero = func(va addressableValue) bool {
// Avoid panics calling IsZero on nil pointer.
return va.IsNil() || va.Interface().(isZeroer).IsZero()
}
case sf.Type.Implements(isZeroerType):
f.isZero = func(va addressableValue) bool { return va.Interface().(isZeroer).IsZero() }
case reflect.PointerTo(sf.Type).Implements(isZeroerType):
f.isZero = func(va addressableValue) bool { return va.Addr().Interface().(isZeroer).IsZero() }
}
// Provide a function that can determine whether the value would
// serialize as an empty JSON value.
switch sf.Type.Kind() {
case reflect.String, reflect.Map, reflect.Array, reflect.Slice:
f.isEmpty = func(va addressableValue) bool { return va.Len() == 0 }
case reflect.Pointer, reflect.Interface:
f.isEmpty = func(va addressableValue) bool { return va.IsNil() }
}
// Reject multiple fields with same name within the same struct.
if j, ok := namesIndex[f.name]; ok {
serr = orErrorf(serr, t, "Go struct fields %s and %s conflict over JSON object name %q", t.Field(j).Name, sf.Name, f.name)
// Still append f to allFields as there is still a
// check for a dominant field before returning.
}
namesIndex[f.name] = i
f.id = len(allFields)
f.fncs = lookupArshaler(sf.Type)
allFields = append(allFields, f)
}
}
// NOTE: New users to the json package are occasionally surprised that
// unexported fields are ignored. This occurs by necessity due to our
// inability to directly introspect such fields with Go reflection
// without the use of unsafe.
//
// To reduce friction here, refuse to serialize any Go struct that
// has no JSON serializable fields, has at least one Go struct field,
// and does not have any `json` tags present. For example,
// errors returned by errors.New would fail to serialize.
isEmptyStruct := t.NumField() == 0
if !isEmptyStruct && !hasAnyJSONTag && !hasAnyJSONField {
serr = cmp.Or(serr, &SemanticError{GoType: t, Err: errNoExportedFields})
}
}
// Sort the fields by exact name (breaking ties by depth and
// then by presence of an explicitly provided JSON name).
// Select the dominant field from each set of fields with the same name.
// If multiple fields have the same name, then the dominant field
// is the one that exists alone at the shallowest depth,
// or the one that is uniquely tagged with a JSON name.
// Otherwise, no dominant field exists for the set.
//
// flattened reuses the backing array of allFields. This is safe because
// at most one field is kept for each group of one or more fields consumed.
flattened := allFields[:0]
slices.SortStableFunc(allFields, func(x, y structField) int {
return cmp.Or(
strings.Compare(x.name, y.name),
cmp.Compare(len(x.index), len(y.index)),
boolsCompare(!x.hasName, !y.hasName))
})
for len(allFields) > 0 {
n := 1 // number of fields with the same exact name
for n < len(allFields) && allFields[n-1].name == allFields[n].name {
n++
}
if n == 1 || len(allFields[0].index) != len(allFields[1].index) || allFields[0].hasName != allFields[1].hasName {
flattened = append(flattened, allFields[0]) // only keep field if there is a dominant field
}
allFields = allFields[n:]
}
// Sort the fields according to a breadth-first ordering
// so that we can re-number IDs with the smallest possible values.
// This optimizes use of uintSet such that it fits in the 64-entry bit set.
slices.SortFunc(flattened, func(x, y structField) int {
return cmp.Compare(x.id, y.id)
})
for i := range flattened {
flattened[i].id = i
}
// Sort the fields according to a depth-first ordering
// as the typical order that fields are marshaled.
slices.SortFunc(flattened, func(x, y structField) int {
return slices.Compare(x.index, y.index)
})
// Compute the mapping of fields in the byActualName map.
// Pre-fold all names so that we can lookup folded names quickly.
fs = structFields{
flattened: flattened,
byActualName: make(map[string]*structField, len(flattened)),
byFoldedName: make(map[string][]*structField, len(flattened)),
}
for i, f := range fs.flattened {
foldedName := string(foldName([]byte(f.name)))
// Store pointers into fs.flattened rather than to the loop copy f.
fs.byActualName[f.name] = &fs.flattened[i]
fs.byFoldedName[foldedName] = append(fs.byFoldedName[foldedName], &fs.flattened[i])
}
for foldedName, fields := range fs.byFoldedName {
if len(fields) > 1 {
// The precedence order for conflicting ignoreCase names
// is by breadth-first order, rather than depth-first order.
slices.SortFunc(fields, func(x, y *structField) int {
return cmp.Compare(x.id, y.id)
})
fs.byFoldedName[foldedName] = fields
}
}
// An inlined fallback is dominant if it is the only one, or if it is
// strictly shallower than the next one in breadth-first order.
if n := len(inlinedFallbacks); n == 1 || (n > 1 && len(inlinedFallbacks[0].index) != len(inlinedFallbacks[1].index)) {
fs.inlinedFallback = &inlinedFallbacks[0] // dominant inlined fallback field
}
fs.reindex()
return fs, serr
}
// indirectType strips at most one level of pointer indirection from t,
// and only when that pointer type is unnamed.
// This mirrors the Go rule that an embedded field may be T or *T,
// but neither **T nor P (where P is a named pointer type).
func indirectType(t reflect.Type) reflect.Type {
	isUnnamedPointer := t.Kind() == reflect.Pointer && t.Name() == ""
	if !isUnnamedPointer {
		return t
	}
	return t.Elem()
}
// matchFoldedName reports whether name is an acceptable case-insensitive
// match for this field under the given options.
// The caller must already have established that
// foldName(f.name) == foldName(name).
//
// Case-insensitive matching applies if the field carries the `case:ignore`
// tag option, or if the MatchCaseInsensitiveNames call option is set and
// the field does not carry the `case:strict` tag option.
// In other words, the `case:ignore` and `case:strict` tag options
// take precedence over the call option.
//
// Case-insensitivity in v1 was defined by strings.EqualFold,
// which compares dashes and underscores strictly,
// whereas v2 ignores the presence of dashes and underscores.
// Hence, when the MatchCaseSensitiveDelimiter call option is set,
// a match additionally requires strings.EqualFold to succeed.
func (f *structField) matchFoldedName(name []byte, flags *jsonflags.Flags) bool {
	caseInsensitive := f.casing == caseIgnore ||
		(flags.Get(jsonflags.MatchCaseInsensitiveNames) && f.casing != caseStrict)
	if !caseInsensitive {
		return false
	}
	if !flags.Get(jsonflags.MatchCaseSensitiveDelimiter) {
		return true // the folded names are already known to be equal
	}
	return strings.EqualFold(string(name), f.name)
}
// Values of fieldOptions.casing as set by the `case:ignore` and
// `case:strict` tag options. They occupy distinct bits so that
// parseFieldOptions can detect when both options were specified.
const (
caseIgnore = 1
caseStrict = 2
)
// fieldOptions is the structured form of the `json` tag of a Go struct field
// as produced by parseFieldOptions.
type fieldOptions struct {
name string // JSON object name; the Go field name unless the tag provides one
quotedName string // quoted name per RFC 8785, section 3.2.2.2.
hasName bool // whether the tag explicitly provided the name
nameNeedEscape bool // result of jsonwire.NeedEscape for name
casing int8 // either 0, caseIgnore, or caseStrict
inline bool // `inline` tag option was specified
unknown bool // `unknown` tag option was specified
omitzero bool // `omitzero` tag option was specified
omitempty bool // `omitempty` tag option was specified
string bool // `string` tag option was specified
format string // value of the `format` tag option
}
// parseFieldOptions parses the `json` tag in a Go struct field as
// a structured set of options configuring parameters such as
// the JSON member name and other features.
//
// It reports ignored as true if the tag is exactly "-" or if the field
// is unexported and not embedded; out is the zero value in that case.
// The returned err is the first problem found in the tag.
// Parsing continues after a problem, so out may be populated
// even when err is non-nil.
func parseFieldOptions(sf reflect.StructField) (out fieldOptions, ignored bool, err error) {
tag, hasTag := sf.Tag.Lookup("json")
// Check whether this field is explicitly ignored.
if tag == "-" {
return fieldOptions{}, true, nil
}
// Check whether this field is unexported and not embedded,
// which Go reflection cannot mutate for the sake of serialization.
//
// An embedded field of an unexported type is still capable of
// forwarding exported fields, which may be JSON serialized.
// This technically operates on the edge of what is permissible by
// the Go language, but the most recent decision is to permit this.
//
// See https://go.dev/issue/24153 and https://go.dev/issue/32772.
if !sf.IsExported() && !sf.Anonymous {
// Tag options specified on an unexported field suggests user error.
if hasTag {
err = cmp.Or(err, fmt.Errorf("unexported Go struct field %s cannot have non-ignored `json:%q` tag", sf.Name, tag))
}
return fieldOptions{}, true, err
}
// Determine the JSON member name for this Go field. A user-specified name
// may be provided as either an identifier or a single-quoted string.
// The single-quoted string allows arbitrary characters in the name.
// See https://go.dev/issue/2718 and https://go.dev/issue/3546.
out.name = sf.Name // always starts with an uppercase character
if len(tag) > 0 && !strings.HasPrefix(tag, ",") {
// For better compatibility with v1, accept almost any unescaped name.
// n is the length of the leading run of characters that are not reserved.
n := len(tag) - len(strings.TrimLeftFunc(tag, func(r rune) bool {
return !strings.ContainsRune(",\\'\"`", r) // reserve comma, backslash, and quotes
}))
name := tag[:n]
// If the next character is not a comma, then the name is either
// malformed (if n > 0) or a single-quoted name.
// In either case, call consumeTagOption to handle it further.
var err2 error
if !strings.HasPrefix(tag[n:], ",") && len(name) != len(tag) {
name, n, err2 = consumeTagOption(tag)
if err2 != nil {
err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed `json` tag: %v", sf.Name, err2))
}
}
if !utf8.ValidString(name) {
err = cmp.Or(err, fmt.Errorf("Go struct field %s has JSON object name %q with invalid UTF-8", sf.Name, name))
name = string([]rune(name)) // replace invalid UTF-8 with utf8.RuneError
}
// Only adopt the name from the tag if it was well-formed;
// otherwise the Go field name remains in effect.
if err2 == nil {
out.hasName = true
out.name = name
}
tag = tag[n:]
}
b, _ := jsonwire.AppendQuote(nil, out.name, &jsonflags.Flags{})
out.quotedName = string(b)
out.nameNeedEscape = jsonwire.NeedEscape(out.name)
// Handle any additional tag options (if any).
var wasFormat bool // whether the previously processed option was `format`
seenOpts := make(map[string]bool) // options already encountered, for duplicate detection
for len(tag) > 0 {
// Consume comma delimiter.
if tag[0] != ',' {
err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed `json` tag: invalid character %q before next option (expecting ',')", sf.Name, tag[0]))
} else {
tag = tag[len(","):]
if len(tag) == 0 {
err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed `json` tag: invalid trailing ',' character", sf.Name))
break
}
}
// Consume and process the tag option.
opt, n, err2 := consumeTagOption(tag)
if err2 != nil {
err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed `json` tag: %v", sf.Name, err2))
}
rawOpt := tag[:n]
tag = tag[n:]
switch {
case wasFormat:
err = cmp.Or(err, fmt.Errorf("Go struct field %s has `format` tag option that was not specified last", sf.Name))
case strings.HasPrefix(rawOpt, "'") && strings.TrimFunc(opt, isLetterOrDigit) == "":
err = cmp.Or(err, fmt.Errorf("Go struct field %s has unnecessarily quoted appearance of `%s` tag option; specify `%s` instead", sf.Name, rawOpt, opt))
}
// NOTE: Each break below exits this switch statement (not the loop),
// so the duplicate check that follows still runs.
switch opt {
case "case":
if !strings.HasPrefix(tag, ":") {
err = cmp.Or(err, fmt.Errorf("Go struct field %s is missing value for `case` tag option; specify `case:ignore` or `case:strict` instead", sf.Name))
break
}
tag = tag[len(":"):]
// These declarations shadow the outer opt, n, and err2 within this case.
opt, n, err2 := consumeTagOption(tag)
if err2 != nil {
err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed value for `case` tag option: %v", sf.Name, err2))
break
}
rawOpt := tag[:n]
tag = tag[n:]
if strings.HasPrefix(rawOpt, "'") {
err = cmp.Or(err, fmt.Errorf("Go struct field %s has unnecessarily quoted appearance of `case:%s` tag option; specify `case:%s` instead", sf.Name, rawOpt, opt))
}
switch opt {
case "ignore":
out.casing |= caseIgnore
case "strict":
out.casing |= caseStrict
default:
err = cmp.Or(err, fmt.Errorf("Go struct field %s has unknown `case:%s` tag value", sf.Name, rawOpt))
}
case "inline":
out.inline = true
case "unknown":
out.unknown = true
case "omitzero":
out.omitzero = true
case "omitempty":
out.omitempty = true
case "string":
out.string = true
case "format":
if !strings.HasPrefix(tag, ":") {
err = cmp.Or(err, fmt.Errorf("Go struct field %s is missing value for `format` tag option", sf.Name))
break
}
tag = tag[len(":"):]
// These declarations shadow the outer opt, n, and err2 within this case.
opt, n, err2 := consumeTagOption(tag)
if err2 != nil {
err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed value for `format` tag option: %v", sf.Name, err2))
break
}
tag = tag[n:]
out.format = opt
wasFormat = true
default:
// Reject keys that resemble one of the supported options.
// This catches invalid mutants such as "omitEmpty" or "omit_empty".
normOpt := strings.ReplaceAll(strings.ToLower(opt), "_", "")
switch normOpt {
case "case", "inline", "unknown", "omitzero", "omitempty", "string", "format":
err = cmp.Or(err, fmt.Errorf("Go struct field %s has invalid appearance of `%s` tag option; specify `%s` instead", sf.Name, opt, normOpt))
}
// NOTE: Everything else is ignored. This does not mean it is
// forward compatible to insert arbitrary tag options since
// a future version of this package may understand that tag.
}
// Reject duplicates.
switch {
case out.casing == caseIgnore|caseStrict:
err = cmp.Or(err, fmt.Errorf("Go struct field %s cannot have both `case:ignore` and `case:strict` tag options", sf.Name))
case seenOpts[opt]:
err = cmp.Or(err, fmt.Errorf("Go struct field %s has duplicate appearance of `%s` tag option", sf.Name, rawOpt))
}
seenOpts[opt] = true
}
return out, false, err
}
// consumeTagOption consumes the next option at the start of in,
// which is either a Go identifier or a single-quoted string.
//
// On success, it returns the option (with quoting and escaping removed
// for a single-quoted string) and the number of bytes of in consumed.
// If the next option is invalid, it returns all of in until the next comma
// (or all of in if there is no comma), the length of that prefix,
// and an error describing the problem.
// An empty input is reported as [io.ErrUnexpectedEOF].
func consumeTagOption(in string) (string, int, error) {
	// For legacy compatibility with v1, assume options are comma-separated.
	i := strings.IndexByte(in, ',')
	if i < 0 {
		i = len(in)
	}

	switch r, _ := utf8.DecodeRuneInString(in); {
	// Option as a Go identifier.
	case r == '_' || unicode.IsLetter(r):
		n := len(in) - len(strings.TrimLeftFunc(in, isLetterOrDigit))
		return in[:n], n, nil
	// Option as a single-quoted string.
	case r == '\'':
		// The grammar is nearly identical to a double-quoted Go string literal,
		// but uses single quotes as the terminators. The reason for a custom
		// grammar is because both backtick and double quotes cannot be used
		// verbatim in a struct tag.
		//
		// Convert a single-quoted string to a double-quote string and rely on
		// strconv.Unquote to handle the rest.
		var inEscape bool
		b := []byte{'"'}
		n := len(`'`)
		for len(in) > n {
			r, rn := utf8.DecodeRuneInString(in[n:])
			switch {
			case inEscape:
				if r == '\'' {
					b = b[:len(b)-1] // remove escape character: `\'` => `'`
				}
				inEscape = false
			case r == '\\':
				inEscape = true
			case r == '"':
				b = append(b, '\\') // insert escape character: `"` => `\"`
			case r == '\'':
				b = append(b, '"')
				n += len(`'`)
				out, err := strconv.Unquote(string(b))
				if err != nil {
					return in[:i], i, fmt.Errorf("invalid single-quoted string: %s", in[:n])
				}
				return out, n, nil
			}
			b = append(b, in[n:][:rn]...)
			n += rn
		}
		if n > 10 {
			// Limit the amount of context printed in the error.
			// Back up to the start of a rune (by at most utf8.UTFMax-1 bytes)
			// so that the message never ends in a truncated multi-byte
			// UTF-8 sequence. Since the old n exceeded 10, in[10] exists.
			n = 10
			for lo := n - (utf8.UTFMax - 1); n > lo && !utf8.RuneStart(in[n]); {
				n--
			}
		}
		return in[:i], i, fmt.Errorf("single-quoted string not terminated: %s...", in[:n])
	case len(in) == 0:
		return in[:i], i, io.ErrUnexpectedEOF
	default:
		return in[:i], i, fmt.Errorf("invalid character %q at start of option (expecting Unicode letter or single quote)", r)
	}
}
// isLetterOrDigit reports whether r is an underscore,
// a Unicode letter, or a Unicode number.
func isLetterOrDigit(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsNumber(r)
	}
}
// boolsCompare orders two booleans such that false sorts before true.
// It returns -1 if x < y, 0 if x == y, and +1 if x > y.
func boolsCompare(x, y bool) int {
	if x == y {
		return 0
	}
	if y { // x is false and y is true
		return -1
	}
	return +1 // x is true and y is false
}