// Copyright 2020 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package jsontext import ( "bytes" "errors" "io" "slices" "sync" "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonwire" ) // NOTE: Value is analogous to v1 json.RawMessage. // AppendFormat formats the JSON value in src and appends it to dst // according to the specified options. // See [Value.Format] for more details about the formatting behavior. // // The dst and src may overlap. // If an error is reported, then the entirety of src is appended to dst. func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) { e := getBufferedEncoder(opts...) defer putBufferedEncoder(e) e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1) if err := e.s.WriteValue(src); err != nil { return append(dst, src...), err } return append(dst, e.s.Buf...), nil } // Value represents a single raw JSON value, which may be one of the following: // - a JSON literal (i.e., null, true, or false) // - a JSON string (e.g., "hello, world!") // - a JSON number (e.g., 123.456) // - an entire JSON object (e.g., {"fizz":"buzz"} ) // - an entire JSON array (e.g., [1,2,3] ) // // Value can represent entire array or object values, while [Token] cannot. // Value may contain leading and/or trailing whitespace. type Value []byte // Clone returns a copy of v. func (v Value) Clone() Value { return bytes.Clone(v) } // String returns the string formatting of v. func (v Value) String() string { if v == nil { return "null" } return string(v) } // IsValid reports whether the raw JSON value is syntactically valid // according to the specified options. // // By default (if no options are specified), it validates according to RFC 7493. // It verifies whether the input is properly encoded as UTF-8, // that escape sequences within strings decode to valid Unicode codepoints, and // that all names in each object are unique. // It does not verify whether numbers are representable within the limits // of any common numeric type (e.g., float64, int64, or uint64). // // Relevant options include: // - [AllowDuplicateNames] // - [AllowInvalidUTF8] // // All other options are ignored. func (v Value) IsValid(opts ...Options) bool { // TODO: Document support for [WithByteLimit] and [WithDepthLimit]. d := getBufferedDecoder(v, opts...) defer putBufferedDecoder(d) _, errVal := d.ReadValue() _, errEOF := d.ReadToken() return errVal == nil && errEOF == io.EOF } // Format formats the raw JSON value in place. // // By default (if no options are specified), it validates according to RFC 7493 // and produces the minimal JSON representation, where // all whitespace is elided and JSON strings use the shortest encoding. // // Relevant options include: // - [AllowDuplicateNames] // - [AllowInvalidUTF8] // - [EscapeForHTML] // - [EscapeForJS] // - [PreserveRawStrings] // - [CanonicalizeRawInts] // - [CanonicalizeRawFloats] // - [ReorderRawObjects] // - [SpaceAfterColon] // - [SpaceAfterComma] // - [Multiline] // - [WithIndent] // - [WithIndentPrefix] // // All other options are ignored. // // It is guaranteed to succeed if the value is valid according to the same options. // If the value is already formatted, then the buffer is not mutated. func (v *Value) Format(opts ...Options) error { // TODO: Document support for [WithByteLimit] and [WithDepthLimit]. return v.format(opts, nil) } // format accepts two []Options to avoid the allocation appending them together. // It is equivalent to v.Format(append(opts1, opts2...)...). func (v *Value) format(opts1, opts2 []Options) error { e := getBufferedEncoder(opts1...) defer putBufferedEncoder(e) e.s.Join(opts2...) e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1) if err := e.s.WriteValue(*v); err != nil { return err } if !bytes.Equal(*v, e.s.Buf) { *v = append((*v)[:0], e.s.Buf...) } return nil } // Compact removes all whitespace from the raw JSON value. // // It does not reformat JSON strings or numbers to use any other representation. // To maximize the set of JSON values that can be formatted, // this permits values with duplicate names and invalid UTF-8. // // Compact is equivalent to calling [Value.Format] with the following options: // - [AllowDuplicateNames](true) // - [AllowInvalidUTF8](true) // - [PreserveRawStrings](true) // // Any options specified by the caller are applied after the initial set // and may deliberately override prior options. func (v *Value) Compact(opts ...Options) error { return v.format([]Options{ AllowDuplicateNames(true), AllowInvalidUTF8(true), PreserveRawStrings(true), }, opts) } // Indent reformats the whitespace in the raw JSON value so that each element // in a JSON object or array begins on a indented line according to the nesting. // // It does not reformat JSON strings or numbers to use any other representation. // To maximize the set of JSON values that can be formatted, // this permits values with duplicate names and invalid UTF-8. // // Indent is equivalent to calling [Value.Format] with the following options: // - [AllowDuplicateNames](true) // - [AllowInvalidUTF8](true) // - [PreserveRawStrings](true) // - [Multiline](true) // // Any options specified by the caller are applied after the initial set // and may deliberately override prior options. func (v *Value) Indent(opts ...Options) error { return v.format([]Options{ AllowDuplicateNames(true), AllowInvalidUTF8(true), PreserveRawStrings(true), Multiline(true), }, opts) } // Canonicalize canonicalizes the raw JSON value according to the // JSON Canonicalization Scheme (JCS) as defined by RFC 8785 // where it produces a stable representation of a JSON value. // // JSON strings are formatted to use their minimal representation, // JSON numbers are formatted as double precision numbers according // to some stable serialization algorithm. // JSON object members are sorted in ascending order by name. // All whitespace is removed. // // The output stability is dependent on the stability of the application data // (see RFC 8785, Appendix E). It cannot produce stable output from // fundamentally unstable input. For example, if the JSON value // contains ephemeral data (e.g., a frequently changing timestamp), // then the value is still unstable regardless of whether this is called. // // Canonicalize is equivalent to calling [Value.Format] with the following options: // - [CanonicalizeRawInts](true) // - [CanonicalizeRawFloats](true) // - [ReorderRawObjects](true) // // Any options specified by the caller are applied after the initial set // and may deliberately override prior options. // // Note that JCS treats all JSON numbers as IEEE 754 double precision numbers. // Any numbers with precision beyond what is representable by that form // will lose their precision when canonicalized. For example, integer values // beyond ±2⁵³ will lose their precision. To preserve the original representation // of JSON integers, additionally set [CanonicalizeRawInts] to false: // // v.Canonicalize(jsontext.CanonicalizeRawInts(false)) func (v *Value) Canonicalize(opts ...Options) error { return v.format([]Options{ CanonicalizeRawInts(true), CanonicalizeRawFloats(true), ReorderRawObjects(true), }, opts) } // MarshalJSON returns v as the JSON encoding of v. // It returns the stored value as the raw JSON output without any validation. // If v is nil, then this returns a JSON null. func (v Value) MarshalJSON() ([]byte, error) { // NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON. if v == nil { return []byte("null"), nil } return v, nil } // UnmarshalJSON sets v as the JSON encoding of b. // It stores a copy of the provided raw JSON input without any validation. func (v *Value) UnmarshalJSON(b []byte) error { // NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON. if v == nil { return errors.New("jsontext.Value: UnmarshalJSON on nil pointer") } *v = append((*v)[:0], b...) return nil } // Kind returns the starting token kind. // For a valid value, this will never include '}' or ']'. func (v Value) Kind() Kind { if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 { return Kind(v[0]).normalize() } return invalidKind } const commaAndWhitespace = ", \n\r\t" type objectMember struct { // name is the unquoted name. name []byte // e.g., "name" // buffer is the entirety of the raw JSON object member // starting from right after the previous member (or opening '{') // until right after the member value. buffer []byte // e.g., `, \n\r\t"name": "value"` } func (x objectMember) Compare(y objectMember) int { if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 { return c } // With [AllowDuplicateNames] or [AllowInvalidUTF8], // names could be identical, so also sort using the member value. return jsonwire.CompareUTF16( bytes.TrimLeft(x.buffer, commaAndWhitespace), bytes.TrimLeft(y.buffer, commaAndWhitespace)) } var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }} func getObjectMembers() *[]objectMember { ns := objectMemberPool.Get().(*[]objectMember) *ns = (*ns)[:0] return ns } func putObjectMembers(ns *[]objectMember) { if cap(*ns) < 1<<10 { clear(*ns) // avoid pinning name and buffer objectMemberPool.Put(ns) } } // mustReorderObjects reorders in-place all object members in a JSON value, // which must be valid otherwise it panics. func mustReorderObjects(b []byte) { // Obtain a buffered encoder just to use its internal buffer as // a scratch buffer for reordering object members. e2 := getBufferedEncoder() defer putBufferedEncoder(e2) // Disable unnecessary checks to syntactically parse the JSON value. d := getBufferedDecoder(b) defer putBufferedDecoder(d) d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3 } // mustReorderObjectsFromDecoder recursively reorders all object members in place // according to the ordering specified in RFC 8785, section 3.2.3. // // Pre-conditions: // - The value is valid (i.e., no decoder errors should ever occur). // - Initial call is provided a Decoder reading from the start of v. // // Post-conditions: // - Exactly one JSON value is read from the Decoder. // - All fully-parsed JSON objects are reordered by directly moving // the members in the value buffer. // // The runtime is approximately O(n·log(n)) + O(m·log(m)), // where n is len(v) and m is the total number of object members. func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) { switch tok, err := d.ReadToken(); tok.Kind() { case '{': // Iterate and collect the name and offsets for every object member. members := getObjectMembers() defer putObjectMembers(members) var prevMember objectMember isSorted := true beforeBody := d.InputOffset() // offset after '{' for d.PeekKind() != '}' { beforeName := d.InputOffset() var flags jsonwire.ValueFlags name, _ := d.s.ReadValue(&flags) name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim()) mustReorderObjectsFromDecoder(d, scratch) afterValue := d.InputOffset() currMember := objectMember{name, d.s.buf[beforeName:afterValue]} if isSorted && len(*members) > 0 { isSorted = objectMember.Compare(prevMember, currMember) < 0 } *members = append(*members, currMember) prevMember = currMember } afterBody := d.InputOffset() // offset before '}' d.ReadToken() // Sort the members; return early if it's already sorted. if isSorted { return } firstBufferBeforeSorting := (*members)[0].buffer slices.SortFunc(*members, objectMember.Compare) firstBufferAfterSorting := (*members)[0].buffer // Append the reordered members to a new buffer, // then copy the reordered members back over the original members. // Avoid swapping in place since each member may be a different size // where moving a member over a smaller member may corrupt the data // for subsequent members before they have been moved. // // The following invariant must hold: // sum([m.after-m.before for m in members]) == afterBody-beforeBody commaAndWhitespacePrefix := func(b []byte) []byte { return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))] } sorted := (*scratch)[:0] for i, member := range *members { switch { case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]: // First member after sorting is not the first member before sorting, // so use the prefix of the first member before sorting. sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...) sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...) case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]: // Later member after sorting is the first member before sorting, // so use the prefix of the first member after sorting. sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...) sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...) default: sorted = append(sorted, member.buffer...) } } if int(afterBody-beforeBody) != len(sorted) { panic("BUG: length invariant violated") } copy(d.s.buf[beforeBody:afterBody], sorted) // Update scratch buffer to the largest amount ever used. if len(sorted) > len(*scratch) { *scratch = sorted } case '[': for d.PeekKind() != ']' { mustReorderObjectsFromDecoder(d, scratch) } d.ReadToken() default: if err != nil { panic("BUG: " + err.Error()) } } }