diff --git a/arshal_any.go b/arshal_any.go index 5ff4bf9..d1171a1 100644 --- a/arshal_any.go +++ b/arshal_any.go @@ -7,7 +7,6 @@ package json import ( "cmp" "reflect" - "strconv" "github.com/go-json-experiment/json/internal" "github.com/go-json-experiment/json/internal/jsonflags" @@ -83,9 +82,9 @@ func unmarshalValueAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (any, error) if uo.Flags.Get(jsonflags.UnmarshalAnyWithRawNumber) { return internal.RawNumberOf(val), nil } - fv, ok := jsonwire.ParseFloat(val, 64) - if !ok { - return fv, newUnmarshalErrorAfterWithValue(dec, float64Type, strconv.ErrRange) + fv, err := jsonwire.ParseFloat(val, 64) + if err != nil { + return nil, newUnmarshalErrorAfterWithValue(dec, float64Type, err) } return fv, nil default: @@ -196,13 +195,13 @@ func unmarshalObjectAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (map[string] } val, err := unmarshalValueAny(dec, uo) - obj[name] = val if err != nil { if isFatalError(err, uo.Flags) { return obj, err } errUnmarshal = cmp.Or(err, errUnmarshal) } + obj[name] = val } if _, err := dec.ReadToken(); err != nil { return obj, err @@ -266,13 +265,13 @@ func unmarshalArrayAny(dec *jsontext.Decoder, uo *jsonopts.Struct) ([]any, error var errUnmarshal error for dec.PeekKind() != ']' { val, err := unmarshalValueAny(dec, uo) - arr = append(arr, val) if err != nil { if isFatalError(err, uo.Flags) { return arr, err } errUnmarshal = cmp.Or(errUnmarshal, err) } + arr = append(arr, val) } if _, err := dec.ReadToken(); err != nil { return arr, err diff --git a/arshal_default.go b/arshal_default.go index a6777ff..1ba09dd 100644 --- a/arshal_default.go +++ b/arshal_default.go @@ -43,6 +43,10 @@ var ( bytesType = reflect.TypeFor[[]byte]() emptyStructType = reflect.TypeFor[struct{}]() + + nanString = jsontext.String("NaN") + pinfString = jsontext.String("Infinity") + ninfString = jsontext.String("-Infinity") ) const startDetectingCyclesAfter = 1000 @@ -479,28 +483,11 @@ func makeIntArshaler(t reflect.Type) 
*arshaler { if stringify && k == '0' { break } - var negOffset int - neg := len(val) > 0 && val[0] == '-' - if neg { - negOffset = 1 - } - n, ok := jsonwire.ParseUint(val[negOffset:]) - maxInt := uint64(1) << (bits - 1) - overflow := (neg && n > maxInt) || (!neg && n > maxInt-1) - if !ok { - if n != math.MaxUint64 { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrSyntax) - } - overflow = true - } - if overflow { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrRange) - } - if neg { - va.SetInt(int64(-n)) - } else { - va.SetInt(int64(+n)) + n, err := jsonwire.ParseInt(val, bits) + if err != nil { + return newUnmarshalErrorAfterWithValue(dec, t, err) } + va.SetInt(n) return nil } return newUnmarshalErrorAfter(dec, t, nil) @@ -566,17 +553,9 @@ func makeUintArshaler(t reflect.Type) *arshaler { if stringify && k == '0' { break } - n, ok := jsonwire.ParseUint(val) - maxUint := uint64(1) << bits - overflow := n > maxUint-1 - if !ok { - if n != math.MaxUint64 { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrSyntax) - } - overflow = true - } - if overflow { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrRange) + n, err := jsonwire.ParseUint(val, bits) + if err != nil { + return newUnmarshalErrorAfterWithValue(dec, t, err) } va.SetUint(n) return nil @@ -606,7 +585,18 @@ func makeFloatArshaler(t reflect.Type) *arshaler { err := fmt.Errorf("unsupported value: %v", fv) return newMarshalErrorBefore(enc, t, err) } - return enc.WriteToken(jsontext.Float(fv)) + var token jsontext.Token + switch { + case math.IsInf(fv, 1): + token = pinfString + case math.IsInf(fv, -1): + token = ninfString + case math.IsNaN(fv): + token = nanString + default: + panic("unreachable") + } + return enc.WriteToken(token) } // Optimize for marshaling without preceding whitespace or string escaping. 
@@ -679,11 +669,11 @@ func makeFloatArshaler(t reflect.Type) *arshaler { if stringify && k == '0' { break } - fv, ok := jsonwire.ParseFloat(val, bits) - va.SetFloat(fv) - if !ok { - return newUnmarshalErrorAfterWithValue(dec, t, strconv.ErrRange) + fv, err := jsonwire.ParseFloat(val, bits) + if err != nil { + return newUnmarshalErrorAfterWithValue(dec, t, err) } + va.SetFloat(fv) return nil } return newUnmarshalErrorAfter(dec, t, nil) diff --git a/arshal_test.go b/arshal_test.go index 8379e22..1a4fbe3 100644 --- a/arshal_test.go +++ b/arshal_test.go @@ -5434,7 +5434,7 @@ func TestUnmarshal(t *testing.T) { name: jsontest.Name("Floats/Float32/Overflow"), inBuf: `-1e1000`, inVal: addr(float32(32.32)), - want: addr(float32(-math.MaxFloat32)), + want: addr(float32(32.32)), wantErr: EU(strconv.ErrRange).withVal(`-1e1000`).withType('0', T[float32]()), }, { name: jsontest.Name("Floats/Float64/Pi"), @@ -5450,13 +5450,13 @@ func TestUnmarshal(t *testing.T) { name: jsontest.Name("Floats/Float64/Overflow"), inBuf: `-1e1000`, inVal: addr(float64(64.64)), - want: addr(float64(-math.MaxFloat64)), + want: addr(float64(64.64)), wantErr: EU(strconv.ErrRange).withVal(`-1e1000`).withType('0', T[float64]()), }, { name: jsontest.Name("Floats/Any/Overflow"), inBuf: `1e1000`, inVal: new(any), - want: addr(any(float64(math.MaxFloat64))), + want: new(any), wantErr: EU(strconv.ErrRange).withVal(`1e1000`).withType('0', T[float64]()), }, { name: jsontest.Name("Floats/Named"), diff --git a/arshal_time.go b/arshal_time.go index 9cb8e80..6dce5ca 100644 --- a/arshal_time.go +++ b/arshal_time.go @@ -419,18 +419,19 @@ func appendDurationBase10(b []byte, d time.Duration, pow10 uint64) []byte { func parseDurationBase10(b []byte, pow10 uint64) (time.Duration, error) { suffix, neg := consumeSign(b) // consume sign wholeBytes, fracBytes := bytesCutByte(suffix, '.', true) // consume whole and frac fields - whole, okWhole := jsonwire.ParseUint(wholeBytes) // parse whole field; may overflow + whole, err := 
jsonwire.ParseUint(wholeBytes, 64) // parse whole field; may overflow frac, okFrac := parseFracBase10(fracBytes, pow10) // parse frac field hi, lo := bits.Mul64(whole, uint64(pow10)) // overflow if hi > 0 sum, co := bits.Add64(lo, uint64(frac), 0) // overflow if co > 0 switch d := mayApplyDurationSign(sum, neg); { // overflow if neg != (d < 0) - case (!okWhole && whole != math.MaxUint64) || !okFrac: - return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrSyntax) - case !okWhole || hi > 0 || co > 0 || neg != (d < 0): - return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrRange) - default: + case !okFrac: + err = strconv.ErrSyntax + case hi > 0 || co > 0 || neg != (d < 0): + err = strconv.ErrRange + case err == nil: return d, nil } + return 0, fmt.Errorf("invalid duration %q: %w", b, err) } // appendDurationBase60 appends d formatted with H:MM:SS.SSS notation. @@ -455,7 +456,7 @@ func parseDurationBase60(b []byte) (time.Duration, error) { hourBytes, suffix := bytesCutByte(suffix, ':', false) // consume hour field minBytes, suffix := bytesCutByte(suffix, ':', false) // consume min field secBytes, nsecBytes := bytesCutByte(suffix, '.', true) // consume sec and nsec fields - hour, okHour := jsonwire.ParseUint(hourBytes) // parse hour field; may overflow + hour, err := jsonwire.ParseUint(hourBytes, 64) // parse hour field; may overflow min := parseDec2(minBytes) // parse min field sec := parseDec2(secBytes) // parse sec field nsec, okNsec := parseFracBase10(nsecBytes, 1e9) // parse nsec field @@ -463,13 +464,14 @@ func parseDurationBase60(b []byte) (time.Duration, error) { hi, lo := bits.Mul64(hour, 60*60*1e9) // overflow if hi > 0 sum, co := bits.Add64(lo, n, 0) // overflow if co > 0 switch d := mayApplyDurationSign(sum, neg); { // overflow if neg != (d < 0) - case (!okHour && hour != math.MaxUint64) || !checkBase60(minBytes) || !checkBase60(secBytes) || !okNsec: - return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrSyntax) - case !okHour || 
hi > 0 || co > 0 || neg != (d < 0): - return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrRange) - default: + case !checkBase60(minBytes) || !checkBase60(secBytes) || !okNsec: + err = strconv.ErrSyntax + case hi > 0 || co > 0 || neg != (d < 0): + err = strconv.ErrRange + case err == nil: return d, nil } + return 0, fmt.Errorf("invalid duration %q: %w", b, err) } // mayAppendDurationSign appends a negative sign if n is negative. @@ -517,19 +519,19 @@ func appendTimeUnix(b []byte, t time.Time, pow10 uint64) []byte { func parseTimeUnix(b []byte, pow10 uint64) (time.Time, error) { suffix, neg := consumeSign(b) // consume sign wholeBytes, fracBytes := bytesCutByte(suffix, '.', true) // consume whole and frac fields - whole, okWhole := jsonwire.ParseUint(wholeBytes) // parse whole field; may overflow + whole, err := jsonwire.ParseUint(wholeBytes, 64) // parse whole field; may overflow frac, okFrac := parseFracBase10(fracBytes, 1e9/pow10) // parse frac field var sec, nsec int64 switch { case pow10 == 1e0: // fast case where units is in seconds sec = int64(whole) // check overflow later after negation nsec = int64(frac) // cannot overflow - case okWhole: // intermediate case where units is not seconds, but no overflow + case err == nil: // intermediate case where units is not seconds, but no overflow sec = int64(whole / pow10) // check overflow later after negation nsec = int64((whole%pow10)*(1e9/pow10) + frac) // cannot overflow - case !okWhole && whole == math.MaxUint64: // slow case where units is not seconds and overflow occurred + case err == strconv.ErrRange: // slow case where units is not seconds and overflow occurred width := int(math.Log10(float64(pow10))) // compute len(strconv.Itoa(pow10-1)) - whole, okWhole = jsonwire.ParseUint(wholeBytes[:len(wholeBytes)-width]) // parse the upper whole field + whole, err = jsonwire.ParseUint(wholeBytes[:len(wholeBytes)-width], 64) // parse the upper whole field mid, _ := 
parsePaddedBase10(wholeBytes[len(wholeBytes)-width:], pow10) // parse the lower whole field sec = int64(whole) // check overflow later after negation nsec = int64(mid*(1e9/pow10) + frac) // cannot overflow @@ -538,13 +540,14 @@ func parseTimeUnix(b []byte, pow10 uint64) (time.Time, error) { sec, nsec = negateSecNano(sec, nsec) } switch t := time.Unix(sec, nsec).UTC(); { - case (!okWhole && whole != math.MaxUint64) || !okFrac: - return time.Time{}, fmt.Errorf("invalid time %q: %w", b, strconv.ErrSyntax) - case !okWhole || neg != (t.Unix() < 0): - return time.Time{}, fmt.Errorf("invalid time %q: %w", b, strconv.ErrRange) - default: + case !okFrac: + err = strconv.ErrSyntax + case neg != (t.Unix() < 0): + err = strconv.ErrRange + case err == nil: return t, nil } + return time.Time{}, fmt.Errorf("invalid time %q: %w", b, err) } // negateSecNano negates a Unix timestamp, where nsec must be within [0, 1e9). diff --git a/bench_test.go b/bench_test.go index db1abbc..0d22031 100644 --- a/bench_test.go +++ b/bench_test.go @@ -407,9 +407,13 @@ func mustDecodeTokens(t testing.TB, data []byte) []jsontext.Token { case '"': tokens = append(tokens, jsontext.String(tok.String())) case '0': - tokens = append(tokens, jsontext.Float(tok.Float())) + v, err := tok.ParseFloat(64) + if err != nil { + t.Fatalf("ParseFloat error: %v", err) + } + tokens = append(tokens, jsontext.Float(v)) default: - tokens = append(tokens, tok.Clone()) + tokens = append(tokens, jsontext.Raw(tok.Clone())) } } return tokens diff --git a/internal/jsonwire/decode.go b/internal/jsonwire/decode.go index 0278771..cd5652b 100644 --- a/internal/jsonwire/decode.go +++ b/internal/jsonwire/decode.go @@ -6,7 +6,6 @@ package jsonwire import ( "io" - "math" "slices" "strconv" "unicode/utf16" @@ -586,42 +585,62 @@ func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) { // ParseUint parses b as a decimal unsigned integer according to // a strict subset of the JSON number grammar, returning the value if 
valid. -// It returns (0, false) if there is a syntax error and -// returns (math.MaxUint64, false) if there is an overflow. -func ParseUint(b []byte) (v uint64, ok bool) { +// It returns (0, strconv.ErrSyntax) if there is a syntax error and +// returns (max, strconv.ErrRange) if there is an overflow. +func ParseUint(b []byte, bits int) (uint64, error) { const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64))) var n int + var v uint64 for ; len(b) > n && ('0' <= b[n] && b[n] <= '9'); n++ { v = 10*v + uint64(b[n]-'0') } + + max := uint64(1)<= unsafeWidth && (b[0] != '1' || v < 1e19 || n > unsafeWidth): - return math.MaxUint64, false + return max, strconv.ErrRange + case v > max: + return max, strconv.ErrRange + } + return v, nil +} + +func ParseInt(b []byte, bits int) (int64, error) { + negOffset := 0 + neg := len(b) > 0 && b[0] == '-' + if neg { + negOffset = 1 + } + n, err := ParseUint(b[negOffset:], bits) + if err != nil && n == 0 { + return 0, err + } + + maxInt := uint64(1) << (bits - 1) + if neg && n > maxInt { + return -int64(maxInt), strconv.ErrRange + } else if !neg && n > maxInt-1 { + return int64(maxInt - 1), strconv.ErrRange + } + + if neg { + return int64(-n), nil + } else { + return int64(+n), nil } - return v, true } // ParseFloat parses a floating point number according to the Go float grammar. -// Note that the JSON number grammar is a strict subset. -// -// If the number overflows the finite representation of a float, -// then we return MaxFloat since any finite value will always be infinitely -// more accurate at representing another finite value than an infinite value. 
-func ParseFloat(b []byte, bits int) (v float64, ok bool) { - fv, err := strconv.ParseFloat(string(b), bits) - if math.IsInf(fv, 0) { - switch { - case bits == 32 && math.IsInf(fv, +1): - fv = +math.MaxFloat32 - case bits == 64 && math.IsInf(fv, +1): - fv = +math.MaxFloat64 - case bits == 32 && math.IsInf(fv, -1): - fv = -math.MaxFloat32 - case bits == 64 && math.IsInf(fv, -1): - fv = -math.MaxFloat64 - } +func ParseFloat(b []byte, bits int) (float64, error) { + // Note that the JSON number grammar is a strict subset. + // We have ensured the input is a valid json number in [ConsumeNumberResumable], + // So we may take advantage of the simpler grammar and + // replace this with a more efficient implementation in the future. + v, err := strconv.ParseFloat(string(b), bits) + if err != nil { + err = err.(*strconv.NumError).Err } - return fv, err == nil + return v, err } diff --git a/internal/jsonwire/decode_test.go b/internal/jsonwire/decode_test.go index 1748b59..e3801f5 100644 --- a/internal/jsonwire/decode_test.go +++ b/internal/jsonwire/decode_test.go @@ -9,6 +9,7 @@ import ( "io" "math" "reflect" + "strconv" "strings" "testing" ) @@ -342,55 +343,55 @@ func TestParseHexUint16(t *testing.T) { func TestParseUint(t *testing.T) { tests := []struct { - in string - want uint64 - wantOk bool + in string + want uint64 + wantErr error }{ - {"", 0, false}, - {"0", 0, true}, - {"1", 1, true}, - {"-1", 0, false}, - {"1f", 0, false}, - {"00", 0, false}, - {"01", 0, false}, - {"10", 10, true}, - {"10.9", 0, false}, - {" 10", 0, false}, - {"10 ", 0, false}, - {"123456789", 123456789, true}, - {"123456789d", 0, false}, - {"18446744073709551614", math.MaxUint64 - 1, true}, - {"18446744073709551615", math.MaxUint64, true}, - {"18446744073709551616", math.MaxUint64, false}, - {"18446744073709551620", math.MaxUint64, false}, - {"18446744073709551700", math.MaxUint64, false}, - {"18446744073709552000", math.MaxUint64, false}, - {"18446744073709560000", math.MaxUint64, false}, - 
{"18446744073709600000", math.MaxUint64, false}, - {"18446744073710000000", math.MaxUint64, false}, - {"18446744073800000000", math.MaxUint64, false}, - {"18446744074000000000", math.MaxUint64, false}, - {"18446744080000000000", math.MaxUint64, false}, - {"18446744100000000000", math.MaxUint64, false}, - {"18446745000000000000", math.MaxUint64, false}, - {"18446750000000000000", math.MaxUint64, false}, - {"18446800000000000000", math.MaxUint64, false}, - {"18447000000000000000", math.MaxUint64, false}, - {"18450000000000000000", math.MaxUint64, false}, - {"18500000000000000000", math.MaxUint64, false}, - {"19000000000000000000", math.MaxUint64, false}, - {"19999999999999999999", math.MaxUint64, false}, - {"20000000000000000000", math.MaxUint64, false}, - {"100000000000000000000", math.MaxUint64, false}, - {"99999999999999999999999999999999", math.MaxUint64, false}, - {"99999999999999999999999999999999f", 0, false}, + {"", 0, strconv.ErrSyntax}, + {"0", 0, nil}, + {"1", 1, nil}, + {"-1", 0, strconv.ErrSyntax}, + {"1f", 0, strconv.ErrSyntax}, + {"00", 0, strconv.ErrSyntax}, + {"01", 0, strconv.ErrSyntax}, + {"10", 10, nil}, + {"10.9", 0, strconv.ErrSyntax}, + {" 10", 0, strconv.ErrSyntax}, + {"10 ", 0, strconv.ErrSyntax}, + {"123456789", 123456789, nil}, + {"123456789d", 0, strconv.ErrSyntax}, + {"18446744073709551614", math.MaxUint64 - 1, nil}, + {"18446744073709551615", math.MaxUint64, nil}, + {"18446744073709551616", math.MaxUint64, strconv.ErrRange}, + {"18446744073709551620", math.MaxUint64, strconv.ErrRange}, + {"18446744073709551700", math.MaxUint64, strconv.ErrRange}, + {"18446744073709552000", math.MaxUint64, strconv.ErrRange}, + {"18446744073709560000", math.MaxUint64, strconv.ErrRange}, + {"18446744073709600000", math.MaxUint64, strconv.ErrRange}, + {"18446744073710000000", math.MaxUint64, strconv.ErrRange}, + {"18446744073800000000", math.MaxUint64, strconv.ErrRange}, + {"18446744074000000000", math.MaxUint64, strconv.ErrRange}, + {"18446744080000000000", 
math.MaxUint64, strconv.ErrRange}, + {"18446744100000000000", math.MaxUint64, strconv.ErrRange}, + {"18446745000000000000", math.MaxUint64, strconv.ErrRange}, + {"18446750000000000000", math.MaxUint64, strconv.ErrRange}, + {"18446800000000000000", math.MaxUint64, strconv.ErrRange}, + {"18447000000000000000", math.MaxUint64, strconv.ErrRange}, + {"18450000000000000000", math.MaxUint64, strconv.ErrRange}, + {"18500000000000000000", math.MaxUint64, strconv.ErrRange}, + {"19000000000000000000", math.MaxUint64, strconv.ErrRange}, + {"19999999999999999999", math.MaxUint64, strconv.ErrRange}, + {"20000000000000000000", math.MaxUint64, strconv.ErrRange}, + {"100000000000000000000", math.MaxUint64, strconv.ErrRange}, + {"99999999999999999999999999999999", math.MaxUint64, strconv.ErrRange}, + {"99999999999999999999999999999999f", 0, strconv.ErrSyntax}, } for _, tt := range tests { t.Run("", func(t *testing.T) { - got, gotOk := ParseUint([]byte(tt.in)) - if got != tt.want || gotOk != tt.wantOk { - t.Errorf("ParseUint(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotOk, tt.want, tt.wantOk) + got, gotErr := ParseUint([]byte(tt.in), 64) + if got != tt.want || gotErr != tt.wantErr { + t.Errorf("ParseUint(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) } }) } @@ -398,43 +399,43 @@ func TestParseUint(t *testing.T) { func TestParseFloat(t *testing.T) { tests := []struct { - in string - want32 float64 - want64 float64 - wantOk bool + in string + want32 float64 + want64 float64 + wantErr error }{ - {"0", 0, 0, true}, - {"-1", -1, -1, true}, - {"1", 1, 1, true}, + {"0", 0, 0, nil}, + {"-1", -1, -1, nil}, + {"1", 1, 1, nil}, - {"-16777215", -16777215, -16777215, true}, // -(1<<24 - 1) - {"16777215", 16777215, 16777215, true}, // +(1<<24 - 1) - {"-16777216", -16777216, -16777216, true}, // -(1<<24) - {"16777216", 16777216, 16777216, true}, // +(1<<24) - {"-16777217", -16777216, -16777217, true}, // -(1<<24 + 1) - {"16777217", 16777216, 16777217, true}, // +(1<<24 + 
1) + {"-16777215", -16777215, -16777215, nil}, // -(1<<24 - 1) + {"16777215", 16777215, 16777215, nil}, // +(1<<24 - 1) + {"-16777216", -16777216, -16777216, nil}, // -(1<<24) + {"16777216", 16777216, 16777216, nil}, // +(1<<24) + {"-16777217", -16777216, -16777217, nil}, // -(1<<24 + 1) + {"16777217", 16777216, 16777217, nil}, // +(1<<24 + 1) - {"-9007199254740991", -9007199254740992, -9007199254740991, true}, // -(1<<53 - 1) - {"9007199254740991", 9007199254740992, 9007199254740991, true}, // +(1<<53 - 1) - {"-9007199254740992", -9007199254740992, -9007199254740992, true}, // -(1<<53) - {"9007199254740992", 9007199254740992, 9007199254740992, true}, // +(1<<53) - {"-9007199254740993", -9007199254740992, -9007199254740992, true}, // -(1<<53 + 1) - {"9007199254740993", 9007199254740992, 9007199254740992, true}, // +(1<<53 + 1) + {"-9007199254740991", -9007199254740992, -9007199254740991, nil}, // -(1<<53 - 1) + {"9007199254740991", 9007199254740992, 9007199254740991, nil}, // +(1<<53 - 1) + {"-9007199254740992", -9007199254740992, -9007199254740992, nil}, // -(1<<53) + {"9007199254740992", 9007199254740992, 9007199254740992, nil}, // +(1<<53) + {"-9007199254740993", -9007199254740992, -9007199254740992, nil}, // -(1<<53 + 1) + {"9007199254740993", 9007199254740992, 9007199254740992, nil}, // +(1<<53 + 1) - {"-1e1000", -math.MaxFloat32, -math.MaxFloat64, false}, - {"1e1000", +math.MaxFloat32, +math.MaxFloat64, false}, + {"-1e1000", math.Inf(-1), math.Inf(-1), strconv.ErrRange}, + {"1e1000", math.Inf(+1), math.Inf(+1), strconv.ErrRange}, } for _, tt := range tests { t.Run("", func(t *testing.T) { - got32, gotOk32 := ParseFloat([]byte(tt.in), 32) - if got32 != tt.want32 || gotOk32 != tt.wantOk { - t.Errorf("ParseFloat(%q, 32) = (%v, %v), want (%v, %v)", tt.in, got32, gotOk32, tt.want32, tt.wantOk) + got32, gotErr32 := ParseFloat([]byte(tt.in), 32) + if got32 != tt.want32 || gotErr32 != tt.wantErr { + t.Errorf("ParseFloat(%q, 32) = (%v, %v), want (%v, %v)", tt.in, 
got32, gotErr32, tt.want32, tt.wantErr) } - got64, gotOk64 := ParseFloat([]byte(tt.in), 64) - if got64 != tt.want64 || gotOk64 != tt.wantOk { - t.Errorf("ParseFloat(%q, 64) = (%v, %v), want (%v, %v)", tt.in, got64, gotOk64, tt.want64, tt.wantOk) + got64, gotErr64 := ParseFloat([]byte(tt.in), 64) + if got64 != tt.want64 || gotErr64 != tt.wantErr { + t.Errorf("ParseFloat(%q, 64) = (%v, %v), want (%v, %v)", tt.in, got64, gotErr64, tt.want64, tt.wantErr) } }) } diff --git a/jsontext/coder_test.go b/jsontext/coder_test.go index 8c34721..60e3d0e 100644 --- a/jsontext/coder_test.go +++ b/jsontext/coder_test.go @@ -146,7 +146,7 @@ var coderTestdata = []coderTestdataEntry{{ 9007199254740990, 9007199254740991, 9007199254740992, 9007199254740993, 9007199254740994, -9223372036854775808, 9223372036854775807, 0, 18446744073709551615 ] `, - outCompacted: "[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,-9223372036854775808,9223372036854775807,0,18446744073709551615]", + outCompacted: `[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,-9223372036854775808,9223372036854775807,0,18446744073709551615]`, outIndented: `[ 0, -0, @@ -473,7 +473,7 @@ func testCoderInterleaved(t *testing.T, where jsontest.CasePos, modeName string, } t.Fatalf("%s: Decoder.ReadToken error: %v", where, err) } - if err := enc.WriteToken(tok); err != nil { + if err := enc.WriteToken(Raw(tok)); err != nil { t.Fatalf("%s: Encoder.WriteToken error: %v", where, err) } } else { diff --git a/jsontext/decode.go b/jsontext/decode.go index 6f14095..da35475 100644 --- a/jsontext/decode.go +++ b/jsontext/decode.go @@ -437,13 +437,13 
@@ func (d *decoderState) SkipUntil(depth int, length int64) error { return nil } -// ReadToken reads the next [Token], advancing the read offset. +// ReadToken reads the next [RawToken], advancing the read offset. // The returned token is only valid until the next Peek, Read, or Skip call. // It returns [io.EOF] if there are no more tokens. -func (d *Decoder) ReadToken() (Token, error) { +func (d *Decoder) ReadToken() (RawToken, error) { return d.s.ReadToken() } -func (d *decoderState) ReadToken() (Token, error) { +func (d *decoderState) ReadToken() (RawToken, error) { // Determine the next kind. var err error var next Kind @@ -453,7 +453,7 @@ func (d *decoderState) ReadToken() (Token, error) { if d.peekErr != nil { err := d.peekErr d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error - return Token{}, err + return RawToken{}, err } next = Kind(d.buf[pos]).normalize() d.peekPos = 0 // reset cache @@ -468,7 +468,7 @@ func (d *decoderState) ReadToken() (Token, error) { if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 { err = io.EOF // EOF possibly if no Tokens present after top-level value } - return Token{}, wrapSyntacticError(d, err, pos, 0) + return RawToken{}, wrapSyntacticError(d, err, pos, 0) } } @@ -481,13 +481,13 @@ func (d *decoderState) ReadToken() (Token, error) { if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { err = wrapSyntacticError(d, err, pos, 0) - return Token{}, d.checkDelimBeforeIOError(delim, err) + return RawToken{}, d.checkDelimBeforeIOError(delim, err) } } } next = Kind(d.buf[pos]).normalize() if d.Tokens.needDelim(next) != delim { - return Token{}, d.checkDelim(delim, next) + return RawToken{}, d.checkDelim(delim, next) } } @@ -498,46 +498,46 @@ func (d *decoderState) ReadToken() (Token, error) { if jsonwire.ConsumeNull(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "null") if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, 
+1) } } else { pos += len("null") } if err = d.Tokens.appendLiteral(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-len("null"), +1) // report position at start of literal + return RawToken{}, wrapSyntacticError(d, err, pos-len("null"), +1) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos - return Null, nil + return Null.raw, nil case 'f': if jsonwire.ConsumeFalse(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "false") if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } else { pos += len("false") } if err = d.Tokens.appendLiteral(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-len("false"), +1) // report position at start of literal + return RawToken{}, wrapSyntacticError(d, err, pos-len("false"), +1) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos - return False, nil + return False.raw, nil case 't': if jsonwire.ConsumeTrue(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "true") if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } else { pos += len("true") } if err = d.Tokens.appendLiteral(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-len("true"), +1) // report position at start of literal + return RawToken{}, wrapSyntacticError(d, err, pos-len("true"), +1) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos - return True, nil + return True.raw, nil case '"': var flags jsonwire.ValueFlags // TODO: Preserve this in Token? 
@@ -547,7 +547,7 @@ func (d *decoderState) ReadToken() (Token, error) { newAbsPos := d.baseOffset + int64(pos) n = int(newAbsPos - oldAbsPos) if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } else { pos += n @@ -555,20 +555,20 @@ func (d *decoderState) ReadToken() (Token, error) { if d.Tokens.Last.NeedObjectName() { if !d.Flags.Get(jsonflags.AllowDuplicateNames) { if !d.Tokens.Last.isValidNamespace() { - return Token{}, wrapSyntacticError(d, errInvalidNamespace, pos-n, +1) + return RawToken{}, wrapSyntacticError(d, errInvalidNamespace, pos-n, +1) } if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) { err = wrapWithObjectName(ErrDuplicateName, d.buf[pos-n:pos]) - return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string + return RawToken{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string } } d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds } if err = d.Tokens.appendString(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string + return RawToken{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string } d.prevStart, d.prevEnd = pos-n, pos - return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil + return RawToken{dBuf: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil case '0': // NOTE: Since JSON numbers are not self-terminating, @@ -579,20 +579,20 @@ func (d *decoderState) ReadToken() (Token, error) { newAbsPos := d.baseOffset + int64(pos) n = int(newAbsPos - oldAbsPos) if err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } else { pos += n } if err = d.Tokens.appendNumber(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report 
position at start of number + return RawToken{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of number } d.prevStart, d.prevEnd = pos-n, pos - return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil + return RawToken{dBuf: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil case '{': if err = d.Tokens.pushObject(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } d.Names.push() if !d.Flags.Get(jsonflags.AllowDuplicateNames) { @@ -600,11 +600,11 @@ func (d *decoderState) ReadToken() (Token, error) { } pos += 1 d.prevStart, d.prevEnd = pos, pos - return ObjectStart, nil + return ObjectStart.raw, nil case '}': if err = d.Tokens.popObject(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } d.Names.pop() if !d.Flags.Get(jsonflags.AllowDuplicateNames) { @@ -612,27 +612,27 @@ func (d *decoderState) ReadToken() (Token, error) { } pos += 1 d.prevStart, d.prevEnd = pos, pos - return ObjectEnd, nil + return ObjectEnd.raw, nil case '[': if err = d.Tokens.pushArray(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } pos += 1 d.prevStart, d.prevEnd = pos, pos - return ArrayStart, nil + return ArrayStart.raw, nil case ']': if err = d.Tokens.popArray(); err != nil { - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } pos += 1 d.prevStart, d.prevEnd = pos, pos - return ArrayEnd, nil + return ArrayEnd.raw, nil default: err = jsonwire.NewInvalidCharacterError(d.buf[pos:], "at start of value") - return Token{}, wrapSyntacticError(d, err, pos, +1) + return RawToken{}, wrapSyntacticError(d, err, pos, +1) } } diff --git a/jsontext/decode_test.go b/jsontext/decode_test.go index 80f235d..98fe7e8 100644 --- a/jsontext/decode_test.go +++ 
b/jsontext/decode_test.go @@ -59,7 +59,7 @@ func testDecoder(t *testing.T, where jsontest.CasePos, typeName string, td coder } t.Fatalf("%s: Decoder.ReadToken error: %v", where, err) } - tokens = append(tokens, tok.Clone()) + tokens = append(tokens, Raw(tok.Clone())) if td.pointers != nil { pointers = append(pointers, dec.StackPointer()) } @@ -94,7 +94,7 @@ func testDecoder(t *testing.T, where jsontest.CasePos, typeName string, td coder } t.Fatalf("%s: Decoder.ReadToken error: %v", where, err) } - tokens = append(tokens, tok.Clone()) + tokens = append(tokens, Raw(tok.Clone())) default: val, err := dec.ReadValue() if err != nil { @@ -148,7 +148,7 @@ func testFaultyDecoder(t *testing.T, where jsontest.CasePos, typeName string, td } continue } - tokens = append(tokens, tok.Clone()) + tokens = append(tokens, Raw(tok.Clone())) } if !equalTokens(tokens, td.tokens) { t.Fatalf("%s: tokens mismatch:\ngot %s\nwant %s", where, tokens, td.tokens) @@ -1007,9 +1007,9 @@ func testDecoderErrors(t *testing.T, where jsontest.CasePos, opts []Options, in var gotErr error switch wantOut := call.wantOut.(type) { case Token: - var gotOut Token + var gotOut RawToken gotOut, gotErr = dec.ReadToken() - if gotOut.String() != wantOut.String() { + if Raw(gotOut).String() != wantOut.String() { t.Fatalf("%s: %d: Decoder.ReadToken = %v, want %v", where, i, gotOut, wantOut) } case Value: diff --git a/jsontext/encode_test.go b/jsontext/encode_test.go index fe8af3e..fb68b45 100644 --- a/jsontext/encode_test.go +++ b/jsontext/encode_test.go @@ -8,6 +8,7 @@ import ( "bytes" "errors" "io" + "math" "path" "slices" "testing" @@ -74,8 +75,11 @@ func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName stri } default: val := Value(tok.String()) - if tok.Kind() == '"' { - val, _ = jsonwire.AppendQuote(nil, tok.String(), &jsonflags.Flags{}) + switch tok.Kind() { + case '"': + val, _ = tok.appendString(nil, &jsonflags.Flags{}) + case '0': + val, _ = tok.appendNumber(nil, &jsonflags.Flags{}) 
} if err := enc.WriteValue(val); err != nil { t.Fatalf("%s: Encoder.WriteValue error: %v", where, err) @@ -241,6 +245,11 @@ var encoderErrorTestdata = []struct { calls: []encoderMethodCall{ {Value(`0.e`), newInvalidCharacterError("e", "in number (expecting digit)").withPos(`0.`, ""), ""}, }, +}, { + name: jsontest.Name("InfinityNumber"), + calls: []encoderMethodCall{ + {Float(math.Inf(+1)), E(errors.New("unsupported value: +Inf")), ""}, + }, }, { name: jsontest.Name("TruncatedObject/AfterStart"), calls: []encoderMethodCall{ diff --git a/jsontext/example_test.go b/jsontext/example_test.go index 3ab3e2d..6f547ae 100644 --- a/jsontext/example_test.go +++ b/jsontext/example_test.go @@ -42,6 +42,7 @@ func Example_stringReplace() { for { // Read a token from the input. tok, err := dec.ReadToken() + tokW := jsontext.Raw(tok) if err != nil { if err == io.EOF { break @@ -53,11 +54,11 @@ func Example_stringReplace() { // replace each occurrence with "Go" instead. if tok.Kind() == '"' && strings.Contains(tok.String(), "Golang") { replacements = append(replacements, dec.StackPointer()) - tok = jsontext.String(strings.ReplaceAll(tok.String(), "Golang", "Go")) + tokW = jsontext.String(strings.ReplaceAll(tok.String(), "Golang", "Go")) } // Write the (possibly modified) token to the output. 
- if err := enc.WriteToken(tok); err != nil { + if err := enc.WriteToken(tokW); err != nil { log.Fatal(err) } } diff --git a/jsontext/fuzz_test.go b/jsontext/fuzz_test.go index 055eed4..7de95e1 100644 --- a/jsontext/fuzz_test.go +++ b/jsontext/fuzz_test.go @@ -70,8 +70,8 @@ func FuzzCoder(f *testing.F) { enc := NewEncoder(dst) for _, tokVal := range tokVals { switch tokVal := tokVal.(type) { - case Token: - if err := enc.WriteToken(tokVal); err != nil { + case RawToken: + if err := enc.WriteToken(Raw(tokVal)); err != nil { t.Fatalf("Encoder.WriteToken error: %v", err) } case Value: @@ -88,14 +88,14 @@ func FuzzCoder(f *testing.F) { if err != nil { t.Fatalf("Decoder.ReadToken error: %v", err) } - got = append(got, tok.Clone()) + got = append(got, Raw(tok.Clone())) } for dec := NewDecoder(dst); dec.PeekKind() > 0; { tok, err := dec.ReadToken() if err != nil { t.Fatalf("Decoder.ReadToken error: %v", err) } - want = append(want, tok.Clone()) + want = append(want, Raw(tok.Clone())) } if !equalTokens(got, want) { t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want) diff --git a/jsontext/token.go b/jsontext/token.go index b389fc0..599bd6c 100644 --- a/jsontext/token.go +++ b/jsontext/token.go @@ -7,6 +7,7 @@ package jsontext import ( "bytes" "errors" + "fmt" "math" "strconv" @@ -16,16 +17,41 @@ import ( // NOTE: Token is analogous to v1 json.Token. 
-const ( - maxInt64 = math.MaxInt64 - minInt64 = math.MinInt64 - maxUint64 = math.MaxUint64 - minUint64 = 0 // for consistency and readability purposes +var errInvalidToken = errors.New("invalid jsontext.Token") + +func tokenTypeTag() *decodeBuffer { + return &decodeBuffer{} +} - invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call" +// these special tags will have nil buf +var ( + strTag = tokenTypeTag() + uintTag = tokenTypeTag() + intTag = tokenTypeTag() + floatTag = tokenTypeTag() ) -var errInvalidToken = errors.New("invalid jsontext.Token") +// RawToken is like [Token], and is returned by [Decoder.ReadToken]. +// +// Use [Raw] to convert it to [Token] for [Encoder.WriteToken]. +type RawToken struct { + nonComparable + + // dBuf contains a reference to the dBuf decode buffer. + // It is only valid if num == dBuf.previousOffsetStart(). + dBuf *decodeBuffer + num uint64 +} + +func (t RawToken) isRaw() bool { + return t.dBuf.buf != nil +} + +func (t RawToken) ensureValid() { + if uint64(t.dBuf.previousOffsetStart()) != t.num { + panic("invalid jsontext.Token; it has been voided by a subsequent json.Decoder call") + } +} // Token represents a lexical JSON token, which may be one of the following: // - a JSON literal (i.e., null, true, or false) @@ -42,49 +68,21 @@ type Token struct { // Tokens can exist in either a "raw" or an "exact" form. // Tokens produced by the Decoder are in the "raw" form. - // Tokens returned by constructors are usually in the "exact" form. + // Tokens returned by constructors are in the "exact" form. // The Encoder accepts Tokens in either the "raw" or "exact" form. + + // raw may contain a valid RawToken if raw.dBuf.buf is non-nil.
// - // The following chart shows the possible values for each Token type: - // ╔═════════════════╦════════════╤════════════╤════════════╗ - // ║ Token type ║ raw field │ str field │ num field ║ - // ╠═════════════════╬════════════╪════════════╪════════════╣ - // ║ null (raw) ║ "null" │ "" │ 0 ║ - // ║ false (raw) ║ "false" │ "" │ 0 ║ - // ║ true (raw) ║ "true" │ "" │ 0 ║ - // ║ string (raw) ║ non-empty │ "" │ offset ║ - // ║ string (string) ║ nil │ non-empty │ 0 ║ - // ║ number (raw) ║ non-empty │ "" │ offset ║ - // ║ number (float) ║ nil │ "f" │ non-zero ║ - // ║ number (int64) ║ nil │ "i" │ non-zero ║ - // ║ number (uint64) ║ nil │ "u" │ non-zero ║ - // ║ object (delim) ║ "{" or "}" │ "" │ 0 ║ - // ║ array (delim) ║ "[" or "]" │ "" │ 0 ║ - // ╚═════════════════╩════════════╧════════════╧════════════╝ + // If raw.dBuf equals to floatTag, intTag, or, uintTag, + // the token is a JSON number in the "exact" form and + // raw.num should be interpreted as a float64, int64, or uint64, respectively. // - // Notes: - // - For tokens stored in "raw" form, the num field contains the - // absolute offset determined by raw.previousOffsetStart(). - // The buffer itself is stored in raw.previousBuffer(). - // - JSON literals and structural characters are always in the "raw" form. - // - JSON strings and numbers can be in either "raw" or "exact" forms. - // - The exact zero value of JSON strings and numbers in the "exact" forms - // have ambiguous representation. Thus, they are always represented - // in the "raw" form. - - // raw contains a reference to the raw decode buffer. - // If non-nil, then its value takes precedence over str and num. - // It is only valid if num == raw.previousOffsetStart(). - raw *decodeBuffer - - // str is the unescaped JSON string if num is zero. - // Otherwise, it is "f", "i", or "u" if num should be interpreted - // as a float64, int64, or uint64, respectively. 
- str string + // If raw.dBuf equals strTag, the token is a JSON string in the "string" form and + // str is the unescaped JSON string. + raw RawToken - // num is a float64, int64, or uint64 stored as a uint64 value. - // It is non-zero for any JSON number in the "exact" form. - num uint64 + // str is the unescaped JSON string + str string } // TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues? @@ -98,17 +96,12 @@ var ( ObjectEnd Token = rawToken("}") ArrayStart Token = rawToken("[") ArrayEnd Token = rawToken("]") - - zeroString Token = rawToken(`""`) - zeroNumber Token = rawToken(`0`) - - nanString Token = String("NaN") - pinfString Token = String("Infinity") - ninfString Token = String("-Infinity") ) func rawToken(s string) Token { - return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}} + return Token{raw: RawToken{ + dBuf: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}, + }} } // Bool constructs a Token representing a JSON boolean. @@ -123,98 +116,107 @@ func Bool(b bool) Token { // The provided string should contain valid UTF-8, otherwise invalid characters // may be mangled as the Unicode replacement character. func String(s string) Token { - if len(s) == 0 { - return zeroString + return Token{ + raw: RawToken{dBuf: strTag}, + str: s, } - return Token{str: s} } -// Float constructs a Token representing a JSON number. -// The values NaN, +Inf, and -Inf will be represented -// as a JSON string with the values "NaN", "Infinity", and "-Infinity". +// Float constructs a Token representing a JSON number from a float64.
+// The values NaN, +Inf, and -Inf will result in error if passed to [Encoder.WriteToken] func Float(n float64) Token { - switch { - case math.Float64bits(n) == 0: - return zeroNumber - case math.IsNaN(n): - return nanString - case math.IsInf(n, +1): - return pinfString - case math.IsInf(n, -1): - return ninfString + return Token{ + raw: RawToken{dBuf: floatTag, num: math.Float64bits(n)}, } - return Token{str: "f", num: math.Float64bits(n)} } // Int constructs a Token representing a JSON number from an int64. func Int(n int64) Token { - if n == 0 { - return zeroNumber + return Token{ + raw: RawToken{dBuf: intTag, num: uint64(n)}, } - return Token{str: "i", num: uint64(n)} } // Uint constructs a Token representing a JSON number from a uint64. func Uint(n uint64) Token { - if n == 0 { - return zeroNumber + return Token{ + raw: RawToken{dBuf: uintTag, num: n}, } - return Token{str: "u", num: uint64(n)} +} + +func Raw(t RawToken) Token { + return Token{raw: t} } // Clone makes a copy of the Token such that its value remains valid // even after a subsequent [Decoder.Read] call. -func (t Token) Clone() Token { +func (t RawToken) Clone() RawToken { + if t.dBuf == nil { + return t // zero value + } // TODO: Allow caller to avoid any allocations? - if raw := t.raw; raw != nil { - // Avoid copying globals. - if t.raw.prevStart == 0 { - switch t.raw { - case Null.raw: - return Null - case False.raw: - return False - case True.raw: - return True - case ObjectStart.raw: - return ObjectStart - case ObjectEnd.raw: - return ObjectEnd - case ArrayStart.raw: - return ArrayStart - case ArrayEnd.raw: - return ArrayEnd - } + // Avoid copying globals. 
+ if t.dBuf.prevStart == 0 { + switch t.dBuf { + case Null.raw.dBuf: + return Null.raw + case False.raw.dBuf: + return False.raw + case True.raw.dBuf: + return True.raw + case ObjectStart.raw.dBuf: + return ObjectStart.raw + case ObjectEnd.raw.dBuf: + return ObjectEnd.raw + case ArrayStart.raw.dBuf: + return ArrayStart.raw + case ArrayEnd.raw.dBuf: + return ArrayEnd.raw } + } - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - buf := bytes.Clone(raw.previousBuffer()) - return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}} + t.ensureValid() + buf := bytes.Clone(t.dBuf.previousBuffer()) + return RawToken{dBuf: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}} + +} + +// Clone makes a copy of the Token such that its value remains valid +// even after a subsequent [Decoder.Read] call. +func (t Token) Clone() Token { + if t.raw.dBuf == nil { + return t // zero value } - return t + if t.raw.isRaw() { + return Token{raw: t.raw.Clone()} + } + return t // exact form. } // Bool returns the value for a JSON boolean. // It panics if the token kind is not a JSON boolean. -func (t Token) Bool() bool { - switch t.raw { - case True.raw: +func (t RawToken) Bool() bool { + switch t.dBuf { + case True.raw.dBuf: return true - case False.raw: + case False.raw.dBuf: return false default: panic("invalid JSON token kind: " + t.Kind().String()) } } +func (t Token) Bool() bool { + return t.raw.Bool() +} + // appendString appends a JSON string to dst and returns it. // It panics if t is not a JSON string. func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) { - if raw := t.raw; raw != nil { + if t.raw.isRaw() { + // TODO: ensure valid? // Handle raw string value.
- buf := raw.previousBuffer() + buf := t.raw.dBuf.previousBuffer() if Kind(buf[0]) == '"' { if jsonwire.ConsumeSimpleString(buf) == len(buf) { return append(dst, buf...), nil @@ -222,7 +224,7 @@ func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) dst, _, err := jsonwire.ReformatString(dst, buf, flags) return dst, err } - } else if len(t.str) != 0 && t.num == 0 { + } else if t.raw.dBuf == strTag { // Handle exact string value. return jsonwire.AppendQuote(dst, t.str, flags) } @@ -230,6 +232,27 @@ func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) panic("invalid JSON token kind: " + t.Kind().String()) } +// String returns the unescaped string value for a JSON string. +// For other JSON kinds, this returns the raw JSON representation. +func (t RawToken) String() string { + return string(t.bytes()) +} + +func (t RawToken) bytes() []byte { + if t.dBuf == nil { + return []byte("") + } + t.ensureValid() + buf := t.dBuf.previousBuffer() + if buf[0] == '"' { + // TODO: Preserve ValueFlags in Token? + isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf) + return jsonwire.UnquoteMayCopy(buf, isVerbatim) + } + // Handle tokens that are not JSON strings for fmt.Stringer. + return buf +} + // String returns the unescaped string value for a JSON string. // For other JSON kinds, this returns the raw JSON representation. func (t Token) String() string { @@ -243,33 +266,23 @@ func (t Token) String() string { } return s } + func (t Token) string() (string, []byte) { - if raw := t.raw; raw != nil { - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - buf := raw.previousBuffer() - if buf[0] == '"' { - // TODO: Preserve ValueFlags in Token? - isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf) - return "", jsonwire.UnquoteMayCopy(buf, isVerbatim) - } - // Handle tokens that are not JSON strings for fmt.Stringer. 
- return "", buf - } - if len(t.str) != 0 && t.num == 0 { + // Handle tokens that are not JSON strings for fmt.Stringer. + switch t.raw.dBuf { + case strTag: return t.str, nil + case nil: + return "", nil + case floatTag: + return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.raw.num), 64)), nil + case intTag: + return strconv.FormatInt(int64(t.raw.num), 10), nil + case uintTag: + return strconv.FormatUint(uint64(t.raw.num), 10), nil } - // Handle tokens that are not JSON strings for fmt.Stringer. - if t.num > 0 { - switch t.str[0] { - case 'f': - return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil - case 'i': - return strconv.FormatInt(int64(t.num), 10), nil - case 'u': - return strconv.FormatUint(uint64(t.num), 10), nil - } + if t.raw.isRaw() { + return "", t.raw.bytes() } return "", nil } @@ -277,193 +290,122 @@ func (t Token) string() (string, []byte) { // appendNumber appends a JSON number to dst and returns it. // It panics if t is not a JSON number. func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) { - if raw := t.raw; raw != nil { + if t.raw.isRaw() { // Handle raw number value. - buf := raw.previousBuffer() + buf := t.raw.dBuf.previousBuffer() if Kind(buf[0]).normalize() == '0' { dst, _, err := jsonwire.ReformatNumber(dst, buf, flags) return dst, err } - } else if t.num != 0 { + } else { // Handle exact number value. 
- switch t.str[0] { - case 'f': - return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil - case 'i': - return strconv.AppendInt(dst, int64(t.num), 10), nil - case 'u': - return strconv.AppendUint(dst, uint64(t.num), 10), nil + switch t.raw.dBuf { + case floatTag: + v := math.Float64frombits(t.raw.num) + if math.IsInf(v, 0) || math.IsNaN(v) { + return nil, fmt.Errorf("unsupported value: %v", v) + } + return jsonwire.AppendFloat(dst, v, 64), nil + case intTag: + return strconv.AppendInt(dst, int64(t.raw.num), 10), nil + case uintTag: + return strconv.AppendUint(dst, uint64(t.raw.num), 10), nil } } panic("invalid JSON token kind: " + t.Kind().String()) } -// Float returns the floating-point value for a JSON number. +var ErrUnexpectedKind = errors.New("unexpected JSON token kind") + +// ParseFloat parses the floating-point value for a JSON number. // It returns a NaN, +Inf, or -Inf value for any JSON string // with the values "NaN", "Infinity", or "-Infinity". -// It panics for all other cases. -func (t Token) Float() float64 { - if raw := t.raw; raw != nil { - // Handle raw number value. - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - buf := raw.previousBuffer() - if Kind(buf[0]).normalize() == '0' { - fv, _ := jsonwire.ParseFloat(buf, 64) - return fv - } - } else if t.num != 0 { - // Handle exact number value. - switch t.str[0] { - case 'f': - return math.Float64frombits(t.num) - case 'i': - return float64(int64(t.num)) - case 'u': - return float64(uint64(t.num)) - } +func (t RawToken) ParseFloat(bits int) (float64, error) { + t.ensureValid() + buf := t.dBuf.previousBuffer() + if Kind(buf[0]).normalize() == '0' { + return jsonwire.ParseFloat(buf, bits) } + return 0., ErrUnexpectedKind +} - // Handle string values with "NaN", "Infinity", or "-Infinity". 
- if t.Kind() == '"' { - switch t.String() { - case "NaN": - return math.NaN() - case "Infinity": - return math.Inf(+1) - case "-Infinity": - return math.Inf(-1) - } +// Float returns the floating-point value for a JSON number. +// It panics if the token is not created with [Float]. +func (t Token) Float() float64 { + if t.raw.dBuf == floatTag { + return math.Float64frombits(t.raw.num) } + panic("JSON token not created with Float") +} - panic("invalid JSON token kind: " + t.Kind().String()) +func (t RawToken) ParseInt(bits int) (int64, error) { + t.ensureValid() + buf := t.dBuf.previousBuffer() + if Kind(buf[0]).normalize() == '0' { + return jsonwire.ParseInt(buf, bits) + } + return 0, ErrUnexpectedKind } // Int returns the signed integer value for a JSON number. -// The fractional component of any number is ignored (truncation toward zero). -// Any number beyond the representation of an int64 will be saturated -// to the closest representable value. -// It panics if the token kind is not a JSON number. +// It panics if the token is not created with [Int]. func (t Token) Int() int64 { - if raw := t.raw; raw != nil { - // Handle raw integer value. - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - neg := false - buf := raw.previousBuffer() - if len(buf) > 0 && buf[0] == '-' { - neg, buf = true, buf[1:] - } - if numAbs, ok := jsonwire.ParseUint(buf); ok { - if neg { - if numAbs > -minInt64 { - return minInt64 - } - return -1 * int64(numAbs) - } else { - if numAbs > +maxInt64 { - return maxInt64 - } - return +1 * int64(numAbs) - } - } - } else if t.num != 0 { - // Handle exact integer value. - switch t.str[0] { - case 'i': - return int64(t.num) - case 'u': - if t.num > maxInt64 { - return maxInt64 - } - return int64(t.num) - } + if t.raw.dBuf == intTag { + return int64(t.raw.num) } + panic("JSON token not created with Int") +} - // Handle JSON number that is a floating-point value. 
- if t.Kind() == '0' { - switch fv := t.Float(); { - case fv >= maxInt64: - return maxInt64 - case fv <= minInt64: - return minInt64 - default: - return int64(fv) // truncation toward zero - } +func (t RawToken) ParseUint(bits int) (uint64, error) { + t.ensureValid() + buf := t.dBuf.previousBuffer() + if Kind(buf[0]).normalize() == '0' { + return jsonwire.ParseUint(buf, bits) } - - panic("invalid JSON token kind: " + t.Kind().String()) + return 0, ErrUnexpectedKind } // Uint returns the unsigned integer value for a JSON number. -// The fractional component of any number is ignored (truncation toward zero). -// Any number beyond the representation of an uint64 will be saturated -// to the closest representable value. -// It panics if the token kind is not a JSON number. +// It panics if the token is not created with [Uint]. func (t Token) Uint() uint64 { - // NOTE: This accessor returns 0 for any negative JSON number, - // which might be surprising, but is at least consistent with the behavior - // of saturating out-of-bounds numbers to the closest representable number. - - if raw := t.raw; raw != nil { - // Handle raw integer value. - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - neg := false - buf := raw.previousBuffer() - if len(buf) > 0 && buf[0] == '-' { - neg, buf = true, buf[1:] - } - if num, ok := jsonwire.ParseUint(buf); ok { - if neg { - return minUint64 - } - return num - } - } else if t.num != 0 { - // Handle exact integer value. - switch t.str[0] { - case 'u': - return t.num - case 'i': - if int64(t.num) < minUint64 { - return minUint64 - } - return uint64(int64(t.num)) - } + if t.raw.dBuf == uintTag { + return t.raw.num } + panic("JSON token not created with Uint") +} - // Handle JSON number that is a floating-point value. 
- if t.Kind() == '0' { - switch fv := t.Float(); { - case fv >= maxUint64: - return maxUint64 - case fv <= minUint64: - return minUint64 - default: - return uint64(fv) // truncation toward zero - } +// Raw returns the embedded RawToken. +// It panics if the token is not created with [Raw]. +func (t Token) Raw() RawToken { + if t.raw.isRaw() { + return t.raw } + panic("JSON token not created with Raw") +} - panic("invalid JSON token kind: " + t.Kind().String()) +// Kind returns the token kind. +func (t RawToken) Kind() Kind { + if t.dBuf == nil { // for zero value RawToken + return invalidKind + } + t.ensureValid() + return Kind(t.dBuf.buf[t.dBuf.prevStart]).normalize() } // Kind returns the token kind. func (t Token) Kind() Kind { switch { - case t.raw != nil: - raw := t.raw - if uint64(raw.previousOffsetStart()) != t.num { - panic(invalidTokenPanic) - } - return Kind(t.raw.buf[raw.prevStart]).normalize() - case t.num != 0: + case t.raw.dBuf == nil: + return invalidKind // zero value Token + case t.raw.isRaw(): + return t.raw.Kind() + case t.raw.dBuf == intTag || t.raw.dBuf == uintTag || t.raw.dBuf == floatTag: + // For NaN and Inf, we still return '0' as the Kind + // even if it will be encoded as a string. + // We don't want to use this for object key, right?
return '0' - case len(t.str) != 0: + case t.raw.dBuf == strTag: return '"' default: return invalidKind diff --git a/jsontext/token_test.go b/jsontext/token_test.go index 2180b6a..146a7bb 100644 --- a/jsontext/token_test.go +++ b/jsontext/token_test.go @@ -7,9 +7,17 @@ package jsontext import ( "math" "reflect" + "strconv" "testing" ) +const ( + maxInt64 = math.MaxInt64 + minInt64 = math.MinInt64 + maxUint64 = math.MaxUint64 + minUint64 = 0 // for consistency and readability purposes +) + func TestTokenStringAllocations(t *testing.T) { if testing.CoverMode() != "" { t.Skip("coverage mode breaks the compiler optimization this depends on") @@ -56,35 +64,23 @@ func TestTokenAccessors(t *testing.T) { {String(""), token{String: "", Kind: '"'}}, {String("hello, world!"), token{String: "hello, world!", Kind: '"'}}, {rawToken(`"hello, world!"`), token{String: "hello, world!", Kind: '"'}}, - {Float(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}}, + {Float(0), token{String: "0", Float: 0, Kind: '0'}}, + {Float(1.2), token{String: "1.2", Float: 1.2, Kind: '0'}}, {Float(math.Copysign(0, -1)), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}}, - {Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '"'}}, - {Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '"'}}, - {Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '"'}}, - {Int(minInt64), token{String: "-9223372036854775808", Float: minInt64, Int: minInt64, Uint: minUint64, Kind: '0'}}, - {Int(minInt64 + 1), token{String: "-9223372036854775807", Float: minInt64 + 1, Int: minInt64 + 1, Uint: minUint64, Kind: '0'}}, - {Int(-1), token{String: "-1", Float: -1, Int: -1, Uint: minUint64, Kind: '0'}}, - {Int(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}}, - {Int(+1), token{String: "1", Float: +1, Int: +1, Uint: +1, Kind: '0'}}, - {Int(maxInt64 - 1), token{String: "9223372036854775806", Float: maxInt64 
- 1, Int: maxInt64 - 1, Uint: maxInt64 - 1, Kind: '0'}}, - {Int(maxInt64), token{String: "9223372036854775807", Float: maxInt64, Int: maxInt64, Uint: maxInt64, Kind: '0'}}, + {Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '0'}}, + {Float(math.Inf(+1)), token{String: "+Inf", Float: math.Inf(+1), Kind: '0'}}, + {Float(math.Inf(-1)), token{String: "-Inf", Float: math.Inf(-1), Kind: '0'}}, + {Int(minInt64), token{String: "-9223372036854775808", Int: minInt64, Uint: minUint64, Kind: '0'}}, + {Int(minInt64 + 1), token{String: "-9223372036854775807", Int: minInt64 + 1, Kind: '0'}}, + {Int(-1), token{String: "-1", Int: -1, Kind: '0'}}, + {Int(0), token{String: "0", Int: 0, Kind: '0'}}, + {Int(+1), token{String: "1", Int: +1, Kind: '0'}}, + {Int(maxInt64 - 1), token{String: "9223372036854775806", Int: maxInt64 - 1, Kind: '0'}}, + {Int(maxInt64), token{String: "9223372036854775807", Int: maxInt64, Kind: '0'}}, {Uint(minUint64), token{String: "0", Kind: '0'}}, - {Uint(minUint64 + 1), token{String: "1", Float: minUint64 + 1, Int: minUint64 + 1, Uint: minUint64 + 1, Kind: '0'}}, - {Uint(maxUint64 - 1), token{String: "18446744073709551614", Float: maxUint64 - 1, Int: maxInt64, Uint: maxUint64 - 1, Kind: '0'}}, - {Uint(maxUint64), token{String: "18446744073709551615", Float: maxUint64, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, - {rawToken(`-0`), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`1e1000`), token{String: "1e1000", Float: math.MaxFloat64, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, - {rawToken(`-1e1000`), token{String: "-1e1000", Float: -math.MaxFloat64, Int: minInt64, Uint: minUint64, Kind: '0'}}, - {rawToken(`0.1`), token{String: "0.1", Float: 0.1, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`0.5`), token{String: "0.5", Float: 0.5, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`0.9`), token{String: "0.9", Float: 0.9, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`1.1`), token{String: "1.1", 
Float: 1.1, Int: 1, Uint: 1, Kind: '0'}}, - {rawToken(`-0.1`), token{String: "-0.1", Float: -0.1, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`-0.5`), token{String: "-0.5", Float: -0.5, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`-0.9`), token{String: "-0.9", Float: -0.9, Int: 0, Uint: 0, Kind: '0'}}, - {rawToken(`-1.1`), token{String: "-1.1", Float: -1.1, Int: -1, Uint: 0, Kind: '0'}}, - {rawToken(`99999999999999999999`), token{String: "99999999999999999999", Float: 1e20 - 1, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, - {rawToken(`-99999999999999999999`), token{String: "-99999999999999999999", Float: -1e20 - 1, Int: minInt64, Uint: minUint64, Kind: '0'}}, + {Uint(minUint64 + 1), token{String: "1", Uint: minUint64 + 1, Kind: '0'}}, + {Uint(maxUint64 - 1), token{String: "18446744073709551614", Uint: maxUint64 - 1, Kind: '0'}}, + {Uint(maxUint64), token{String: "18446744073709551615", Uint: maxUint64, Kind: '0'}}, } for _, tt := range tests { @@ -132,6 +128,46 @@ func TestTokenAccessors(t *testing.T) { } } +func TestTokenAccessorRaw(t *testing.T) { + if !reflect.DeepEqual(False, Raw(False.Raw())) { + t.Error("False != Raw(False.Raw())") + } + + raw := func() *RawToken { + defer func() { recover() }() + raw := Float(0.).Raw() + return &raw + }() + if raw != nil { + t.Error("Float(0.).Raw() should panic") + } +} + +func assertParse[T comparable](t *testing.T, s string, parse func(t RawToken, bits int) (T, error), wantV T, wantErr error) { + t.Helper() + gotV, gotErr := parse(rawToken(s).raw, 64) + if gotV != wantV { + t.Errorf("RawToken.ParseXXX(64) = %v, want %v", gotV, wantV) + } + if gotErr != wantErr { + t.Errorf("RawToken.ParseXXX(64) error = %v, want %v", gotErr, wantErr) + } +} + +func TestTokenParseNumber(t *testing.T) { + assertParse(t, `1.23`, RawToken.ParseFloat, 1.23, nil) + assertParse(t, `1e1000`, RawToken.ParseFloat, math.Inf(+1), strconv.ErrRange) + assertParse(t, `"anything"`, RawToken.ParseFloat, 0, ErrUnexpectedKind) + + assertParse(t, "123", 
RawToken.ParseInt, 123, nil) + assertParse(t, "99999999999999999999", RawToken.ParseInt, math.MaxInt64, strconv.ErrRange) + assertParse(t, "false", RawToken.ParseInt, 0, ErrUnexpectedKind) + + assertParse(t, "123", RawToken.ParseUint, 123, nil) + assertParse(t, "-1", RawToken.ParseUint, 0, strconv.ErrSyntax) + assertParse(t, "false", RawToken.ParseUint, 0, ErrUnexpectedKind) +} + func TestTokenClone(t *testing.T) { tests := []struct { in Token @@ -157,7 +193,7 @@ func TestTokenClone(t *testing.T) { if !reflect.DeepEqual(got, tt.in) { t.Errorf("Token(%s) == Token(%s).Clone() = false, want true", tt.in, tt.in) } - gotExactRaw := got.raw == tt.in.raw + gotExactRaw := got.raw.dBuf == tt.in.raw.dBuf if gotExactRaw != tt.wantExactRaw { t.Errorf("Token(%s).raw == Token(%s).Clone().raw = %v, want %v", tt.in, tt.in, gotExactRaw, tt.wantExactRaw) } diff --git a/v1/stream.go b/v1/stream.go index 03c12e1..4cf3c5e 100644 --- a/v1/stream.go +++ b/v1/stream.go @@ -206,7 +206,7 @@ func (dec *Decoder) Token() (Token, error) { if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); useNumber { return Number(tok.String()), nil } - return tok.Float(), nil + return tok.ParseFloat(64) case '{', '}', '[', ']': return Delim(k), nil default: