Skip to content

Add object iterator that avoids string allocation #343

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 119 additions & 2 deletions benchmarks/jsoniter_large_file_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package test

import (
"bytes"
"encoding/json"
"github.com/json-iterator/go"
"github.com/apersson/json-iterator-go"
"io"
"io/ioutil"
"os"
"testing"
Expand All @@ -24,6 +26,99 @@ import (
// }
//}

// rawObject is a single JSON object (a user profile made of strings, numbers,
// nulls, a boolean, nested objects and arrays). Benchmark_jsoniter_object
// wraps it in a bytes.Reader and uses it as the document to iterate over.
const rawObject = `
{
"id": "d50887ca-a6ce-4e59-b89f-14f0b5d03b03",
"name": {
"fullName": "Leonid Bugaev",
"givenName": "Leonid",
"familyName": "Bugaev"
},
"email": "leonsbox@gmail.com",
"gender": "male",
"location": "Saint Petersburg, Saint Petersburg, RU",
"geo": {
"city": "Saint Petersburg",
"state": "Saint Petersburg",
"country": "Russia",
"lat": 59.9342802,
"lng": 30.3350986
},
"bio": "Senior engineer at Granify.com",
"site": "http://flickfaver.com",
"avatar": "https://d1ts43dypk8bqh.cloudfront.net/v1/avatars/d50887ca-a6ce-4e59-b89f-14f0b5d03b03",
"employment": {
"name": "www.latera.ru",
"title": "Software Engineer",
"domain": "gmail.com"
},
"facebook": {
"handle": "leonid.bugaev"
},
"github": {
"handle": "buger",
"id": 14009,
"avatar": "https://avatars.githubusercontent.com/u/14009?v=3",
"company": "Granify",
"blog": "http://leonsbox.com",
"followers": 95,
"following": 10
},
"twitter": {
"handle": "flickfaver",
"id": 77004410,
"bio": null,
"followers": 2,
"following": 1,
"statuses": 5,
"favorites": 0,
"location": "",
"site": "http://flickfaver.com",
"avatar": null
},
"linkedin": {
"handle": "in/leonidbugaev"
},
"googleplus": {
"handle": null
},
"angellist": {
"handle": "leonid-bugaev",
"id": 61541,
"bio": "Senior engineer at Granify.com",
"blog": "http://buger.github.com",
"site": "http://buger.github.com",
"followers": 41,
"avatar": "https://d1qb2nb5cznatu.cloudfront.net/users/61541-medium_jpg?1405474390"
},
"klout": {
"handle": null,
"score": null
},
"foursquare": {
"handle": null
},
"aboutme": {
"handle": "leonid.bugaev",
"bio": null,
"avatar": null
},
"gravatar": {
"handle": "buger",
"urls": [
],
"avatar": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510",
"avatars": [
{
"url": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510",
"type": "thumbnail"
}
]
},
"fuzzy": false
}
`

func init() {
ioutil.WriteFile("/tmp/large-file.json", []byte(`[{
"person": {
Expand Down Expand Up @@ -132,7 +227,10 @@ func Benchmark_jsoniter_large_file(b *testing.B) {
count := 0
iter.ReadArrayCB(func(iter *jsoniter.Iterator) bool {
// Skip() is strict by default, use --tags jsoniter-sloppy to skip without validation
iter.Skip()
iter.ReadObjectCB(func(iter *jsoniter.Iterator, s string) bool {
iter.Skip()
return true
})
count++
return true
})
Expand All @@ -143,6 +241,25 @@ func Benchmark_jsoniter_large_file(b *testing.B) {
}
}

// Benchmark_jsoniter_object measures iterating over the top-level fields of
// rawObject with the byte-slice-key object callback, skipping every value.
// Allocations are reported so the cost of handling keys is visible.
func Benchmark_jsoniter_object(b *testing.B) {
	b.ReportAllocs()

	br := bytes.NewReader([]byte(rawObject))
	count := 0
	// Keep the one-time reader setup out of the measured region.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Rewind so every iteration parses the document from the start.
		if _, err := br.Seek(0, io.SeekStart); err != nil {
			b.Fatal(err)
		}
		iter := jsoniter.Parse(jsoniter.ConfigDefault, br, 4096)
		// Use the name the iterator declares for the slice-key variant;
		// the key is ignored because only the traversal cost is measured.
		iter.ReadObjectCBWithoutCopy(func(it *jsoniter.Iterator, _ []byte) bool {
			it.Skip()
			count++
			return true
		})
		if iter.Error != nil {
			b.Error(iter.Error)
		}
	}
}

func Benchmark_json_large_file(b *testing.B) {
b.ReportAllocs()
for n := 0; n < b.N; n++ {
Expand Down
20 changes: 15 additions & 5 deletions iter_object.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,18 @@ func calcHash(str string, caseSensitive bool) int64 {
return int64(hash)
}

// ReadObjectCB read object with callback, the key is ascii only and field name not copied
func (iter *Iterator) ReadObjectCB(callback func(*Iterator, string) bool) bool {
// ReadObjectCBWithoutCopy reads an object with a callback. The key is ASCII
// only, and it will be changed by the next iterator call. The callback function
// must make a copy of the key if it needs to live beyond the scope of the callback.
func (iter *Iterator) ReadObjectCBWithoutCopy(callback func(*Iterator,
[]byte) bool) bool {
c := iter.nextToken()
var field string
var field []byte
if c == '{' {
c = iter.nextToken()
if c == '"' {
iter.unreadByte()
field = iter.ReadString()
field = iter.ReadStringAsSlice()
c = iter.nextToken()
if c != ':' {
iter.ReportError("ReadObject", "expect : after object field, but found "+string([]byte{c}))
Expand All @@ -125,7 +128,7 @@ func (iter *Iterator) ReadObjectCB(callback func(*Iterator, string) bool) bool {
}
c = iter.nextToken()
for c == ',' {
field = iter.ReadString()
field = iter.ReadStringAsSlice()
c = iter.nextToken()
if c != ':' {
iter.ReportError("ReadObject", "expect : after object field, but found "+string([]byte{c}))
Expand Down Expand Up @@ -155,6 +158,13 @@ func (iter *Iterator) ReadObjectCB(callback func(*Iterator, string) bool) bool {
return false
}

// ReadObjectCB read object with callback, the key is ascii only.
// It delegates to ReadObjectCBWithoutCopy and converts each key to a string
// before invoking callback. The return value is whatever
// ReadObjectCBWithoutCopy returns.
func (iter *Iterator) ReadObjectCB(callback func(*Iterator, string) bool) bool {
	return iter.ReadObjectCBWithoutCopy(func(it *Iterator, field []byte) bool {
		// string(field) copies the bytes, so the callback receives a key
		// that is independent of the slice reused by the iterator.
		return callback(it, string(field))
	})
}

// ReadMapCB read map with callback, the key can be any string
func (iter *Iterator) ReadMapCB(callback func(*Iterator, string) bool) bool {
c := iter.nextToken()
Expand Down