Skip to content

Commit 3d94d83

Browse files
TocarIP and copybara-github
authored and committed
Optimize ReadPackedVarint
Count the number of varints before allocating in ReadPackedVarint. This is pretty cheap, so we save more on avoiding reallocations. Throughput: BM_ParsePacked/1 524.2Mi ± 2% 532.2Mi ± 2% +1.53% (p=0.041 n=6) BM_ParsePacked/8 758.2Mi ± 2% 1163.7Mi ± 1% +53.48% (p=0.002 n=6) BM_ParsePacked/64 1.582Gi ± 2% 2.656Gi ± 2% +67.94% (p=0.002 n=6) BM_ParsePacked/512 2.014Gi ± 3% 3.377Gi ± 4% +67.66% (p=0.002 n=6) BM_ParsePacked/4k 2.009Gi ± 2% 3.358Gi ± 2% +67.10% (p=0.002 n=6) BM_ParsePacked/32k 1.882Gi ± 2% 3.151Gi ± 3% +67.46% (p=0.002 n=6) BM_ParsePacked/256k 1.338Gi ± 15% 2.408Gi ± 8% +80.03% (p=0.002 n=6) Memory usage is also about 25% lower: BM_ParsePacked/1 192.0 ± 0% 192.0 ± 0% ~ (p=1.000 n=6) ¹ BM_ParsePacked/8 2.048k ± 0% 1.568k ± 0% -23.44% (p=0.002 n=6) BM_ParsePacked/64 16.19k ± 0% 12.05k ± 0% -25.59% (p=0.002 n=6) BM_ParsePacked/512 130.69k ± 0% 98.69k ± 0% -24.49% (p=0.002 n=6) BM_ParsePacked/4k 1048.6k ± 0% 795.9k ± 0% -24.10% (p=0.002 n=6) BM_ParsePacked/32k 8.045M ± 0% 5.677M ± 0% -29.43% (p=0.002 n=6) BM_ParsePacked/256k 64.01M ± 0% 44.67M ± 0% -30.21% (p=0.002 n=6) PiperOrigin-RevId: 813430457
1 parent 76c2b20 commit 3d94d83

File tree

2 files changed

+99
-14
lines changed

2 files changed

+99
-14
lines changed

‎src/google/protobuf/generated_message_tctable_lite.cc‎

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,20 +1225,23 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedVarint(
12251225
// Since ctx->ReadPackedVarint does not use TailCall or Return, sync any
12261226
// pending hasbits now:
12271227
SyncHasbits(msg, hasbits, table);
1228-
auto* field = &RefAt<RepeatedField<FieldType>>(msg, data.offset());
1229-
return ctx->ReadPackedVarint(ptr, [field](uint64_t varint) {
1230-
FieldType val;
1231-
if (zigzag) {
1232-
if (sizeof(FieldType) == 8) {
1233-
val = WireFormatLite::ZigZagDecode64(varint);
1234-
} else {
1235-
val = WireFormatLite::ZigZagDecode32(varint);
1236-
}
1237-
} else {
1238-
val = varint;
1239-
}
1240-
field->Add(val);
1241-
});
1228+
auto& field = RefAt<RepeatedField<FieldType>>(msg, data.offset());
1229+
return ctx->ReadPackedVarintWithField(
1230+
ptr,
1231+
[field](uint64_t varint) {
1232+
FieldType val;
1233+
if (zigzag) {
1234+
if (sizeof(FieldType) == 8) {
1235+
val = WireFormatLite::ZigZagDecode64(varint);
1236+
} else {
1237+
val = WireFormatLite::ZigZagDecode32(varint);
1238+
}
1239+
} else {
1240+
val = varint;
1241+
}
1242+
return val;
1243+
},
1244+
field);
12421245
}
12431246

12441247
PROTOBUF_NOINLINE const char* TcParser::FastV8P1(PROTOBUF_TC_PARAM_DECL) {

‎src/google/protobuf/parse_context.h‎

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,11 @@ class PROTOBUF_EXPORT EpsCopyInputStream {
260260
template <typename Add, typename SizeCb>
261261
[[nodiscard]] const char* ReadPackedVarint(const char* ptr, Add add,
262262
SizeCb size_callback);
263+
// Same as above, but takes the field directly, so we can preallocate.
264+
template <typename Convert, typename T>
265+
[[nodiscard]] const char* ReadPackedVarintWithField(const char* ptr,
266+
Convert conv,
267+
RepeatedField<T>& out);
263268

264269
uint32_t LastTag() const { return last_tag_minus_1_ + 1; }
265270
bool ConsumeEndGroup(uint32_t start_tag) {
@@ -1362,6 +1367,83 @@ const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) {
13621367
return ptr;
13631368
}
13641369

1370+
template <typename Convert, typename T>
1371+
const char* ReadPackedVarintArrayWithField(const char* ptr, const char* end,
1372+
Convert conv,
1373+
RepeatedField<T>& out) {
1374+
// If we have enough bytes, we will spend more cpu cycles growing repeated
1375+
// field, than parsing, so count the number of ints first and preallocate.
1376+
// Assume that varint are valid and just count the number of bytes with
1377+
// continuation bit not set. In a valid varint there is only 1 such byte.
1378+
if ((end - ptr) >= 16 && (out.Capacity() - out.size() < end - ptr)) {
1379+
int old_size = out.size();
1380+
int count = out.Capacity() - out.size();
1381+
// We are not guaranteed to have enough space for worst possible case,
1382+
// do an actual count and reserve.
1383+
if (count < end - ptr) {
1384+
count = std::count_if(ptr, end, [](char c) { return (c & 0x80) == 0; });
1385+
// We can overread, so if the last byte has a continuation bit set,
1386+
// we need to account for that.
1387+
if (end[-1] & 0x80) count++;
1388+
out.Reserve(old_size + count);
1389+
}
1390+
T* x = out.AddNAlreadyReserved(count);
1391+
ptr = ReadPackedVarintArray(ptr, end, [&](uint64_t varint) {
1392+
*x = conv(varint);
1393+
x++;
1394+
});
1395+
int new_size = x - out.data();
1396+
ABSL_DCHECK_LE(new_size, old_size + count);
1397+
// We may have overreserved if there was enough capacitiy.
1398+
// Or encountered malformed data, so set the actaul size to
1399+
// avoid exposing uninitialized memory.
1400+
out.Truncate(new_size);
1401+
return ptr;
1402+
} else {
1403+
return ReadPackedVarintArray(
1404+
ptr, end, [&](uint64_t varint) { out.Add(conv(varint)); });
1405+
}
1406+
}
1407+
1408+
template <typename Convert, typename T>
1409+
const char* EpsCopyInputStream::ReadPackedVarintWithField(
1410+
const char* ptr, Convert conv, RepeatedField<T>& out) {
1411+
int size = ReadSize(&ptr);
1412+
1413+
GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
1414+
int chunk_size = static_cast<int>(buffer_end_ - ptr);
1415+
while (size > chunk_size) {
1416+
ptr = ReadPackedVarintArrayWithField(ptr, buffer_end_, conv, out);
1417+
if (ptr == nullptr) return nullptr;
1418+
int overrun = static_cast<int>(ptr - buffer_end_);
1419+
ABSL_DCHECK(overrun >= 0 && overrun <= kSlopBytes);
1420+
if (size - chunk_size <= kSlopBytes) {
1421+
// The current buffer contains all the information needed, we don't need
1422+
// to flip buffers. However we must parse from a buffer with enough space
1423+
// so we are not prone to a buffer overflow.
1424+
char buf[kSlopBytes + 10] = {};
1425+
std::memcpy(buf, buffer_end_, kSlopBytes);
1426+
ABSL_CHECK_LE(size - chunk_size, kSlopBytes);
1427+
auto end = buf + (size - chunk_size);
1428+
auto result = ReadPackedVarintArray(
1429+
buf + overrun, end, [&](uint64_t varint) { out.Add(conv(varint)); });
1430+
if (result == nullptr || result != end) return nullptr;
1431+
return buffer_end_ + (result - buf);
1432+
}
1433+
size -= overrun + chunk_size;
1434+
ABSL_DCHECK_GT(size, 0);
1435+
// We must flip buffers
1436+
if (limit_ <= kSlopBytes) return nullptr;
1437+
ptr = Next();
1438+
if (ptr == nullptr) return nullptr;
1439+
ptr += overrun;
1440+
chunk_size = static_cast<int>(buffer_end_ - ptr);
1441+
}
1442+
auto end = ptr + size;
1443+
ptr = ReadPackedVarintArrayWithField(ptr, end, conv, out);
1444+
return end == ptr ? ptr : nullptr;
1445+
}
1446+
13651447
template <typename Add, typename SizeCb>
13661448
const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add,
13671449
SizeCb size_callback) {

0 commit comments

Comments
 (0)