Skip to content

Commit f2fc0c2

Browse files
authored
chore(dataobj): column building (#15634)
This commit adds the ability to accumulate sequences of dataset.Value into a column, which is split up across multiple pages. Each page is broken down into two parts: * A bitmap-encoded sequence of booleans, where 1 indicates a row has a value and 0 indicates the row is NULL, and * the encoded sequence of non-NULL values, whose encoding is determined by the column options. The sequence of non-NULL values is then optionally compressed. This commit also includes initial support for reading these columns, starting with internal-only helper utilities for unit tests.
1 parent 672f91c commit f2fc0c2

File tree

13 files changed

+1519
-21
lines changed

13 files changed

+1519
-21
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package dataset
2+
3+
import "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
4+
5+
// Helper types.
type (
	// ColumnInfo describes a column: its identity, how its values are stored,
	// and aggregate size/row accounting across all of its pages.
	ColumnInfo struct {
		Name        string                    // Name of the column, if any.
		Type        datasetmd.ValueType       // Type of values in the column.
		Compression datasetmd.CompressionType // Compression used for the column.

		RowsCount        int // Total number of rows in the column, including NULLs.
		CompressedSize   int // Total size of all pages in the column after compression.
		UncompressedSize int // Total size of all pages in the column before compression.

		Statistics *datasetmd.Statistics // Optional statistics for the column; nil when not computed.
	}
)
20+
21+
// MemColumn holds a set of pages of a common type. A MemColumn is the
// in-memory result of flushing a ColumnBuilder.
type MemColumn struct {
	Info  ColumnInfo // Information about the column.
	Pages []*MemPage // The set of pages in the column.
}
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
package dataset
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
7+
)
8+
9+
// BuilderOptions configures common settings for building pages.
type BuilderOptions struct {
	// PageSizeHint is the soft limit for the size of the page. Builders try to
	// fill pages as close to this size as possible, but the actual size may be
	// slightly larger or smaller.
	PageSizeHint int

	// Value is the value type of data to write.
	Value datasetmd.ValueType

	// Encoding is the encoding algorithm to use for values.
	Encoding datasetmd.EncodingType

	// Compression is the compression algorithm to use for values.
	// The presence bitmap is always stored uncompressed regardless of this
	// setting.
	Compression datasetmd.CompressionType
}
25+
26+
// A ColumnBuilder builds a sequence of [Value] entries of a common type into a
// column. Values are accumulated into a buffer and then flushed into
// [MemPage]s once the size of data exceeds a configurable limit.
type ColumnBuilder struct {
	name string         // Column name passed to NewColumnBuilder; may be empty.
	opts BuilderOptions // Options the builder (and its pageBuilder) were created with.

	rows int // Total number of rows in the column, including backfilled NULLs.

	pages   []*MemPage   // Pages cut so far; the current page is still in builder.
	builder *pageBuilder // Accumulates values for the page currently being built.
}
38+
39+
// NewColumnBuilder creates a new ColumnBuilder from the optional name and
40+
// provided options. NewColumnBuilder returns an error if the options are
41+
// invalid.
42+
func NewColumnBuilder(name string, opts BuilderOptions) (*ColumnBuilder, error) {
43+
builder, err := newPageBuilder(opts)
44+
if err != nil {
45+
return nil, fmt.Errorf("creating page builder: %w", err)
46+
}
47+
48+
return &ColumnBuilder{
49+
name: name,
50+
opts: opts,
51+
52+
builder: builder,
53+
}, nil
54+
}
55+
56+
// Append adds a new value into cb with the given zero-indexed row number. If
57+
// the row number is higher than the current number of rows in cb, null values
58+
// are added up to the new row.
59+
//
60+
// Append returns an error if the row number is out-of-order.
61+
func (cb *ColumnBuilder) Append(row int, value Value) error {
62+
if row < cb.rows {
63+
return fmt.Errorf("row %d is older than current row %d", row, cb.rows)
64+
}
65+
66+
// We give two attempts to append the data to the buffer; if the buffer is
67+
// full, we cut a page and then append to the newly reset buffer.
68+
//
69+
// The second iteration should never fail, as the buffer will always be empty
70+
// then.
71+
for range 2 {
72+
if cb.append(row, value) {
73+
cb.rows = row + 1
74+
return nil
75+
}
76+
77+
cb.flushPage()
78+
}
79+
80+
panic("ColumnBuilder.Append: failed to append value to fresh buffer")
81+
}
82+
83+
// Backfill adds NULLs into cb up to (but not including) the provided row
84+
// number. If values exist up to the provided row number, Backfill does
85+
// nothing.
86+
func (cb *ColumnBuilder) Backfill(row int) {
87+
// We give two attempts to append the data to the buffer; if the buffer is
88+
// full, we cut a page and then append again to the newly reset buffer.
89+
//
90+
// The second iteration should never fail, as the buffer will always be
91+
// empty.
92+
for range 2 {
93+
if cb.backfill(row) {
94+
return
95+
}
96+
cb.flushPage()
97+
}
98+
99+
panic("ColumnBuilder.Backfill: failed to backfill buffer")
100+
}
101+
102+
func (cb *ColumnBuilder) backfill(row int) bool {
103+
for row > cb.rows {
104+
if !cb.builder.AppendNull() {
105+
return false
106+
}
107+
cb.rows++
108+
}
109+
110+
return true
111+
}
112+
113+
func (cb *ColumnBuilder) append(row int, value Value) bool {
114+
// Backfill up to row.
115+
if !cb.backfill(row) {
116+
return false
117+
}
118+
return cb.builder.Append(value)
119+
}
120+
121+
// Flush converts data in cb into a [MemColumn]. Afterwards, cb is reset to a
122+
// fresh state and can be reused.
123+
func (cb *ColumnBuilder) Flush() (*MemColumn, error) {
124+
cb.flushPage()
125+
126+
info := ColumnInfo{
127+
Name: cb.name,
128+
Type: cb.opts.Value,
129+
130+
Compression: cb.opts.Compression,
131+
}
132+
133+
// TODO(rfratto): Should we compute column-wide statistics if they're
134+
// available in pages?
135+
//
136+
// That would potentially work for min/max values, but not for count
137+
// distinct, unless we had a way to pass sketches around.
138+
139+
for _, page := range cb.pages {
140+
info.RowsCount += page.Info.RowCount
141+
info.CompressedSize += page.Info.CompressedSize
142+
info.UncompressedSize += page.Info.UncompressedSize
143+
}
144+
145+
column := &MemColumn{
146+
Info: info,
147+
Pages: cb.pages,
148+
}
149+
150+
cb.Reset()
151+
return column, nil
152+
}
153+
154+
func (cb *ColumnBuilder) flushPage() {
155+
if cb.builder.Rows() == 0 {
156+
return
157+
}
158+
159+
page, err := cb.builder.Flush()
160+
if err != nil {
161+
// Flush should only return an error when it's empty, which we already
162+
// ensure it's not in the lines above.
163+
panic(fmt.Sprintf("failed to flush page: %s", err))
164+
}
165+
cb.pages = append(cb.pages, page)
166+
}
167+
168+
// Reset clears all data in cb and resets it to a fresh state.
169+
func (cb *ColumnBuilder) Reset() {
170+
cb.rows = 0
171+
cb.pages = nil
172+
cb.builder.Reset()
173+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package dataset
2+
3+
import "github.com/grafana/loki/v3/pkg/dataobj/internal/result"
4+
5+
func iterMemColumn(col *MemColumn) result.Seq[Value] {
6+
return result.Iter(func(yield func(Value) bool) error {
7+
for _, page := range col.Pages {
8+
for result := range iterMemPage(page, col.Info.Type, col.Info.Compression) {
9+
val, err := result.Value()
10+
if err != nil {
11+
return err
12+
} else if !yield(val) {
13+
return nil
14+
}
15+
}
16+
}
17+
18+
return nil
19+
})
20+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package dataset
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/require"
7+
8+
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
9+
)
10+
11+
// TestColumnBuilder_ReadWrite round-trips a set of strings through a
// ColumnBuilder and iterMemColumn, checking the decoded values match the
// input and that the data was split across multiple pages.
func TestColumnBuilder_ReadWrite(t *testing.T) {
	in := []string{
		"hello, world!",
		"",
		"this is a test of the emergency broadcast system",
		"this is only a test",
		"if this were a real emergency, you would be instructed to panic",
		"but it's not, so don't",
		"",
		"this concludes the test",
		"thank you for your cooperation",
		"goodbye",
	}

	opts := BuilderOptions{
		// Set the size hint to 0 to force a page cut on every append, so each
		// page holds roughly one value; the assertion below only requires
		// that more than one page was produced.
		PageSizeHint: 0,
		Value:        datasetmd.VALUE_TYPE_STRING,
		Compression:  datasetmd.COMPRESSION_TYPE_ZSTD,
		Encoding:     datasetmd.ENCODING_TYPE_PLAIN,
	}
	b, err := NewColumnBuilder("", opts)
	require.NoError(t, err)

	for i, s := range in {
		require.NoError(t, b.Append(i, StringValue(s)))
	}

	col, err := b.Flush()
	require.NoError(t, err)
	require.Equal(t, datasetmd.VALUE_TYPE_STRING, col.Info.Type)
	require.Greater(t, len(col.Pages), 1)

	t.Log("Uncompressed size: ", col.Info.UncompressedSize)
	t.Log("Compressed size: ", col.Info.CompressedSize)
	t.Log("Pages: ", len(col.Pages))

	var actual []string
	for result := range iterMemColumn(col) {
		val, err := result.Value()
		require.NoError(t, err)

		// NOTE(review): empty input strings appear to come back as nil/zero
		// values rather than empty strings, so both are mapped to "" here —
		// confirm this matches the intended NULL-vs-empty semantics.
		if val.IsNil() || val.IsZero() {
			actual = append(actual, "")
		} else {
			require.Equal(t, datasetmd.VALUE_TYPE_STRING, val.Type())
			actual = append(actual, val.String())
		}
	}
	require.Equal(t, in, actual)
}

‎pkg/dataobj/internal/dataset/page.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package dataset
2+
3+
import (
4+
"bytes"
5+
"encoding/binary"
6+
"fmt"
7+
"hash/crc32"
8+
"io"
9+
10+
"github.com/golang/snappy"
11+
"github.com/klauspost/compress/zstd"
12+
13+
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
14+
)
15+
16+
// Helper types.
type (
	// PageData holds the raw data for a page. Data is formatted as:
	//
	//	<uvarint(presence-bitmap-size)> <presence-bitmap> <values-data>
	//
	// The presence-bitmap is a bitmap-encoded sequence of booleans, where values
	// describe which rows are present (1) or nil (0). The presence bitmap is
	// always stored uncompressed.
	//
	// values-data is then the encoded and optionally compressed sequence of
	// non-NULL values.
	PageData []byte

	// PageInfo describes a page.
	PageInfo struct {
		UncompressedSize int    // UncompressedSize is the size of a page before compression.
		CompressedSize   int    // CompressedSize is the size of a page after compression.
		CRC32            uint32 // CRC32 checksum of the page after encoding and compression.
		RowCount         int    // RowCount is the number of rows in the page, including NULLs.

		Encoding datasetmd.EncodingType // Encoding used for values in the page.
		Stats    *datasetmd.Statistics  // Optional statistics for the page; nil when not computed.
	}
)
41+
42+
// MemPage holds an encoded (and optionally compressed) sequence of [Value]
// entries of a common type. Use [ColumnBuilder] to construct sets of pages.
type MemPage struct {
	Info PageInfo // Information about the page.
	Data PageData // Data for the page.
}

// checksumTable is the CRC32 table (Castagnoli polynomial) used to validate
// page data integrity in MemPage.reader.
var checksumTable = crc32.MakeTable(crc32.Castagnoli)
50+
51+
// reader returns a reader for decompressed page data. Reader returns an error
52+
// if the CRC32 fails to validate.
53+
func (p *MemPage) reader(compression datasetmd.CompressionType) (presence io.Reader, values io.ReadCloser, err error) {
54+
if actual := crc32.Checksum(p.Data, checksumTable); p.Info.CRC32 != actual {
55+
return nil, nil, fmt.Errorf("invalid CRC32 checksum %x, expected %x", actual, p.Info.CRC32)
56+
}
57+
58+
bitmapSize, n := binary.Uvarint(p.Data)
59+
if n <= 0 {
60+
return nil, nil, fmt.Errorf("reading presence bitmap size: %w", err)
61+
}
62+
63+
var (
64+
bitmapReader = bytes.NewReader(p.Data[n : n+int(bitmapSize)])
65+
compressedDataReader = bytes.NewReader(p.Data[n+int(bitmapSize):])
66+
)
67+
68+
switch compression {
69+
case datasetmd.COMPRESSION_TYPE_UNSPECIFIED, datasetmd.COMPRESSION_TYPE_NONE:
70+
return bitmapReader, io.NopCloser(compressedDataReader), nil
71+
72+
case datasetmd.COMPRESSION_TYPE_SNAPPY:
73+
sr := snappy.NewReader(compressedDataReader)
74+
return bitmapReader, io.NopCloser(sr), nil
75+
76+
case datasetmd.COMPRESSION_TYPE_ZSTD:
77+
zr, err := zstd.NewReader(compressedDataReader)
78+
if err != nil {
79+
return nil, nil, fmt.Errorf("opening zstd reader: %w", err)
80+
}
81+
return bitmapReader, newZstdReader(zr), nil
82+
}
83+
84+
panic(fmt.Sprintf("dataset.MemPage.reader: unknown compression type %q", compression.String()))
85+
}
86+
87+
// zstdReader implements [io.ReadCloser] for a [zstd.Decoder]. The embedded
// Decoder provides Read; Close is adapted below because zstd.Decoder.Close
// returns nothing.
type zstdReader struct{ *zstd.Decoder }

// newZstdReader returns a new [io.ReadCloser] for a [zstd.Decoder].
func newZstdReader(dec *zstd.Decoder) io.ReadCloser {
	return &zstdReader{Decoder: dec}
}

// Close implements [io.Closer]. It releases the decoder's resources and
// always returns nil.
func (r *zstdReader) Close() error {
	r.Decoder.Close()
	return nil
}

0 commit comments

Comments
 (0)