
Commit 9e68fb0

feat(dataobj): Add query stats collection to the dataobj readers (#17128)
1 parent: 69aeda1

14 files changed: +1092 −106 lines

pkg/dataobj/internal/dataset/column_reader.go

Lines changed: 5 additions & 0 deletions

@@ -8,6 +8,7 @@ import (
 	"sort"

 	"github.com/grafana/loki/v3/pkg/dataobj/internal/util/sliceclear"
+	"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
 )

 type columnReader struct {
@@ -41,6 +42,7 @@ func (cr *columnReader) Read(ctx context.Context, v []Value) (n int, err error)
 			return 0, err
 		}
 	}
+	statistics := stats.FromContext(ctx)

 	for n < len(v) {
 		// Make sure our reader is initialized to the right page for the row we
@@ -52,6 +54,9 @@ func (cr *columnReader) Read(ctx context.Context, v []Value) (n int, err error)
 		if err != nil {
 			return n, err
 		}
+		if pageIndex != cr.pageIndex {
+			statistics.AddPagesScanned(1)
+		}

 		switch cr.reader {
 		case nil:
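
Note the guard in the last hunk: AddPagesScanned fires only when the page index computed for the next row differs from cr.pageIndex, so the counter advances once per page boundary the reader actually crosses, not once per value read; pages the reader never visits are not counted.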

pkg/dataobj/internal/dataset/dataset.go

Lines changed: 8 additions & 0 deletions

@@ -87,3 +87,11 @@ type Row struct {
 	Index  int     // Index of the row in the dataset.
 	Values []Value // Values for the row, one per [Column].
 }
+
+func (r Row) Size() int64 {
+	var size int64
+	for _, v := range r.Values {
+		size += int64(v.Size())
+	}
+	return size
+}
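
Row.Size reports the logical payload of a row by summing Value.Size over its values (Value.Size is added to value.go later in this commit). For example, a row holding one int64 timestamp and a 20-byte log line reports 8 + 20 = 28 bytes: decoded value widths, not Go slice or struct overhead, which is what the decompressed-bytes counters in reader.go are meant to capture.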

pkg/dataobj/internal/dataset/reader.go

Lines changed: 23 additions & 1 deletion

@@ -10,6 +10,7 @@ import (
 	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
 	"github.com/grafana/loki/v3/pkg/dataobj/internal/util/bitmask"
 	"github.com/grafana/loki/v3/pkg/dataobj/internal/util/sliceclear"
+	"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
 )

 // ReaderOptions configures how a [Reader] will read [Row]s.
@@ -130,17 +131,22 @@ func (r *Reader) Read(ctx context.Context, s []Row) (n int, err error) {
 		return 0, io.EOF
 	}

+	var totalSizeBefore int64
+	var totalSizePostPredicate int64
+	var totalSizeAfterFill int64
 	var passCount int // passCount tracks how many rows pass the predicate.
 	for i := range count {
+		size := s[i].Size()
+		totalSizeBefore += size
 		if !checkPredicate(r.opts.Predicate, r.origColumnLookup, s[i]) {
 			continue
 		}
-
 		// We move s[i] to s[passCount] by *swapping* the rows. Copying would
 		// result in the Row.Values slice existing in two places in the buffer,
 		// which causes memory corruption when filling in rows.
 		s[passCount], s[i] = s[i], s[passCount]
 		passCount++
+		totalSizePostPredicate += size
 	}

 	if secondary := r.dl.SecondaryColumns(); len(secondary) > 0 && passCount > 0 {
@@ -161,10 +167,19 @@
 		} else if count != passCount {
 			return n, fmt.Errorf("failed to fill rows: expected %d, got %d", n, count)
 		}
+		for i := range count {
+			totalSizeAfterFill += s[i].Size()
+		}
 	}

 	n += passCount

+	statistics := stats.FromContext(ctx)
+	statistics.AddPrePredicateDecompressedRows(int64(count))
+	statistics.AddPrePredicateDecompressedBytes(totalSizeBefore)
+	statistics.AddPostPredicateRows(int64(passCount))
+	statistics.AddPostPredicateDecompressedBytes(totalSizeAfterFill - totalSizePostPredicate)
+
 	// We only advance r.row after we successfully read and filled rows. This
 	// allows the caller to retry reading rows if a sporadic error occurs.
 	r.row += int64(count)
@@ -417,6 +432,13 @@ func (r *Reader) initDownloader(ctx context.Context) error {
 	r.dl.SetDatasetRanges(ranges)
 	r.ranges = ranges

+	var rowsCount uint64
+	for _, column := range r.dl.AllColumns() {
+		rowsCount = max(rowsCount, uint64(column.ColumnInfo().RowsCount))
+	}
+	statistics := stats.FromContext(ctx)
+	statistics.AddTotalRowsAvailable(int64(rowsCount))
+
 	return nil
 }
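
To unpack the accounting in Read: totalSizeBefore is the decompressed size of every row materialized from the primary columns; totalSizePostPredicate is that same primary-column size restricted to rows that survive the predicate; totalSizeAfterFill re-measures the survivors once the secondary columns have been filled in. The difference totalSizeAfterFill - totalSizePostPredicate passed to AddPostPredicateDecompressedBytes is therefore just the secondary-column bytes, which are only ever decompressed for surviving rows. A worked example with made-up numbers: 100 rows at 50 primary bytes each, 10 survivors, and 200 secondary bytes filled per survivor gives totalSizeBefore = 5,000, totalSizePostPredicate = 500, totalSizeAfterFill = 10 × (50 + 200) = 2,500, so 2,000 secondary bytes are recorded post-predicate.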

pkg/dataobj/internal/dataset/reader_downloader.go

Lines changed: 5 additions & 0 deletions

@@ -5,6 +5,7 @@ import (

 	"github.com/grafana/loki/v3/pkg/dataobj/internal/result"
 	"github.com/grafana/loki/v3/pkg/dataobj/internal/util/sliceclear"
+	"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
 )

 // readerDownloader is a utility for downloading pages in bulk from a
@@ -302,6 +303,10 @@ func (dl *readerDownloader) buildDownloadBatch(ctx context.Context, requestor *r
 		batchSize += pageSize
 	}

+	statistics := stats.FromContext(ctx)
+	statistics.AddPageBatches(1)
+	statistics.AddPagesDownloaded(int64(len(pageBatch)))
+	statistics.AddPagesDownloadedBytes(int64(batchSize))
 	return pageBatch, nil
 }
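
These counters describe I/O rather than decoding: each call to buildDownloadBatch records one batch, the number of pages coalesced into it, and their summed size (batchSize accumulates each page's pageSize, presumably the stored size, so PagesDownloadedBytes measures bytes fetched from object storage rather than the decompressed bytes tracked in reader.go). Together with PagesScanned from column_reader.go, this separates pages fetched from pages actually iterated.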

pkg/dataobj/internal/dataset/value.go

Lines changed: 15 additions & 0 deletions

@@ -283,3 +283,18 @@ func CompareValues(a, b Value) int {
 		panic(fmt.Sprintf("page.CompareValues: unsupported type %s", a.Type()))
 	}
 }
+
+func (v Value) Size() int {
+	switch v.Type() {
+	case datasetmd.VALUE_TYPE_INT64:
+		return int(unsafe.Sizeof(int64(0)))
+	case datasetmd.VALUE_TYPE_UINT64:
+		return int(unsafe.Sizeof(uint64(0)))
+	case datasetmd.VALUE_TYPE_BYTE_ARRAY:
+		return int(v.num)
+	case datasetmd.VALUE_TYPE_UNSPECIFIED:
+		return 0
+	default:
+		panic(fmt.Sprintf("dataset.Value.Size: unsupported type %s", v.Type()))
+	}
+}
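
Size returns the width of the decoded value: both integer types report a fixed 8 bytes via unsafe.Sizeof, byte arrays report their length (which the Value representation evidently keeps in v.num), and a null (VALUE_TYPE_UNSPECIFIED) value reports zero. This relies on value.go already importing unsafe and datasetmd, which its existing Value plumbing uses.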

pkg/dataobj/querier/iter.go

Lines changed: 2 additions & 6 deletions

@@ -71,10 +71,8 @@ func newEntryIterator(ctx context.Context,
 		streamExtractor log.StreamPipeline
 		streamHash      uint64
 		top             = newTopK(int(req.Limit), req.Direction)
+		statistics      = stats.FromContext(ctx)
 	)
-	statistics := stats.FromContext(ctx)
-	// For dataobjs, this maps to sections downloaded
-	statistics.AddChunksDownloaded(1)

 	for {
 		n, err := reader.Read(ctx, buf)
@@ -98,12 +96,11 @@
 		}

 		timestamp := record.Timestamp.UnixNano()
-		statistics.AddDecompressedLines(1)
 		line, parsedLabels, ok := streamExtractor.Process(timestamp, record.Line, record.Metadata...)
 		if !ok {
 			continue
 		}
-		statistics.AddPostFilterLines(1)
+		statistics.AddPostFilterRows(1)

 		top.Add(entryWithLabels{
 			Labels: parsedLabels.String(),
@@ -338,7 +335,6 @@ func newSampleIterator(ctx context.Context,
 		// TODO(twhitney): when iterating over multiple extractors, we need a way to pre-process as much of the line as possible
 		// In the case of multi-variant expressions, the only difference between the multiple extractors should be the final value, with all
 		// other filters and processing already done.
-		statistics.AddDecompressedLines(1)
 		value, parsedLabels, ok := streamExtractor.Process(timestamp, record.Line, record.Metadata...)
 		if !ok {
 			continue
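
These deletions appear to remove double counting: the dataset Reader now records decompressed rows and pages itself (reader.go above), so the iterator drops the chunk-oriented AddDecompressedLines and AddChunksDownloaded calls, and its post-filter tally moves to the new dataobj-specific AddPostFilterRows, which ComputeSummary folds into Summary.TotalPostFilterLines (see stats/context.go below).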

pkg/logql/bench/bench_test.go

Lines changed: 2 additions & 2 deletions

@@ -253,9 +253,9 @@ func BenchmarkLogQL(b *testing.B) {
 			for i := 0; i < b.N; i++ {
 				r, err := q.Exec(ctx)
 				require.NoError(b, err)
-				b.ReportMetric(float64(r.Statistics.TotalDecompressedLines()), "linesScanned")
-				b.ReportMetric(float64(r.Statistics.TotalChunksDownloaded()), "chunks/dataobjSections")
+				b.ReportMetric(float64(r.Statistics.Summary.TotalLinesProcessed), "linesProcessed")
 				b.ReportMetric(float64(r.Statistics.Summary.TotalPostFilterLines), "postFilterLines")
+				b.ReportMetric(float64(r.Statistics.Summary.TotalBytesProcessed)/1024, "kilobytesProcessed")
 			}
 		})
 	}
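
With the reader-local helpers gone, the benchmark reports from the computed summary instead: linesProcessed and postFilterLines now include the Dataobj contributions once ComputeSummary folds them in, and kilobytesProcessed is Summary.TotalBytesProcessed divided by 1024.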

pkg/logqlmodel/stats/context.go

Lines changed: 64 additions & 4 deletions

@@ -181,12 +181,16 @@ func JoinIngesters(ctx context.Context, inc Ingester) {
 // ComputeSummary compute the summary of the statistics.
 func (r *Result) ComputeSummary(execTime time.Duration, queueTime time.Duration, totalEntriesReturned int) {
 	r.Summary.TotalBytesProcessed = r.Querier.Store.Chunk.DecompressedBytes + r.Querier.Store.Chunk.HeadChunkBytes +
-		r.Ingester.Store.Chunk.DecompressedBytes + r.Ingester.Store.Chunk.HeadChunkBytes
+		r.Ingester.Store.Chunk.DecompressedBytes + r.Ingester.Store.Chunk.HeadChunkBytes +
+		r.Querier.Store.Dataobj.PrePredicateDecompressedBytes + r.Querier.Store.Dataobj.PostPredicateDecompressedBytes
 	r.Summary.TotalStructuredMetadataBytesProcessed = r.Querier.Store.Chunk.DecompressedStructuredMetadataBytes + r.Querier.Store.Chunk.HeadChunkStructuredMetadataBytes +
-		r.Ingester.Store.Chunk.DecompressedStructuredMetadataBytes + r.Ingester.Store.Chunk.HeadChunkStructuredMetadataBytes
+		r.Ingester.Store.Chunk.DecompressedStructuredMetadataBytes + r.Ingester.Store.Chunk.HeadChunkStructuredMetadataBytes +
+		r.Querier.Store.Dataobj.PrePredicateDecompressedStructuredMetadataBytes + r.Querier.Store.Dataobj.PostPredicateStructuredMetadataBytes
 	r.Summary.TotalLinesProcessed = r.Querier.Store.Chunk.DecompressedLines + r.Querier.Store.Chunk.HeadChunkLines +
-		r.Ingester.Store.Chunk.DecompressedLines + r.Ingester.Store.Chunk.HeadChunkLines
-	r.Summary.TotalPostFilterLines = r.Querier.Store.Chunk.PostFilterLines + r.Ingester.Store.Chunk.PostFilterLines
+		r.Ingester.Store.Chunk.DecompressedLines + r.Ingester.Store.Chunk.HeadChunkLines +
+		r.Querier.Store.Dataobj.PrePredicateDecompressedRows
+	r.Summary.TotalPostFilterLines = r.Querier.Store.Chunk.PostFilterLines + r.Ingester.Store.Chunk.PostFilterLines +
+		r.Querier.Store.Dataobj.PostFilterRows
 	r.Summary.ExecTime = execTime.Seconds()
 	if execTime != 0 {
 		r.Summary.BytesProcessedPerSecond = int64(float64(r.Summary.TotalBytesProcessed) /
@@ -217,6 +221,18 @@ func (s *Store) Merge(m Store) {
 	s.Chunk.CompressedBytes += m.Chunk.CompressedBytes
 	s.Chunk.TotalDuplicates += m.Chunk.TotalDuplicates
 	s.Chunk.PostFilterLines += m.Chunk.PostFilterLines
+	s.Dataobj.PrePredicateDecompressedRows += m.Dataobj.PrePredicateDecompressedRows
+	s.Dataobj.PrePredicateDecompressedBytes += m.Dataobj.PrePredicateDecompressedBytes
+	s.Dataobj.PrePredicateDecompressedStructuredMetadataBytes += m.Dataobj.PrePredicateDecompressedStructuredMetadataBytes
+	s.Dataobj.PostPredicateDecompressedBytes += m.Dataobj.PostPredicateDecompressedBytes
+	s.Dataobj.PostPredicateRows += m.Dataobj.PostPredicateRows
+	s.Dataobj.PostPredicateStructuredMetadataBytes += m.Dataobj.PostPredicateStructuredMetadataBytes
+	s.Dataobj.PostFilterRows += m.Dataobj.PostFilterRows
+	s.Dataobj.PagesScanned += m.Dataobj.PagesScanned
+	s.Dataobj.PagesDownloaded += m.Dataobj.PagesDownloaded
+	s.Dataobj.PagesDownloadedBytes += m.Dataobj.PagesDownloadedBytes
+	s.Dataobj.PageBatches += m.Dataobj.PageBatches
+	s.Dataobj.TotalRowsAvailable += m.Dataobj.TotalRowsAvailable
 	if m.QueryReferencedStructured {
 		s.QueryReferencedStructured = true
 	}
@@ -513,6 +529,50 @@ func (c *Context) AddSplitQueries(num int64) {
 	atomic.AddInt64(&c.result.Summary.Splits, num)
 }

+func (c *Context) AddPrePredicateDecompressedRows(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PrePredicateDecompressedRows, i)
+}
+
+func (c *Context) AddPrePredicateDecompressedBytes(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PrePredicateDecompressedBytes, i)
+}
+
+func (c *Context) AddPostPredicateDecompressedBytes(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PostPredicateDecompressedBytes, i)
+}
+
+func (c *Context) AddPostPredicateRows(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PostPredicateRows, i)
+}
+
+func (c *Context) AddPostPredicateStructuredMetadataBytes(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PostPredicateStructuredMetadataBytes, i)
+}
+
+func (c *Context) AddPostFilterRows(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PostFilterRows, i)
+}
+
+func (c *Context) AddPagesScanned(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PagesScanned, i)
+}
+
+func (c *Context) AddPagesDownloaded(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PagesDownloaded, i)
+}
+
+func (c *Context) AddPagesDownloadedBytes(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PagesDownloadedBytes, i)
+}
+
+func (c *Context) AddPageBatches(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.PageBatches, i)
+}
+
+func (c *Context) AddTotalRowsAvailable(i int64) {
+	atomic.AddInt64(&c.store.Dataobj.TotalRowsAvailable, i)
+}
+
 func (c *Context) SetQueryReferencedStructuredMetadata() {
 	c.store.QueryReferencedStructured = true
 }
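
Putting the pieces together, here is a minimal sketch of how the new counters surface in a query result. It assumes stats.NewContext and Context.Result keep their existing signatures (a *Context plus a derived context.Context, and a Result method that invokes ComputeSummary); the counter values are made up for illustration:

	package main

	import (
		"context"
		"fmt"
		"time"

		"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
	)

	func main() {
		// Attach a stats collector to the context; readers deeper in the
		// call tree retrieve it with stats.FromContext.
		statsCtx, ctx := stats.NewContext(context.Background())

		// Simulate what the dataobj dataset reader records during a read.
		s := stats.FromContext(ctx)
		s.AddPrePredicateDecompressedRows(100)
		s.AddPrePredicateDecompressedBytes(5000)
		s.AddPostPredicateRows(10)
		s.AddPostFilterRows(10)
		s.AddPagesScanned(3)

		// Result merges the collected store into the result and calls
		// ComputeSummary, which now folds the Dataobj counters into the
		// totals alongside the chunk-based ones.
		r := statsCtx.Result(250*time.Millisecond, 0, 10)
		fmt.Println(r.Summary.TotalLinesProcessed)  // includes the 100 dataobj rows
		fmt.Println(r.Summary.TotalBytesProcessed)  // includes the 5000 dataobj bytes
		fmt.Println(r.Summary.TotalPostFilterLines) // includes the 10 post-filter rows
	}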
