-
Notifications
You must be signed in to change notification settings - Fork 3.8k
feat(blockbuilder): priority queue for job dispatching #15245
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
fd79b56
priority queue & circular buffer impl
owen-d f7c10b4
deterministic job ordering in time range planner
owen-d 2d4ab8b
[wip] priority for planning jobs
owen-d b7150cc
[wip] priority for planning jobs
owen-d 2ecc114
lint
owen-d 4218c7c
Merge remote-tracking branch 'upstream/main' into block-scheduler-que…
owen-d c4e764d
pr feedback
owen-d File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
[wip] priority for planning jobs
- Loading branch information
commit 2d4ab8bf8ca4aaa2fc3d806109e9f083b4df3c24
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,69 +3,82 @@ package scheduler | |
| import ( | ||
| "fmt" | ||
| "sync" | ||
| "time" | ||
|
|
||
| "github.com/grafana/loki/v3/pkg/blockbuilder/types" | ||
| ) | ||
|
|
||
| // jobAssignment tracks a job and its assigned builder | ||
| type jobAssignment struct { | ||
| const ( | ||
| defaultCompletedJobsCapacity = 100 | ||
| ) | ||
|
|
||
| // JobWithPriority wraps a job with a priority value | ||
| type JobWithPriority[T comparable] struct { | ||
| Job *types.Job | ||
| Priority T | ||
| } | ||
|
|
||
| // NewJobWithPriority creates a new JobWithPriority instance | ||
| func NewJobWithPriority[T comparable](job *types.Job, priority T) *JobWithPriority[T] { | ||
| return &JobWithPriority[T]{ | ||
| Job: job, | ||
| Priority: priority, | ||
| } | ||
| } | ||
|
|
||
| // inProgressJob contains a job and its start time | ||
| type inProgressJob struct { | ||
| job *types.Job | ||
| builderID string | ||
| startTime time.Time | ||
| } | ||
|
|
||
| // Duration returns how long the job has been running | ||
| func (j *inProgressJob) Duration() time.Duration { | ||
| return time.Since(j.startTime) | ||
| } | ||
|
|
||
| // JobQueue manages the queue of pending jobs and tracks their state. | ||
| type JobQueue struct { | ||
| pending map[string]*types.Job // Jobs waiting to be processed, key is job ID | ||
| inProgress map[string]*jobAssignment // job ID -> assignment info | ||
| completed map[string]*types.Job // Completed jobs, key is job ID | ||
| pending *PriorityQueue[*JobWithPriority[int]] // Jobs waiting to be processed, ordered by priority | ||
| inProgress map[string]*inProgressJob // Jobs currently being processed, key is job ID | ||
| completed *CircularBuffer[*types.Job] // Last N completed jobs | ||
| statusMap map[string]types.JobStatus // Maps job ID to its current status | ||
| mu sync.RWMutex | ||
| } | ||
|
|
||
| // NewJobQueue creates a new job queue instance | ||
| func NewJobQueue() *JobQueue { | ||
| return &JobQueue{ | ||
| pending: make(map[string]*types.Job), | ||
| inProgress: make(map[string]*jobAssignment), | ||
| completed: make(map[string]*types.Job), | ||
| pending: NewPriorityQueue[*JobWithPriority[int]](func(a, b *JobWithPriority[int]) bool { | ||
| return a.Priority > b.Priority // Higher priority first | ||
| }), | ||
| inProgress: make(map[string]*inProgressJob), | ||
| completed: NewCircularBuffer[*types.Job](defaultCompletedJobsCapacity), | ||
| statusMap: make(map[string]types.JobStatus), | ||
| } | ||
| } | ||
|
|
||
| func (q *JobQueue) Exists(job *types.Job) (types.JobStatus, bool) { | ||
| q.mu.RLock() | ||
| defer q.mu.RUnlock() | ||
|
|
||
| if _, ok := q.inProgress[job.ID]; ok { | ||
| return types.JobStatusInProgress, true | ||
| } | ||
|
|
||
| if _, ok := q.pending[job.ID]; ok { | ||
| return types.JobStatusPending, true | ||
| } | ||
|
|
||
| if _, ok := q.completed[job.ID]; ok { | ||
| return types.JobStatusComplete, true | ||
| } | ||
|
|
||
| return -1, false | ||
| status, exists := q.statusMap[job.ID] | ||
| return status, exists | ||
| } | ||
|
|
||
| // Enqueue adds a new job to the pending queue | ||
| // This is a naive implementation, intended to be refactored | ||
| func (q *JobQueue) Enqueue(job *types.Job) error { | ||
| // Enqueue adds a new job to the pending queue with a priority | ||
| func (q *JobQueue) Enqueue(job *types.Job, priority int) error { | ||
| q.mu.Lock() | ||
| defer q.mu.Unlock() | ||
|
|
||
| if _, exists := q.pending[job.ID]; exists { | ||
| return fmt.Errorf("job %s already exists in pending queue", job.ID) | ||
| } | ||
| if _, exists := q.inProgress[job.ID]; exists { | ||
| return fmt.Errorf("job %s already exists in progress", job.ID) | ||
| } | ||
| if _, exists := q.completed[job.ID]; exists { | ||
| return fmt.Errorf("job %s already completed", job.ID) | ||
| // Check if job already exists | ||
| if status, exists := q.statusMap[job.ID]; exists { | ||
| return fmt.Errorf("job %s already exists with status %v", job.ID, status) | ||
| } | ||
|
|
||
| q.pending[job.ID] = job | ||
| jobWithPriority := NewJobWithPriority(job, priority) | ||
| q.pending.Push(jobWithPriority) | ||
| q.statusMap[job.ID] = types.JobStatusPending | ||
| return nil | ||
| } | ||
|
|
||
|
|
@@ -74,52 +87,65 @@ func (q *JobQueue) Dequeue(builderID string) (*types.Job, bool, error) { | |
| q.mu.Lock() | ||
| defer q.mu.Unlock() | ||
|
|
||
| // Simple FIFO for now | ||
| for id, job := range q.pending { | ||
| delete(q.pending, id) | ||
| q.inProgress[id] = &jobAssignment{ | ||
| job: job, | ||
| builderID: builderID, | ||
| } | ||
| return job, true, nil | ||
| if q.pending.Len() == 0 { | ||
| return nil, false, nil | ||
| } | ||
|
|
||
| return nil, false, nil | ||
| jobWithPriority, ok := q.pending.Pop() | ||
| if !ok { | ||
| return nil, false, nil | ||
| } | ||
|
|
||
| // Add to in-progress with current time | ||
| q.inProgress[jobWithPriority.Job.ID] = &inProgressJob{ | ||
| job: jobWithPriority.Job, | ||
| startTime: time.Now(), | ||
| } | ||
| q.statusMap[jobWithPriority.Job.ID] = types.JobStatusInProgress | ||
|
|
||
| return jobWithPriority.Job, true, nil | ||
| } | ||
|
|
||
| // MarkComplete moves a job from in-progress to completed | ||
| func (q *JobQueue) MarkComplete(jobID string, builderID string) error { | ||
| func (q *JobQueue) MarkComplete(jobID string) error { | ||
| q.mu.Lock() | ||
| defer q.mu.Unlock() | ||
|
|
||
| assignment, exists := q.inProgress[jobID] | ||
| // Find job in in-progress map | ||
| inProgressJob, exists := q.inProgress[jobID] | ||
| if !exists { | ||
| return fmt.Errorf("job %s not found in progress", jobID) | ||
| } | ||
|
|
||
| if assignment.builderID != builderID { | ||
| return fmt.Errorf("job %s not assigned to builder %s", jobID, builderID) | ||
| // Remove from in-progress | ||
| delete(q.inProgress, jobID) | ||
|
|
||
| // Add to completed buffer and handle evicted job | ||
| if evictedJob, hasEvicted := q.completed.Push(inProgressJob.job); hasEvicted { | ||
| // Remove evicted job from status map | ||
| delete(q.statusMap, evictedJob.ID) | ||
| } | ||
| q.statusMap[jobID] = types.JobStatusComplete | ||
|
|
||
| delete(q.inProgress, jobID) | ||
| q.completed[jobID] = assignment.job | ||
| return nil | ||
| } | ||
|
|
||
| // SyncJob updates the state of an in-progress job | ||
| func (q *JobQueue) SyncJob(jobID string, builderID string, job *types.Job) error { | ||
| // SyncJob registers a job as in-progress, used for restoring state after scheduler restarts | ||
| func (q *JobQueue) SyncJob(jobID string, _ string, job *types.Job) error { | ||
| q.mu.Lock() | ||
| defer q.mu.Unlock() | ||
|
|
||
| assignment, exists := q.inProgress[jobID] | ||
| if !exists { | ||
| return fmt.Errorf("job %s not found in progress", jobID) | ||
| // Check if job already exists | ||
| if status, exists := q.statusMap[jobID]; exists { | ||
|
||
| return fmt.Errorf("job %s already exists with status %v", jobID, status) | ||
| } | ||
|
|
||
| if assignment.builderID != builderID { | ||
| return fmt.Errorf("job %s not assigned to builder %s", jobID, builderID) | ||
| // Add directly to in-progress | ||
| q.inProgress[jobID] = &inProgressJob{ | ||
| job: job, | ||
| startTime: time.Now(), | ||
| } | ||
| q.statusMap[jobID] = types.JobStatusInProgress | ||
|
|
||
| assignment.job = job | ||
| return nil | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: maybe this should not error? If a scheduler restarted and lost its state, we'd miss committing already-consumed jobs.
But this can be dealt with later, when we add the committer logic.