Skip to content

Support lastEvaluation and evaluationTime in /api/v1/rules endpoints and make order of groups stable. #2196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 4, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
* [FEATURE] Added user sub rings to distribute users to a subset of ingesters. #1947
* `--experimental.distributor.user-subring-size`
* [FEATURE] Added flag `-experimental.ruler.enable-api` to enable the ruler api which implements the Prometheus API `/api/v1/rules` and `/api/v1/alerts` endpoints under the configured `-http.prefix`. #1999
* [ENHANCEMENT] Support `lastEvaluation` and `evaluationTime` in `/api/v1/rules` endpoints and make order of groups stable. #2196
* [FEATURE] Added sharding support to compactor when using the experimental TSDB blocks storage. #2113
* [FEATURE] Add ability to override YAML config file settings using environment variables. #2147
* `-config.expand-env`
Expand Down
94 changes: 56 additions & 38 deletions pkg/ruler/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ruler
import (
"encoding/json"
"net/http"
"sort"
"strconv"
"time"

Expand Down Expand Up @@ -70,33 +71,39 @@ type RuleGroup struct {
// In order to preserve rule ordering, while exposing type (alerting or recording)
// specific properties, both alerting and recording rules are exposed in the
// same array.
Rules []rule `json:"rules"`
Interval float64 `json:"interval"`
Rules []rule `json:"rules"`
Interval float64 `json:"interval"`
LastEvaluation time.Time `json:"lastEvaluation"`
EvaluationTime float64 `json:"evaluationTime"`
}

type rule interface{}

type alertingRule struct {
// State can be "pending", "firing", "inactive".
State string `json:"state"`
Name string `json:"name"`
Query string `json:"query"`
Duration float64 `json:"duration"`
Labels labels.Labels `json:"labels"`
Annotations labels.Labels `json:"annotations"`
Alerts []*Alert `json:"alerts"`
Health string `json:"health"`
LastError string `json:"lastError,omitempty"`
Type v1.RuleType `json:"type"`
State string `json:"state"`
Name string `json:"name"`
Query string `json:"query"`
Duration float64 `json:"duration"`
Labels labels.Labels `json:"labels"`
Annotations labels.Labels `json:"annotations"`
Alerts []*Alert `json:"alerts"`
Health string `json:"health"`
LastError string `json:"lastError,omitempty"`
Type v1.RuleType `json:"type"`
LastEvaluation time.Time `json:"lastEvaluation"`
EvaluationTime float64 `json:"evaluationTime"`
}

type recordingRule struct {
Name string `json:"name"`
Query string `json:"query"`
Labels labels.Labels `json:"labels,omitempty"`
Health string `json:"health"`
LastError string `json:"lastError,omitempty"`
Type v1.RuleType `json:"type"`
Name string `json:"name"`
Query string `json:"query"`
Labels labels.Labels `json:"labels,omitempty"`
Health string `json:"health"`
LastError string `json:"lastError,omitempty"`
Type v1.RuleType `json:"type"`
LastEvaluation time.Time `json:"lastEvaluation"`
EvaluationTime float64 `json:"evaluationTime"`
}

func respondError(logger log.Logger, w http.ResponseWriter, msg string) {
Expand Down Expand Up @@ -140,10 +147,12 @@ func (r *Ruler) rules(w http.ResponseWriter, req *http.Request) {

for _, g := range rgs {
grp := RuleGroup{
Name: g.Name,
File: g.Namespace,
Interval: g.Interval.Seconds(),
Rules: make([]rule, len(g.Rules)),
Name: g.Name,
File: g.Namespace,
Rules: make([]rule, len(g.Rules)),
Interval: g.Interval.Seconds(),
LastEvaluation: g.GetEvaluationTimestamp(),
EvaluationTime: g.GetEvaluationDuration().Seconds(),
}

for i, rl := range g.Rules {
Expand All @@ -159,31 +168,40 @@ func (r *Ruler) rules(w http.ResponseWriter, req *http.Request) {
})
}
grp.Rules[i] = alertingRule{
State: rl.GetState(),
Name: rl.GetAlert(),
Query: rl.GetExpr(),
Duration: rl.For.Seconds(),
Labels: client.FromLabelAdaptersToLabels(rl.Labels),
Annotations: client.FromLabelAdaptersToLabels(rl.Annotations),
Alerts: alerts,
Health: rl.GetHealth(),
LastError: rl.GetLastError(),
Type: v1.RuleTypeAlerting,
State: rl.GetState(),
Name: rl.GetAlert(),
Query: rl.GetExpr(),
Duration: rl.For.Seconds(),
Labels: client.FromLabelAdaptersToLabels(rl.Labels),
Annotations: client.FromLabelAdaptersToLabels(rl.Annotations),
Alerts: alerts,
Health: rl.GetHealth(),
LastError: rl.GetLastError(),
LastEvaluation: rl.GetEvaluationTimestamp(),
EvaluationTime: rl.GetEvaluationDuration().Seconds(),
Type: v1.RuleTypeAlerting,
}
} else {
grp.Rules[i] = recordingRule{
Name: rl.GetRecord(),
Query: rl.GetExpr(),
Labels: client.FromLabelAdaptersToLabels(rl.Labels),
Health: rl.GetHealth(),
LastError: rl.GetLastError(),
Type: v1.RuleTypeRecording,
Name: rl.GetRecord(),
Query: rl.GetExpr(),
Labels: client.FromLabelAdaptersToLabels(rl.Labels),
Health: rl.GetHealth(),
LastError: rl.GetLastError(),
LastEvaluation: rl.GetEvaluationTimestamp(),
EvaluationTime: rl.GetEvaluationDuration().Seconds(),
Type: v1.RuleTypeRecording,
}
}
}
groups = append(groups, &grp)
}

// keep data.groups in a stable order
sort.Slice(groups, func(i, j int) bool {
return groups[i].File < groups[j].File
})

b, err := json.Marshal(&response{
Status: "success",
Data: &RuleDiscovery{RuleGroups: groups},
Expand Down
46 changes: 27 additions & 19 deletions pkg/ruler/ruler.go
Original file line number Diff line number Diff line change
Expand Up @@ -494,10 +494,12 @@ func (r *Ruler) getLocalRules(userID string) ([]*rules.RuleGroupDesc, error) {
for _, group := range groups {
interval := group.Interval()
groupDesc := &rules.RuleGroupDesc{
Name: group.Name(),
Namespace: strings.TrimPrefix(group.File(), prefix),
Interval: interval,
User: userID,
Name: group.Name(),
Namespace: strings.TrimPrefix(group.File(), prefix),
Interval: interval,
User: userID,
EvaluationTimestamp: group.GetEvaluationTimestamp(),
EvaluationDuration: group.GetEvaluationDuration(),
}
for _, r := range group.Rules() {
lastError := ""
Expand All @@ -524,23 +526,27 @@ func (r *Ruler) getLocalRules(userID string) ([]*rules.RuleGroupDesc, error) {
})
}
ruleDesc = &rules.RuleDesc{
State: rule.State().String(),
Alert: rule.Name(),
Alerts: alerts,
Expr: rule.Query().String(),
For: rule.Duration(),
Labels: client.FromLabelsToLabelAdapters(rule.Labels()),
Annotations: client.FromLabelsToLabelAdapters(rule.Annotations()),
Health: string(rule.Health()),
LastError: lastError,
Expr: rule.Query().String(),
Alert: rule.Name(),
For: rule.Duration(),
Labels: client.FromLabelsToLabelAdapters(rule.Labels()),
Annotations: client.FromLabelsToLabelAdapters(rule.Annotations()),
State: rule.State().String(),
Health: string(rule.Health()),
LastError: lastError,
Alerts: alerts,
EvaluationTimestamp: rule.GetEvaluationTimestamp(),
EvaluationDuration: rule.GetEvaluationDuration(),
}
case *promRules.RecordingRule:
ruleDesc = &rules.RuleDesc{
Record: rule.Name(),
Expr: rule.Query().String(),
Labels: client.FromLabelsToLabelAdapters(rule.Labels()),
Health: string(rule.Health()),
LastError: lastError,
Record: rule.Name(),
Expr: rule.Query().String(),
Labels: client.FromLabelsToLabelAdapters(rule.Labels()),
Health: string(rule.Health()),
LastError: lastError,
EvaluationTimestamp: rule.GetEvaluationTimestamp(),
EvaluationDuration: rule.GetEvaluationDuration(),
}
default:
return nil, errors.Errorf("failed to assert type of rule '%v'", rule.Name())
Expand All @@ -563,7 +569,9 @@ func (r *Ruler) getShardedRules(ctx context.Context, userID string) ([]*rules.Ru
return nil, fmt.Errorf("unable to inject user ID into grpc request, %v", err)
}

rgs := []*rules.RuleGroupDesc{}
// len(rgs) can't be larger than len(rulers.Ingesters)
// alloc it in advance to avoid realloc
rgs := make([]*rules.RuleGroupDesc, 0, len(rulers.Ingesters))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need this change. The number of Rulers in the ring does not correlate to the number of rule groups being evaluated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The number of Rulers in the ring does not correlate to the number of rule groups being evaluated.

Yes, but it should be no more than len(rulers.Ingesters). This fix is to avoid reallocating the slice.
If the slice is empty (before my fix), its len and cap are both 0. After each append, the len increases by 1 but the cap will be 0, 2, 2, 4, 4..., which causes many allocations.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment for the same? I'll merge it after!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added the comment.


for _, rlr := range rulers.Ingesters {
conn, err := grpc.Dial(rlr.Addr, grpc.WithInsecure())
Expand Down
Loading