Skip to content

Commit e0483e3

Browse files
authored
feat: add prebuilds metrics collector (#17547)
Closes coder/internal#509. Signed-off-by: Danny Kopping <dannykopping@gmail.com>
1 parent b47d54d commit e0483e3

File tree

7 files changed

+548
-26
lines changed

7 files changed

+548
-26
lines changed

coderd/prebuilds/api.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55

66
"github.com/google/uuid"
77
"golang.org/x/xerrors"
8+
9+
"github.com/coder/coder/v2/coderd/database"
810
)
911

1012
// ErrNoClaimablePrebuiltWorkspaces is the sentinel error reporting that no
// claimable prebuilt workspaces were found.
var ErrNoClaimablePrebuiltWorkspaces = xerrors.New("no claimable prebuilt workspaces found")
@@ -25,12 +27,23 @@ type ReconciliationOrchestrator interface {
2527
}
2628

2729
type Reconciler interface {
30+
StateSnapshotter
31+
2832
// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
2933
// It takes a global snapshot of the system state and then reconciles each preset
3034
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
3135
ReconcileAll(ctx context.Context) error
3236
}
3337

38+
// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.
39+
type StateSnapshotter interface {
40+
// SnapshotState captures the current state of all prebuilds across templates.
41+
// It creates a global database snapshot that can be viewed as a collection of PresetSnapshots,
42+
// each representing the state of prebuilds for a specific preset.
43+
// MUST be called inside a repeatable-read transaction.
44+
SnapshotState(ctx context.Context, store database.Store) (*GlobalSnapshot, error)
45+
}
46+
3447
type Claimer interface {
3548
Claim(ctx context.Context, userID uuid.UUID, name string, presetID uuid.UUID) (*uuid.UUID, error)
3649
Initiator() uuid.UUID

enterprise/coderd/coderd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,6 @@ func (api *API) setupPrebuilds(featureEnabled bool) (agplprebuilds.Reconciliatio
11651165
}
11661166

11671167
reconciler := prebuilds.NewStoreReconciler(api.Database, api.Pubsub, api.DeploymentValues.Prebuilds,
1168-
api.Logger.Named("prebuilds"), quartz.NewReal())
1168+
api.Logger.Named("prebuilds"), quartz.NewReal(), api.PrometheusRegistry)
11691169
return reconciler, prebuilds.EnterpriseClaimer{}
11701170
}

enterprise/coderd/prebuilds/claim_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"time"
1111

1212
"github.com/google/uuid"
13+
"github.com/prometheus/client_golang/prometheus"
1314
"github.com/stretchr/testify/require"
1415
"golang.org/x/xerrors"
1516

@@ -142,7 +143,7 @@ func TestClaimPrebuild(t *testing.T) {
142143
EntitlementsUpdateInterval: time.Second,
143144
})
144145

145-
reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
146+
reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry())
146147
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(spy)
147148
api.AGPL.PrebuildsClaimer.Store(&claimer)
148149

@@ -419,7 +420,7 @@ func TestClaimPrebuild_CheckDifferentErrors(t *testing.T) {
419420
EntitlementsUpdateInterval: time.Second,
420421
})
421422

422-
reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
423+
reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), api.PrometheusRegistry)
423424
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(errorStore)
424425
api.AGPL.PrebuildsClaimer.Store(&claimer)
425426

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
package prebuilds
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"cdr.dev/slog"
8+
9+
"github.com/prometheus/client_golang/prometheus"
10+
11+
"github.com/coder/coder/v2/coderd/database"
12+
"github.com/coder/coder/v2/coderd/database/dbauthz"
13+
"github.com/coder/coder/v2/coderd/prebuilds"
14+
)
15+
16+
var (
17+
labels = []string{"template_name", "preset_name", "organization_name"}
18+
createdPrebuildsDesc = prometheus.NewDesc(
19+
"coderd_prebuilt_workspaces_created_total",
20+
"Total number of prebuilt workspaces that have been created to meet the desired instance count of each "+
21+
"template preset.",
22+
labels,
23+
nil,
24+
)
25+
failedPrebuildsDesc = prometheus.NewDesc(
26+
"coderd_prebuilt_workspaces_failed_total",
27+
"Total number of prebuilt workspaces that failed to build.",
28+
labels,
29+
nil,
30+
)
31+
claimedPrebuildsDesc = prometheus.NewDesc(
32+
"coderd_prebuilt_workspaces_claimed_total",
33+
"Total number of prebuilt workspaces which were claimed by users. Claiming refers to creating a workspace "+
34+
"with a preset selected for which eligible prebuilt workspaces are available and one is reassigned to a user.",
35+
labels,
36+
nil,
37+
)
38+
desiredPrebuildsDesc = prometheus.NewDesc(
39+
"coderd_prebuilt_workspaces_desired",
40+
"Target number of prebuilt workspaces that should be available for each template preset.",
41+
labels,
42+
nil,
43+
)
44+
runningPrebuildsDesc = prometheus.NewDesc(
45+
"coderd_prebuilt_workspaces_running",
46+
"Current number of prebuilt workspaces that are in a running state. These workspaces have started "+
47+
"successfully but may not yet be claimable by users (see coderd_prebuilt_workspaces_eligible).",
48+
labels,
49+
nil,
50+
)
51+
eligiblePrebuildsDesc = prometheus.NewDesc(
52+
"coderd_prebuilt_workspaces_eligible",
53+
"Current number of prebuilt workspaces that are eligible to be claimed by users. These are workspaces that "+
54+
"have completed their build process with their agent reporting 'ready' status.",
55+
labels,
56+
nil,
57+
)
58+
)
59+
60+
type MetricsCollector struct {
61+
database database.Store
62+
logger slog.Logger
63+
snapshotter prebuilds.StateSnapshotter
64+
}
65+
66+
// Compile-time check that *MetricsCollector satisfies prometheus.Collector.
var _ prometheus.Collector = (*MetricsCollector)(nil)
67+
68+
func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
69+
return &MetricsCollector{
70+
database: db,
71+
logger: logger.Named("prebuilds_metrics_collector"),
72+
snapshotter: snapshotter,
73+
}
74+
}
75+
76+
func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
77+
descCh <- createdPrebuildsDesc
78+
descCh <- failedPrebuildsDesc
79+
descCh <- claimedPrebuildsDesc
80+
descCh <- desiredPrebuildsDesc
81+
descCh <- runningPrebuildsDesc
82+
descCh <- eligiblePrebuildsDesc
83+
}
84+
85+
func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
86+
// nolint:gocritic // We need to set an authz context to read metrics from the db.
87+
ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second)
88+
defer cancel()
89+
prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx)
90+
if err != nil {
91+
mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
92+
return
93+
}
94+
95+
for _, metric := range prebuildMetrics {
96+
metricsCh <- prometheus.MustNewConstMetric(createdPrebuildsDesc, prometheus.CounterValue, float64(metric.CreatedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
97+
metricsCh <- prometheus.MustNewConstMetric(failedPrebuildsDesc, prometheus.CounterValue, float64(metric.FailedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
98+
metricsCh <- prometheus.MustNewConstMetric(claimedPrebuildsDesc, prometheus.CounterValue, float64(metric.ClaimedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
99+
}
100+
101+
snapshot, err := mc.snapshotter.SnapshotState(ctx, mc.database)
102+
if err != nil {
103+
mc.logger.Error(ctx, "failed to get latest prebuild state", slog.Error(err))
104+
return
105+
}
106+
107+
for _, preset := range snapshot.Presets {
108+
if !preset.UsingActiveVersion {
109+
continue
110+
}
111+
112+
presetSnapshot, err := snapshot.FilterByPreset(preset.ID)
113+
if err != nil {
114+
mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err))
115+
continue
116+
}
117+
state := presetSnapshot.CalculateState()
118+
119+
metricsCh <- prometheus.MustNewConstMetric(desiredPrebuildsDesc, prometheus.GaugeValue, float64(state.Desired), preset.TemplateName, preset.Name, preset.OrganizationName)
120+
metricsCh <- prometheus.MustNewConstMetric(runningPrebuildsDesc, prometheus.GaugeValue, float64(state.Actual), preset.TemplateName, preset.Name, preset.OrganizationName)
121+
metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
122+
}
123+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy