Content-Length: 58549 | pFad | http://github.com/coder/coder/pull/17547.patch
thub.com
From e8ff926abf2725a5a4386a8abfc8ed6d643ef394 Mon Sep 17 00:00:00 2001
From: Danny Kopping
Date: Wed, 23 Apr 2025 17:03:34 +0200
Subject: [PATCH 1/6] Add metrics collector
Signed-off-by: Danny Kopping
Reverting unnecessary changes that crept in
Signed-off-by: Danny Kopping
---
coderd/prebuilds/api.go | 13 +
enterprise/coderd/coderd.go | 2 +-
enterprise/coderd/prebuilds/claim_test.go | 3 +-
.../coderd/prebuilds/metricscollector.go | 126 ++++++++
.../coderd/prebuilds/metricscollector_test.go | 284 ++++++++++++++++++
enterprise/coderd/prebuilds/reconcile.go | 51 +++-
enterprise/coderd/prebuilds/reconcile_test.go | 18 +-
7 files changed, 473 insertions(+), 24 deletions(-)
create mode 100644 enterprise/coderd/prebuilds/metricscollector.go
create mode 100644 enterprise/coderd/prebuilds/metricscollector_test.go
diff --git a/coderd/prebuilds/api.go b/coderd/prebuilds/api.go
index ba174d318d5fa..2342da5d62c07 100644
--- a/coderd/prebuilds/api.go
+++ b/coderd/prebuilds/api.go
@@ -5,6 +5,8 @@ import (
"github.com/google/uuid"
"golang.org/x/xerrors"
+
+ "github.com/coder/coder/v2/coderd/database"
)
var ErrNoClaimablePrebuiltWorkspaces = xerrors.New("no claimable prebuilt workspaces found")
@@ -25,12 +27,23 @@ type ReconciliationOrchestrator interface {
}
type Reconciler interface {
+ StateSnapshotter
+
// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
// It takes a global snapshot of the system state and then reconciles each preset
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
ReconcileAll(ctx context.Context) error
}
+// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.
+type StateSnapshotter interface {
+ // SnapshotState captures the current state of all prebuilds across templates.
+ // It creates a global database snapshot that can be viewed as a collection of PresetSnapshots,
+ // each representing the state of prebuilds for a specific preset.
+ // MUST be called inside a repeatable-read transaction.
+ SnapshotState(ctx context.Context, store database.Store) (*GlobalSnapshot, error)
+}
+
type Claimer interface {
Claim(ctx context.Context, userID uuid.UUID, name string, presetID uuid.UUID) (*uuid.UUID, error)
Initiator() uuid.UUID
diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go
index 1f468997ac220..ca3531b60db78 100644
--- a/enterprise/coderd/coderd.go
+++ b/enterprise/coderd/coderd.go
@@ -1165,6 +1165,6 @@ func (api *API) setupPrebuilds(featureEnabled bool) (agplprebuilds.Reconciliatio
}
reconciler := prebuilds.NewStoreReconciler(api.Database, api.Pubsub, api.DeploymentValues.Prebuilds,
- api.Logger.Named("prebuilds"), quartz.NewReal())
+ api.Logger.Named("prebuilds"), quartz.NewReal(), api.PrometheusRegistry)
return reconciler, prebuilds.EnterpriseClaimer{}
}
diff --git a/enterprise/coderd/prebuilds/claim_test.go b/enterprise/coderd/prebuilds/claim_test.go
index 4f398724b8265..e8d66fae18a07 100644
--- a/enterprise/coderd/prebuilds/claim_test.go
+++ b/enterprise/coderd/prebuilds/claim_test.go
@@ -10,6 +10,7 @@ import (
"time"
"github.com/google/uuid"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"
@@ -142,7 +143,7 @@ func TestClaimPrebuild(t *testing.T) {
EntitlementsUpdateInterval: time.Second,
})
- reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
+ reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry())
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(spy)
api.AGPL.PrebuildsClaimer.Store(&claimer)
diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go
new file mode 100644
index 0000000000000..507dbddd383ef
--- /dev/null
+++ b/enterprise/coderd/prebuilds/metricscollector.go
@@ -0,0 +1,126 @@
+package prebuilds
+
+import (
+ "context"
+ "time"
+
+ "cdr.dev/slog"
+
+ "github.com/prometheus/client_golang/prometheus"
+
+ "github.com/coder/coder/v2/coderd/database"
+ "github.com/coder/coder/v2/coderd/database/dbauthz"
+ "github.com/coder/coder/v2/coderd/prebuilds"
+)
+
+var (
+ labels = []string{"template_name", "preset_name", "organization_name"}
+ createdPrebuildsDesc = prometheus.NewDesc(
+ "coderd_prebuilds_created_total",
+ "The number of prebuilds that have been created to meet the desired count set by presets.",
+ labels,
+ nil,
+ )
+ failedPrebuildsDesc = prometheus.NewDesc(
+ "coderd_prebuilds_failed_total",
+ "The number of prebuilds that failed to build during creation.",
+ labels,
+ nil,
+ )
+ claimedPrebuildsDesc = prometheus.NewDesc(
+ "coderd_prebuilds_claimed_total",
+ "The number of prebuilds that were claimed by a user. Each count means that a user created a workspace using a preset and was assigned a prebuild instead of a brand new workspace.",
+ labels,
+ nil,
+ )
+ usedPresetsDesc = prometheus.NewDesc(
+ "coderd_prebuilds_used_presets",
+ "The number of times a preset was used to build a prebuild.",
+ labels,
+ nil,
+ )
+ desiredPrebuildsDesc = prometheus.NewDesc(
+ "coderd_prebuilds_desired",
+ "The number of prebuilds desired by each preset of each template.",
+ labels,
+ nil,
+ )
+ runningPrebuildsDesc = prometheus.NewDesc(
+ "coderd_prebuilds_running",
+ "The number of prebuilds that are currently running. Running prebuilds have successfully started, but they may not be ready to be claimed by a user yet.",
+ labels,
+ nil,
+ )
+ eligiblePrebuildsDesc = prometheus.NewDesc(
+ "coderd_prebuilds_eligible",
+ "The number of eligible prebuilds. Eligible prebuilds are prebuilds that are ready to be claimed by a user.",
+ labels,
+ nil,
+ )
+)
+
+type MetricsCollector struct {
+ database database.Store
+ logger slog.Logger
+ snapshotter prebuilds.StateSnapshotter
+}
+
+var _ prometheus.Collector = new(MetricsCollector)
+
+func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
+ return &MetricsCollector{
+ database: db,
+ logger: logger.Named("prebuilds_metrics_collector"),
+ snapshotter: snapshotter,
+ }
+}
+
+func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
+ descCh <- createdPrebuildsDesc
+ descCh <- failedPrebuildsDesc
+ descCh <- claimedPrebuildsDesc
+ descCh <- usedPresetsDesc
+ descCh <- desiredPrebuildsDesc
+ descCh <- runningPrebuildsDesc
+ descCh <- eligiblePrebuildsDesc
+}
+
+func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
+ ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second)
+ defer cancel()
+ // nolint:gocritic // just until we get back to this
+ prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx)
+ if err != nil {
+ mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
+ return
+ }
+
+ for _, metric := range prebuildMetrics {
+ metricsCh <- prometheus.MustNewConstMetric(createdPrebuildsDesc, prometheus.CounterValue, float64(metric.CreatedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
+ metricsCh <- prometheus.MustNewConstMetric(failedPrebuildsDesc, prometheus.CounterValue, float64(metric.FailedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
+ metricsCh <- prometheus.MustNewConstMetric(claimedPrebuildsDesc, prometheus.CounterValue, float64(metric.ClaimedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
+ }
+
+ snapshot, err := mc.snapshotter.SnapshotState(ctx, mc.database)
+ if err != nil {
+ mc.logger.Error(ctx, "failed to get latest prebuild state", slog.Error(err))
+ return
+ }
+
+ for _, preset := range snapshot.Presets {
+ if !preset.UsingActiveVersion {
+ continue
+ }
+
+ presetSnapshot, err := snapshot.FilterByPreset(preset.ID)
+ if err != nil {
+ mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err))
+ continue
+ }
+ state := presetSnapshot.CalculateState()
+
+ metricsCh <- prometheus.MustNewConstMetric(desiredPrebuildsDesc, prometheus.GaugeValue, float64(state.Desired), preset.TemplateName, preset.Name, preset.OrganizationName)
+ metricsCh <- prometheus.MustNewConstMetric(runningPrebuildsDesc, prometheus.GaugeValue, float64(state.Actual), preset.TemplateName, preset.Name, preset.OrganizationName)
+ metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
+ }
+}
diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go
new file mode 100644
index 0000000000000..0387ecea5e1db
--- /dev/null
+++ b/enterprise/coderd/prebuilds/metricscollector_test.go
@@ -0,0 +1,284 @@
+package prebuilds_test
+
+import (
+ "fmt"
+ "slices"
+ "testing"
+
+ "github.com/google/uuid"
+ "github.com/stretchr/testify/require"
+ "tailscale.com/types/ptr"
+
+ "github.com/prometheus/client_golang/prometheus"
+ prometheus_client "github.com/prometheus/client_model/go"
+
+ "cdr.dev/slog/sloggers/slogtest"
+ "github.com/coder/quartz"
+
+ "github.com/coder/coder/v2/coderd/database"
+ "github.com/coder/coder/v2/coderd/database/dbgen"
+ "github.com/coder/coder/v2/coderd/database/dbtestutil"
+ agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds"
+ "github.com/coder/coder/v2/codersdk"
+ "github.com/coder/coder/v2/enterprise/coderd/prebuilds"
+ "github.com/coder/coder/v2/testutil"
+)
+
+func TestMetricsCollector(t *testing.T) {
+ t.Parallel()
+
+ if !dbtestutil.WillUsePostgres() {
+ t.Skip("this test requires postgres")
+ }
+
+ type metricCheck struct {
+ name string
+ value *float64
+ isCounter bool
+ }
+
+ type testCase struct {
+ name string
+ transitions []database.WorkspaceTransition
+ jobStatuses []database.ProvisionerJobStatus
+ initiatorIDs []uuid.UUID
+ ownerIDs []uuid.UUID
+ metrics []metricCheck
+ templateDeleted []bool
+ }
+
+ tests := []testCase{
+ {
+ name: "prebuild created",
+ transitions: allTransitions,
+ jobStatuses: allJobStatuses,
+ initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ // TODO: reexamine and refactor the test cases and assertions:
+ // * a running prebuild that is not elibible to be claimed currently seems to be eligible.
+ // * a prebuild that was claimed should not be deemed running, not eligible.
+ ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
+ metrics: []metricCheck{
+ {"coderd_prebuilds_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilds_desired", ptr.To(1.0), false},
+ // {"coderd_prebuilds_running", ptr.To(0.0), false},
+ // {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ },
+ templateDeleted: []bool{false},
+ },
+ {
+ name: "prebuild running",
+ transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
+ jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
+ initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ metrics: []metricCheck{
+ {"coderd_prebuilds_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilds_desired", ptr.To(1.0), false},
+ {"coderd_prebuilds_running", ptr.To(1.0), false},
+ {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ },
+ templateDeleted: []bool{false},
+ },
+ {
+ name: "prebuild failed",
+ transitions: allTransitions,
+ jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusFailed},
+ initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
+ metrics: []metricCheck{
+ {"coderd_prebuilds_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilds_failed_total", ptr.To(1.0), true},
+ {"coderd_prebuilds_desired", ptr.To(1.0), false},
+ {"coderd_prebuilds_running", ptr.To(0.0), false},
+ {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ },
+ templateDeleted: []bool{false},
+ },
+ {
+ name: "prebuild assigned",
+ transitions: allTransitions,
+ jobStatuses: allJobStatuses,
+ initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ ownerIDs: []uuid.UUID{uuid.New()},
+ metrics: []metricCheck{
+ {"coderd_prebuilds_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilds_claimed_total", ptr.To(1.0), true},
+ {"coderd_prebuilds_desired", ptr.To(1.0), false},
+ {"coderd_prebuilds_running", ptr.To(0.0), false},
+ {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ },
+ templateDeleted: []bool{false},
+ },
+ {
+ name: "workspaces that were not created by the prebuilds user are not counted",
+ transitions: allTransitions,
+ jobStatuses: allJobStatuses,
+ initiatorIDs: []uuid.UUID{uuid.New()},
+ ownerIDs: []uuid.UUID{uuid.New()},
+ metrics: []metricCheck{
+ {"coderd_prebuilds_desired", ptr.To(1.0), false},
+ {"coderd_prebuilds_running", ptr.To(0.0), false},
+ {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ },
+ templateDeleted: []bool{false},
+ },
+ {
+ name: "deleted templates never desire prebuilds",
+ transitions: allTransitions,
+ jobStatuses: allJobStatuses,
+ initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
+ metrics: []metricCheck{
+ {"coderd_prebuilds_desired", ptr.To(0.0), false},
+ },
+ templateDeleted: []bool{true},
+ },
+ {
+ name: "running prebuilds for deleted templates are still counted, so that they can be deleted",
+ transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
+ jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
+ initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ metrics: []metricCheck{
+ {"coderd_prebuilds_running", ptr.To(1.0), false},
+ {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ },
+ templateDeleted: []bool{true},
+ },
+ }
+ for _, test := range tests {
+ test := test // capture for parallel
+ for _, transition := range test.transitions {
+ transition := transition // capture for parallel
+ for _, jobStatus := range test.jobStatuses {
+ jobStatus := jobStatus // capture for parallel
+ for _, initiatorID := range test.initiatorIDs {
+ initiatorID := initiatorID // capture for parallel
+ for _, ownerID := range test.ownerIDs {
+ ownerID := ownerID // capture for parallel
+ for _, templateDeleted := range test.templateDeleted {
+ templateDeleted := templateDeleted // capture for parallel
+ t.Run(fmt.Sprintf("%v/transition:%s/jobStatus:%s", test.name, transition, jobStatus), func(t *testing.T) {
+ t.Parallel()
+
+ logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
+ t.Cleanup(func() {
+ if t.Failed() {
+ t.Logf("failed to run test: %s", test.name)
+ t.Logf("transition: %s", transition)
+ t.Logf("jobStatus: %s", jobStatus)
+ t.Logf("initiatorID: %s", initiatorID)
+ t.Logf("ownerID: %s", ownerID)
+ t.Logf("templateDeleted: %t", templateDeleted)
+ }
+ })
+ clock := quartz.NewMock(t)
+ db, pubsub := dbtestutil.NewDB(t)
+ reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry())
+ ctx := testutil.Context(t, testutil.WaitLong)
+
+ createdUsers := []uuid.UUID{agplprebuilds.SystemUserID}
+ for _, user := range slices.Concat(test.ownerIDs, test.initiatorIDs) {
+ if !slices.Contains(createdUsers, user) {
+ dbgen.User(t, db, database.User{
+ ID: user,
+ })
+ createdUsers = append(createdUsers, user)
+ }
+ }
+
+ collector := prebuilds.NewMetricsCollector(db, logger, reconciler)
+ registry := prometheus.NewPedanticRegistry()
+ registry.Register(collector)
+
+ numTemplates := 2
+ for i := 0; i < numTemplates; i++ {
+ org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
+ templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, org.ID, ownerID, template.ID)
+ preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())
+ setupTestDBWorkspace(
+ t, clock, db, pubsub,
+ transition, jobStatus, org.ID, preset, template.ID, templateVersionID, initiatorID, ownerID,
+ )
+ }
+
+ metricsFamilies, err := registry.Gather()
+ require.NoError(t, err)
+
+ templates, err := db.GetTemplates(ctx)
+ require.NoError(t, err)
+ require.Equal(t, numTemplates, len(templates))
+
+ for _, template := range templates {
+ org, err := db.GetOrganizationByID(ctx, template.OrganizationID)
+ require.NoError(t, err)
+ templateVersions, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{
+ TemplateID: template.ID,
+ })
+ require.NoError(t, err)
+ require.Equal(t, 1, len(templateVersions))
+
+ presets, err := db.GetPresetsByTemplateVersionID(ctx, templateVersions[0].ID)
+ require.NoError(t, err)
+ require.Equal(t, 1, len(presets))
+
+ for _, preset := range presets {
+ preset := preset // capture for parallel
+ labels := map[string]string{
+ "template_name": template.Name,
+ "preset_name": preset.Name,
+ "organization_name": org.Name,
+ }
+
+ for _, check := range test.metrics {
+ metric := findMetric(metricsFamilies, check.name, labels)
+ if check.value == nil {
+ continue
+ }
+
+ require.NotNil(t, metric, "metric %s should exist", check.name)
+
+ if check.isCounter {
+ require.Equal(t, *check.value, metric.GetCounter().GetValue(), "counter %s value mismatch", check.name)
+ } else {
+ require.Equal(t, *check.value, metric.GetGauge().GetValue(), "gauge %s value mismatch", check.name)
+ }
+ }
+ }
+ }
+ })
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, labels map[string]string) *prometheus_client.Metric {
+ for _, metricFamily := range metricsFamilies {
+ if metricFamily.GetName() != name {
+ continue
+ }
+
+ for _, metric := range metricFamily.GetMetric() {
+ labelPairs := metric.GetLabel()
+
+ // Convert label pairs to map for easier lookup
+ metricLabels := make(map[string]string, len(labelPairs))
+ for _, label := range labelPairs {
+ metricLabels[label.GetName()] = label.GetValue()
+ }
+
+ // Check if all requested labels match
+ for wantName, wantValue := range labels {
+ if metricLabels[wantName] != wantValue {
+ continue
+ }
+ }
+
+ return metric
+ }
+ }
+ return nil
+}
diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go
index 081b4223a93c4..134365b65766b 100644
--- a/enterprise/coderd/prebuilds/reconcile.go
+++ b/enterprise/coderd/prebuilds/reconcile.go
@@ -9,6 +9,7 @@ import (
"time"
"github.com/hashicorp/go-multierror"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/coder/quartz"
@@ -31,11 +32,13 @@ import (
)
type StoreReconciler struct {
- store database.Store
- cfg codersdk.PrebuildsConfig
- pubsub pubsub.Pubsub
- logger slog.Logger
- clock quartz.Clock
+ store database.Store
+ cfg codersdk.PrebuildsConfig
+ pubsub pubsub.Pubsub
+ logger slog.Logger
+ clock quartz.Clock
+ registerer prometheus.Registerer
+ metrics *MetricsCollector
cancelFn context.CancelCauseFunc
running atomic.Bool
@@ -45,21 +48,30 @@ type StoreReconciler struct {
var _ prebuilds.ReconciliationOrchestrator = &StoreReconciler{}
-func NewStoreReconciler(
- store database.Store,
+func NewStoreReconciler(store database.Store,
ps pubsub.Pubsub,
cfg codersdk.PrebuildsConfig,
logger slog.Logger,
clock quartz.Clock,
+ registerer prometheus.Registerer,
) *StoreReconciler {
- return &StoreReconciler{
- store: store,
- pubsub: ps,
- logger: logger,
- cfg: cfg,
- clock: clock,
- done: make(chan struct{}, 1),
+ reconciler := &StoreReconciler{
+ store: store,
+ pubsub: ps,
+ logger: logger,
+ cfg: cfg,
+ clock: clock,
+ registerer: registerer,
+ done: make(chan struct{}, 1),
}
+
+ reconciler.metrics = NewMetricsCollector(store, logger, reconciler)
+ if err := registerer.Register(reconciler.metrics); err != nil {
+ // If the registerer fails to register the metrics collector, it's not fatal.
+ logger.Error(context.Background(), "failed to register prometheus metrics", slog.Error(err))
+ }
+
+ return reconciler
}
func (c *StoreReconciler) Run(ctx context.Context) {
@@ -128,6 +140,17 @@ func (c *StoreReconciler) Stop(ctx context.Context, cause error) {
return
}
+ // Unregister the metrics collector.
+ if c.metrics != nil && c.registerer != nil {
+ if !c.registerer.Unregister(c.metrics) {
+ // The API doesn't allow us to know why the de-registration failed, but it's not very consequential.
+ // The only time this would be an issue is if the premium license is removed, leading to the feature being
+ // disabled (and consequently this Stop method being called), and then adding a new license which enables the
+ // feature again. If the metrics cannot be registered, it'll log an error from NewStoreReconciler.
+ c.logger.Warn(context.Background(), "failed to unregister metrics collector")
+ }
+ }
+
// If the reconciler is not running, there's nothing else to do.
if !c.running.Load() {
return
diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go
index 5c1ffe993ec42..3c18e557152bb 100644
--- a/enterprise/coderd/prebuilds/reconcile_test.go
+++ b/enterprise/coderd/prebuilds/reconcile_test.go
@@ -8,6 +8,8 @@ import (
"testing"
"time"
+ "github.com/prometheus/client_golang/prometheus"
+
"github.com/coder/coder/v2/coderd/util/slice"
"github.com/google/uuid"
@@ -45,7 +47,7 @@ func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
}
logger := testutil.Logger(t)
- controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t))
+ controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry())
// given a template version with no presets
org := dbgen.Organization(t, db, database.Organization{})
@@ -90,7 +92,7 @@ func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
}
logger := testutil.Logger(t)
- controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t))
+ controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry())
// given there are presets, but no prebuilds
org := dbgen.Organization(t, db, database.Organization{})
@@ -317,7 +319,7 @@ func TestPrebuildReconciliation(t *testing.T) {
t, &slogtest.Options{IgnoreErrors: true},
).Leveled(slog.LevelDebug)
db, pubSub := dbtestutil.NewDB(t)
- controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t))
+ controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry())
ownerID := uuid.New()
dbgen.User(t, db, database.User{
@@ -419,7 +421,7 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
t, &slogtest.Options{IgnoreErrors: true},
).Leveled(slog.LevelDebug)
db, pubSub := dbtestutil.NewDB(t)
- controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t))
+ controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry())
ownerID := uuid.New()
dbgen.User(t, db, database.User{
@@ -503,7 +505,7 @@ func TestInvalidPreset(t *testing.T) {
t, &slogtest.Options{IgnoreErrors: true},
).Leveled(slog.LevelDebug)
db, pubSub := dbtestutil.NewDB(t)
- controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t))
+ controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry())
ownerID := uuid.New()
dbgen.User(t, db, database.User{
@@ -575,7 +577,7 @@ func TestRunLoop(t *testing.T) {
t, &slogtest.Options{IgnoreErrors: true},
).Leveled(slog.LevelDebug)
db, pubSub := dbtestutil.NewDB(t)
- reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock)
+ reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, prometheus.NewRegistry())
ownerID := uuid.New()
dbgen.User(t, db, database.User{
@@ -705,7 +707,7 @@ func TestFailedBuildBackoff(t *testing.T) {
t, &slogtest.Options{IgnoreErrors: true},
).Leveled(slog.LevelDebug)
db, ps := dbtestutil.NewDB(t)
- reconciler := prebuilds.NewStoreReconciler(db, ps, cfg, logger, clock)
+ reconciler := prebuilds.NewStoreReconciler(db, ps, cfg, logger, clock, prometheus.NewRegistry())
// Given: an active template version with presets and prebuilds configured.
const desiredInstances = 2
@@ -820,7 +822,7 @@ func TestReconciliationLock(t *testing.T) {
codersdk.PrebuildsConfig{},
slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug),
quartz.NewMock(t),
- )
+ prometheus.NewRegistry())
reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error {
lockObtained := mutex.TryLock()
// As long as the postgres lock is held, this mutex should always be unlocked when we get here.
From 7b78523e09750156b68f2879fccf65261f79cf15 Mon Sep 17 00:00:00 2001
From: Danny Kopping
Date: Thu, 24 Apr 2025 16:29:11 +0200
Subject: [PATCH 2/6] Renaming prebuilds to "prebuilt workspaces" in metrics
Signed-off-by: Danny Kopping
---
.../coderd/prebuilds/metricscollector.go | 31 +++++-------
.../coderd/prebuilds/metricscollector_test.go | 50 +++++++++----------
2 files changed, 37 insertions(+), 44 deletions(-)
diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go
index 507dbddd383ef..48bae252c3b02 100644
--- a/enterprise/coderd/prebuilds/metricscollector.go
+++ b/enterprise/coderd/prebuilds/metricscollector.go
@@ -16,44 +16,38 @@ import (
var (
labels = []string{"template_name", "preset_name", "organization_name"}
createdPrebuildsDesc = prometheus.NewDesc(
- "coderd_prebuilds_created_total",
- "The number of prebuilds that have been created to meet the desired count set by presets.",
+ "coderd_prebuilt_workspaces_created_total",
+ "The number of prebuilt workspaces that have been created to meet the desired count set by presets.",
labels,
nil,
)
failedPrebuildsDesc = prometheus.NewDesc(
- "coderd_prebuilds_failed_total",
- "The number of prebuilds that failed to build during creation.",
+ "coderd_prebuilt_workspaces_failed_total",
+ "The number of prebuilt workspaces that failed to build.",
labels,
nil,
)
claimedPrebuildsDesc = prometheus.NewDesc(
- "coderd_prebuilds_claimed_total",
- "The number of prebuilds that were claimed by a user. Each count means that a user created a workspace using a preset and was assigned a prebuild instead of a brand new workspace.",
- labels,
- nil,
- )
- usedPresetsDesc = prometheus.NewDesc(
- "coderd_prebuilds_used_presets",
- "The number of times a preset was used to build a prebuild.",
+ "coderd_prebuilt_workspaces_claimed_total",
+ "The number of prebuilt workspaces that were claimed by a user. Each count means that a user created a workspace using a preset and claimed a prebuilt workspace instead of a brand new workspace being created.",
labels,
nil,
)
desiredPrebuildsDesc = prometheus.NewDesc(
- "coderd_prebuilds_desired",
- "The number of prebuilds desired by each preset of each template.",
+ "coderd_prebuilt_workspaces_desired",
+ "The number of prebuilt workspaces desired by each preset of each template.",
labels,
nil,
)
runningPrebuildsDesc = prometheus.NewDesc(
- "coderd_prebuilds_running",
- "The number of prebuilds that are currently running. Running prebuilds have successfully started, but they may not be ready to be claimed by a user yet.",
+ "coderd_prebuilt_workspaces_running",
+ "The number of prebuilt workspaces that are currently running. Running prebuilt workspaces have successfully started, but includes both eligible and ineligible workspaces.",
labels,
nil,
)
eligiblePrebuildsDesc = prometheus.NewDesc(
- "coderd_prebuilds_eligible",
- "The number of eligible prebuilds. Eligible prebuilds are prebuilds that are ready to be claimed by a user.",
+ "coderd_prebuilt_workspaces_eligible",
+ "The number of eligible prebuilt workspaces. Eligible prebuilt workspaces are ones whose agent is marked 'ready', and can be claimed by a user.",
labels,
nil,
)
@@ -79,7 +73,6 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
descCh <- createdPrebuildsDesc
descCh <- failedPrebuildsDesc
descCh <- claimedPrebuildsDesc
- descCh <- usedPresetsDesc
descCh <- desiredPrebuildsDesc
descCh <- runningPrebuildsDesc
descCh <- eligiblePrebuildsDesc
diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go
index 0387ecea5e1db..29e0e371418be 100644
--- a/enterprise/coderd/prebuilds/metricscollector_test.go
+++ b/enterprise/coderd/prebuilds/metricscollector_test.go
@@ -58,10 +58,10 @@ func TestMetricsCollector(t *testing.T) {
// * a prebuild that was claimed should not be deemed running, not eligible.
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
metrics: []metricCheck{
- {"coderd_prebuilds_created_total", ptr.To(1.0), true},
- {"coderd_prebuilds_desired", ptr.To(1.0), false},
- // {"coderd_prebuilds_running", ptr.To(0.0), false},
- // {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_running", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
},
@@ -72,10 +72,10 @@ func TestMetricsCollector(t *testing.T) {
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
metrics: []metricCheck{
- {"coderd_prebuilds_created_total", ptr.To(1.0), true},
- {"coderd_prebuilds_desired", ptr.To(1.0), false},
- {"coderd_prebuilds_running", ptr.To(1.0), false},
- {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_running", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
},
@@ -86,26 +86,26 @@ func TestMetricsCollector(t *testing.T) {
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
metrics: []metricCheck{
- {"coderd_prebuilds_created_total", ptr.To(1.0), true},
- {"coderd_prebuilds_failed_total", ptr.To(1.0), true},
- {"coderd_prebuilds_desired", ptr.To(1.0), false},
- {"coderd_prebuilds_running", ptr.To(0.0), false},
- {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_failed_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_running", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
},
{
- name: "prebuild assigned",
+ name: "prebuild claimed",
transitions: allTransitions,
jobStatuses: allJobStatuses,
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{uuid.New()},
metrics: []metricCheck{
- {"coderd_prebuilds_created_total", ptr.To(1.0), true},
- {"coderd_prebuilds_claimed_total", ptr.To(1.0), true},
- {"coderd_prebuilds_desired", ptr.To(1.0), false},
- {"coderd_prebuilds_running", ptr.To(0.0), false},
- {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_claimed_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_running", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
},
@@ -116,9 +116,9 @@ func TestMetricsCollector(t *testing.T) {
initiatorIDs: []uuid.UUID{uuid.New()},
ownerIDs: []uuid.UUID{uuid.New()},
metrics: []metricCheck{
- {"coderd_prebuilds_desired", ptr.To(1.0), false},
- {"coderd_prebuilds_running", ptr.To(0.0), false},
- {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_running", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
},
@@ -129,7 +129,7 @@ func TestMetricsCollector(t *testing.T) {
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
metrics: []metricCheck{
- {"coderd_prebuilds_desired", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_desired", ptr.To(0.0), false},
},
templateDeleted: []bool{true},
},
@@ -140,8 +140,8 @@ func TestMetricsCollector(t *testing.T) {
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
metrics: []metricCheck{
- {"coderd_prebuilds_running", ptr.To(1.0), false},
- {"coderd_prebuilds_eligible", ptr.To(0.0), false},
+ {"coderd_prebuilt_workspaces_running", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{true},
},
From 547c1b049044e88cb2a2c641d6f195c4894f5949 Mon Sep 17 00:00:00 2001
From: Danny Kopping
Date: Thu, 24 Apr 2025 17:00:17 +0200
Subject: [PATCH 3/6] Eligibility tests
Signed-off-by: Danny Kopping
---
enterprise/coderd/prebuilds/claim_test.go | 2 +-
.../coderd/prebuilds/metricscollector_test.go | 203 +++++++++++-------
enterprise/coderd/prebuilds/reconcile_test.go | 30 ++-
3 files changed, 155 insertions(+), 80 deletions(-)
diff --git a/enterprise/coderd/prebuilds/claim_test.go b/enterprise/coderd/prebuilds/claim_test.go
index e8d66fae18a07..1573aab9387f1 100644
--- a/enterprise/coderd/prebuilds/claim_test.go
+++ b/enterprise/coderd/prebuilds/claim_test.go
@@ -420,7 +420,7 @@ func TestClaimPrebuild_CheckDifferentErrors(t *testing.T) {
EntitlementsUpdateInterval: time.Second,
})
- reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
+ reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), api.PrometheusRegistry)
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(errorStore)
api.AGPL.PrebuildsClaimer.Store(&claimer)
diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go
index 29e0e371418be..859509ced6635 100644
--- a/enterprise/coderd/prebuilds/metricscollector_test.go
+++ b/enterprise/coderd/prebuilds/metricscollector_test.go
@@ -45,25 +45,26 @@ func TestMetricsCollector(t *testing.T) {
ownerIDs []uuid.UUID
metrics []metricCheck
templateDeleted []bool
+ eligible []bool
}
tests := []testCase{
{
- name: "prebuild created",
+ name: "prebuild provisioned but not completed",
transitions: allTransitions,
- jobStatuses: allJobStatuses,
+ jobStatuses: allJobStatusesExcept(database.ProvisionerJobStatusPending, database.ProvisionerJobStatusRunning, database.ProvisionerJobStatusCanceling),
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
- // TODO: reexamine and refactor the test cases and assertions:
- // * a running prebuild that is not elibible to be claimed currently seems to be eligible.
- // * a prebuild that was claimed should not be deemed running, not eligible.
- ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
+ ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
metrics: []metricCheck{
{"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_claimed_total", ptr.To(0.0), true},
+ {"coderd_prebuilt_workspaces_failed_total", ptr.To(0.0), true},
{"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
{"coderd_prebuilt_workspaces_running", ptr.To(0.0), false},
{"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
+ eligible: []bool{false},
},
{
name: "prebuild running",
@@ -73,11 +74,14 @@ func TestMetricsCollector(t *testing.T) {
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
metrics: []metricCheck{
{"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_claimed_total", ptr.To(0.0), true},
+ {"coderd_prebuilt_workspaces_failed_total", ptr.To(0.0), true},
{"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
{"coderd_prebuilt_workspaces_running", ptr.To(1.0), false},
{"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
+ eligible: []bool{false},
},
{
name: "prebuild failed",
@@ -93,6 +97,41 @@ func TestMetricsCollector(t *testing.T) {
{"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
+ eligible: []bool{false},
+ },
+ {
+ name: "prebuild eligible",
+ transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
+ jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
+ initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ metrics: []metricCheck{
+ {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_claimed_total", ptr.To(0.0), true},
+ {"coderd_prebuilt_workspaces_failed_total", ptr.To(0.0), true},
+ {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_running", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_eligible", ptr.To(1.0), false},
+ },
+ templateDeleted: []bool{false},
+ eligible: []bool{true},
+ },
+ {
+ name: "prebuild ineligible",
+ transitions: allTransitions,
+ jobStatuses: allJobStatusesExcept(database.ProvisionerJobStatusSucceeded),
+ initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
+ metrics: []metricCheck{
+ {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true},
+ {"coderd_prebuilt_workspaces_claimed_total", ptr.To(0.0), true},
+ {"coderd_prebuilt_workspaces_failed_total", ptr.To(0.0), true},
+ {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_running", ptr.To(1.0), false},
+ {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
+ },
+ templateDeleted: []bool{false},
+ eligible: []bool{false},
},
{
name: "prebuild claimed",
@@ -108,6 +147,7 @@ func TestMetricsCollector(t *testing.T) {
{"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
+ eligible: []bool{false},
},
{
name: "workspaces that were not created by the prebuilds user are not counted",
@@ -121,6 +161,7 @@ func TestMetricsCollector(t *testing.T) {
{"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{false},
+ eligible: []bool{false},
},
{
name: "deleted templates never desire prebuilds",
@@ -132,6 +173,7 @@ func TestMetricsCollector(t *testing.T) {
{"coderd_prebuilt_workspaces_desired", ptr.To(0.0), false},
},
templateDeleted: []bool{true},
+ eligible: []bool{false},
},
{
name: "running prebuilds for deleted templates are still counted, so that they can be deleted",
@@ -144,6 +186,7 @@ func TestMetricsCollector(t *testing.T) {
{"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false},
},
templateDeleted: []bool{true},
+ eligible: []bool{false},
},
}
for _, test := range tests {
@@ -158,95 +201,99 @@ func TestMetricsCollector(t *testing.T) {
ownerID := ownerID // capture for parallel
for _, templateDeleted := range test.templateDeleted {
templateDeleted := templateDeleted // capture for parallel
- t.Run(fmt.Sprintf("%v/transition:%s/jobStatus:%s", test.name, transition, jobStatus), func(t *testing.T) {
- t.Parallel()
+ for _, eligible := range test.eligible {
+ eligible := eligible // capture for parallel
+ t.Run(fmt.Sprintf("%v/transition:%s/jobStatus:%s", test.name, transition, jobStatus), func(t *testing.T) {
+ t.Parallel()
- logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
- t.Cleanup(func() {
- if t.Failed() {
- t.Logf("failed to run test: %s", test.name)
- t.Logf("transition: %s", transition)
- t.Logf("jobStatus: %s", jobStatus)
- t.Logf("initiatorID: %s", initiatorID)
- t.Logf("ownerID: %s", ownerID)
- t.Logf("templateDeleted: %t", templateDeleted)
- }
- })
- clock := quartz.NewMock(t)
- db, pubsub := dbtestutil.NewDB(t)
- reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry())
- ctx := testutil.Context(t, testutil.WaitLong)
+ logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
+ t.Cleanup(func() {
+ if t.Failed() {
+ t.Logf("failed to run test: %s", test.name)
+ t.Logf("transition: %s", transition)
+ t.Logf("jobStatus: %s", jobStatus)
+ t.Logf("initiatorID: %s", initiatorID)
+ t.Logf("ownerID: %s", ownerID)
+ t.Logf("templateDeleted: %t", templateDeleted)
+ }
+ })
+ clock := quartz.NewMock(t)
+ db, pubsub := dbtestutil.NewDB(t)
+ reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry())
+ ctx := testutil.Context(t, testutil.WaitLong)
- createdUsers := []uuid.UUID{agplprebuilds.SystemUserID}
- for _, user := range slices.Concat(test.ownerIDs, test.initiatorIDs) {
- if !slices.Contains(createdUsers, user) {
- dbgen.User(t, db, database.User{
- ID: user,
- })
- createdUsers = append(createdUsers, user)
+ createdUsers := []uuid.UUID{agplprebuilds.SystemUserID}
+ for _, user := range slices.Concat(test.ownerIDs, test.initiatorIDs) {
+ if !slices.Contains(createdUsers, user) {
+ dbgen.User(t, db, database.User{
+ ID: user,
+ })
+ createdUsers = append(createdUsers, user)
+ }
}
- }
- collector := prebuilds.NewMetricsCollector(db, logger, reconciler)
- registry := prometheus.NewPedanticRegistry()
- registry.Register(collector)
+ collector := prebuilds.NewMetricsCollector(db, logger, reconciler)
+ registry := prometheus.NewPedanticRegistry()
+ registry.Register(collector)
- numTemplates := 2
- for i := 0; i < numTemplates; i++ {
- org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
- templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, org.ID, ownerID, template.ID)
- preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())
- setupTestDBWorkspace(
- t, clock, db, pubsub,
- transition, jobStatus, org.ID, preset, template.ID, templateVersionID, initiatorID, ownerID,
- )
- }
-
- metricsFamilies, err := registry.Gather()
- require.NoError(t, err)
-
- templates, err := db.GetTemplates(ctx)
- require.NoError(t, err)
- require.Equal(t, numTemplates, len(templates))
+ numTemplates := 2
+ for i := 0; i < numTemplates; i++ {
+ org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
+ templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, org.ID, ownerID, template.ID)
+ preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())
+ workspace := setupTestDBWorkspace(
+ t, clock, db, pubsub,
+ transition, jobStatus, org.ID, preset, template.ID, templateVersionID, initiatorID, ownerID,
+ )
+ setupTestDBWorkspaceAgent(t, db, workspace.ID, eligible)
+ }
- for _, template := range templates {
- org, err := db.GetOrganizationByID(ctx, template.OrganizationID)
- require.NoError(t, err)
- templateVersions, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{
- TemplateID: template.ID,
- })
+ metricsFamilies, err := registry.Gather()
require.NoError(t, err)
- require.Equal(t, 1, len(templateVersions))
- presets, err := db.GetPresetsByTemplateVersionID(ctx, templateVersions[0].ID)
+ templates, err := db.GetTemplates(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(presets))
+ require.Equal(t, numTemplates, len(templates))
- for _, preset := range presets {
- preset := preset // capture for parallel
- labels := map[string]string{
- "template_name": template.Name,
- "preset_name": preset.Name,
- "organization_name": org.Name,
- }
+ for _, template := range templates {
+ org, err := db.GetOrganizationByID(ctx, template.OrganizationID)
+ require.NoError(t, err)
+ templateVersions, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{
+ TemplateID: template.ID,
+ })
+ require.NoError(t, err)
+ require.Equal(t, 1, len(templateVersions))
+
+ presets, err := db.GetPresetsByTemplateVersionID(ctx, templateVersions[0].ID)
+ require.NoError(t, err)
+ require.Equal(t, 1, len(presets))
- for _, check := range test.metrics {
- metric := findMetric(metricsFamilies, check.name, labels)
- if check.value == nil {
- continue
+ for _, preset := range presets {
+ preset := preset // capture for parallel
+ labels := map[string]string{
+ "template_name": template.Name,
+ "preset_name": preset.Name,
+ "organization_name": org.Name,
}
- require.NotNil(t, metric, "metric %s should exist", check.name)
+ for _, check := range test.metrics {
+ metric := findMetric(metricsFamilies, check.name, labels)
+ if check.value == nil {
+ continue
+ }
- if check.isCounter {
- require.Equal(t, *check.value, metric.GetCounter().GetValue(), "counter %s value mismatch", check.name)
- } else {
- require.Equal(t, *check.value, metric.GetGauge().GetValue(), "gauge %s value mismatch", check.name)
+ require.NotNil(t, metric, "metric %s should exist", check.name)
+
+ if check.isCounter {
+ require.Equal(t, *check.value, metric.GetCounter().GetValue(), "counter %s value mismatch", check.name)
+ } else {
+ require.Equal(t, *check.value, metric.GetGauge().GetValue(), "gauge %s value mismatch", check.name)
+ }
}
}
}
- }
- })
+ })
+ }
}
}
}
diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go
index 3c18e557152bb..881aebdbbc893 100644
--- a/enterprise/coderd/prebuilds/reconcile_test.go
+++ b/enterprise/coderd/prebuilds/reconcile_test.go
@@ -10,6 +10,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
+ "github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/util/slice"
"github.com/google/uuid"
@@ -1011,6 +1012,29 @@ func setupTestDBWorkspace(
return workspace
}
+func setupTestDBWorkspaceAgent(t *testing.T, db database.Store, workspaceID uuid.UUID, eligible bool) database.WorkspaceAgent {
+ build, err := db.GetLatestWorkspaceBuildByWorkspaceID(t.Context(), workspaceID)
+ require.NoError(t, err)
+
+ res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID})
+ agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
+ ResourceID: res.ID,
+ })
+
+ // A prebuilt workspace is considered eligible when its agent is in a "ready" lifecycle state.
+ // i.e. connected to the control plane and all startup scripts have run.
+ if eligible {
+ require.NoError(t, db.UpdateWorkspaceAgentLifecycleStateByID(t.Context(), database.UpdateWorkspaceAgentLifecycleStateByIDParams{
+ ID: agent.ID,
+ LifecycleState: database.WorkspaceAgentLifecycleStateReady,
+ StartedAt: sql.NullTime{Time: dbtime.Now().Add(-time.Minute), Valid: true},
+ ReadyAt: sql.NullTime{Time: dbtime.Now(), Valid: true},
+ }))
+ }
+
+ return agent
+}
+
var allTransitions = []database.WorkspaceTransition{
database.WorkspaceTransitionStart,
database.WorkspaceTransitionStop,
@@ -1026,4 +1050,8 @@ var allJobStatuses = []database.ProvisionerJobStatus{
database.ProvisionerJobStatusCanceling,
}
-// TODO (sasswart): test mutual exclusion
+func allJobStatusesExcept(except ...database.ProvisionerJobStatus) []database.ProvisionerJobStatus {
+ return slice.Filter(except, func(status database.ProvisionerJobStatus) bool {
+ return !slice.Contains(allJobStatuses, status)
+ })
+}
From 131307a301919fda7232e68133d3838535da7dc7 Mon Sep 17 00:00:00 2001
From: Danny Kopping
Date: Fri, 25 Apr 2025 09:13:11 +0200
Subject: [PATCH 4/6] Review & linter feedback
Signed-off-by: Danny Kopping
---
enterprise/coderd/prebuilds/metricscollector.go | 2 +-
enterprise/coderd/prebuilds/reconcile_test.go | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go
index 48bae252c3b02..15e62904e9dc9 100644
--- a/enterprise/coderd/prebuilds/metricscollector.go
+++ b/enterprise/coderd/prebuilds/metricscollector.go
@@ -79,9 +79,9 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
}
func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
+ // nolint:gocritic // We need to set an authz context to read metrics from the db.
ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second)
defer cancel()
- // nolint:gocritic // just until we get back to this
prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx)
if err != nil {
mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go
index 881aebdbbc893..9783b215f185b 100644
--- a/enterprise/coderd/prebuilds/reconcile_test.go
+++ b/enterprise/coderd/prebuilds/reconcile_test.go
@@ -1012,6 +1012,7 @@ func setupTestDBWorkspace(
return workspace
}
+// nolint:revive // It's a control flag, but this is a test.
func setupTestDBWorkspaceAgent(t *testing.T, db database.Store, workspaceID uuid.UUID, eligible bool) database.WorkspaceAgent {
build, err := db.GetLatestWorkspaceBuildByWorkspaceID(t.Context(), workspaceID)
require.NoError(t, err)
From 7287bd8ce9236767808ed15a6461211c2c9fc530 Mon Sep 17 00:00:00 2001
From: Danny Kopping
Date: Fri, 25 Apr 2025 09:48:54 +0200
Subject: [PATCH 5/6] Reword metric descriptions for clarity
Signed-off-by: Danny Kopping
---
enterprise/coderd/prebuilds/metricscollector.go | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go
index 15e62904e9dc9..0d782d609b50b 100644
--- a/enterprise/coderd/prebuilds/metricscollector.go
+++ b/enterprise/coderd/prebuilds/metricscollector.go
@@ -17,37 +17,41 @@ var (
labels = []string{"template_name", "preset_name", "organization_name"}
createdPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_created_total",
- "The number of prebuilt workspaces that have been created to meet the desired count set by presets.",
+ "Total number of prebuilt workspaces that have been created to meet the desired instance count of each " +
+ "template preset.",
labels,
nil,
)
failedPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_failed_total",
- "The number of prebuilt workspaces that failed to build.",
+ "Total number of prebuilt workspaces that failed to build.",
labels,
nil,
)
claimedPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_claimed_total",
- "The number of prebuilt workspaces that were claimed by a user. Each count means that a user created a workspace using a preset and claimed a prebuilt workspace instead of a brand new workspace being created.",
+ "Total number of prebuilt workspaces which were claimed by users. Claiming refers to creating a workspace "+
+ "with a preset selected for which eligible prebuilt workspaces are available and one is reassigned to a user.",
labels,
nil,
)
desiredPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_desired",
- "The number of prebuilt workspaces desired by each preset of each template.",
+ "Target number of prebuilt workspaces that should be available for each template preset.",
labels,
nil,
)
runningPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_running",
- "The number of prebuilt workspaces that are currently running. Running prebuilt workspaces have successfully started, but includes both eligible and ineligible workspaces.",
+ "Current number of prebuilt workspaces that are in a running state. These workspaces have started "+
+ "successfully but may not yet be claimable by users (see coderd_prebuilt_workspaces_eligible).",
labels,
nil,
)
eligiblePrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_eligible",
- "The number of eligible prebuilt workspaces. Eligible prebuilt workspaces are ones whose agent is marked 'ready', and can be claimed by a user.",
+ "Current number of prebuilt workspaces that are eligible to be claimed by users. These are workspaces that " +
+ "have completed their build process with their agent reporting 'ready' status.",
labels,
nil,
)
From 1d66ade3fdfebb5cafc88eb6468e61ba494e5622 Mon Sep 17 00:00:00 2001
From: Danny Kopping
Date: Fri, 25 Apr 2025 09:53:55 +0200
Subject: [PATCH 6/6] make fmt
Signed-off-by: Danny Kopping
---
enterprise/coderd/prebuilds/metricscollector.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go
index 0d782d609b50b..7b55227effffa 100644
--- a/enterprise/coderd/prebuilds/metricscollector.go
+++ b/enterprise/coderd/prebuilds/metricscollector.go
@@ -17,7 +17,7 @@ var (
labels = []string{"template_name", "preset_name", "organization_name"}
createdPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_created_total",
- "Total number of prebuilt workspaces that have been created to meet the desired instance count of each " +
+ "Total number of prebuilt workspaces that have been created to meet the desired instance count of each "+
"template preset.",
labels,
nil,
@@ -50,7 +50,7 @@ var (
)
eligiblePrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_eligible",
- "Current number of prebuilt workspaces that are eligible to be claimed by users. These are workspaces that " +
+ "Current number of prebuilt workspaces that are eligible to be claimed by users. These are workspaces that "+
"have completed their build process with their agent reporting 'ready' status.",
labels,
nil,
--- a PPN by Garber Painting Akron. With Image Size Reduction included!Fetched URL: http://github.com/coder/coder/pull/17547.patch
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy