diff --git a/coderd/prebuilds/api.go b/coderd/prebuilds/api.go index ba174d318d5fa..2342da5d62c07 100644 --- a/coderd/prebuilds/api.go +++ b/coderd/prebuilds/api.go @@ -5,6 +5,8 @@ import ( "github.com/google/uuid" "golang.org/x/xerrors" + + "github.com/coder/coder/v2/coderd/database" ) var ErrNoClaimablePrebuiltWorkspaces = xerrors.New("no claimable prebuilt workspaces found") @@ -25,12 +27,23 @@ type ReconciliationOrchestrator interface { } type Reconciler interface { + StateSnapshotter + // ReconcileAll orchestrates the reconciliation of all prebuilds across all templates. // It takes a global snapshot of the system state and then reconciles each preset // in parallel, creating or deleting prebuilds as needed to reach their desired states. ReconcileAll(ctx context.Context) error } +// StateSnapshotter defines the operations necessary to capture workspace prebuilds state. +type StateSnapshotter interface { + // SnapshotState captures the current state of all prebuilds across templates. + // It creates a global database snapshot that can be viewed as a collection of PresetSnapshots, + // each representing the state of prebuilds for a specific preset. + // MUST be called inside a repeatable-read transaction. + SnapshotState(ctx context.Context, store database.Store) (*GlobalSnapshot, error) +} + type Claimer interface { Claim(ctx context.Context, userID uuid.UUID, name string, presetID uuid.UUID) (*uuid.UUID, error) Initiator() uuid.UUID diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 1f468997ac220..ca3531b60db78 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -1165,6 +1165,6 @@ func (api *API) setupPrebuilds(featureEnabled bool) (agplprebuilds.Reconciliatio } reconciler := prebuilds.NewStoreReconciler(api.Database, api.Pubsub, api.DeploymentValues.Prebuilds, - api.Logger.Named("prebuilds"), quartz.NewReal()) + api.Logger.Named("prebuilds"), quartz.NewReal(), api.PrometheusRegistry) return reconciler, prebuilds.EnterpriseClaimer{} } diff --git a/enterprise/coderd/prebuilds/claim_test.go b/enterprise/coderd/prebuilds/claim_test.go index 4f398724b8265..1573aab9387f1 100644 --- a/enterprise/coderd/prebuilds/claim_test.go +++ b/enterprise/coderd/prebuilds/claim_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "golang.org/x/xerrors" @@ -142,7 +143,7 @@ func TestClaimPrebuild(t *testing.T) { EntitlementsUpdateInterval: time.Second, }) - reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t)) + reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry()) var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(spy) api.AGPL.PrebuildsClaimer.Store(&claimer) @@ -419,7 +420,7 @@ func TestClaimPrebuild_CheckDifferentErrors(t *testing.T) { EntitlementsUpdateInterval: time.Second, }) - reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t)) + reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), api.PrometheusRegistry) var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(errorStore) api.AGPL.PrebuildsClaimer.Store(&claimer) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go new file mode 100644 index 0000000000000..7b55227effffa --- /dev/null +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -0,0 +1,123 @@ +package prebuilds + +import ( + "context" + "time" + + "cdr.dev/slog" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/prebuilds" +) + +var ( + labels = []string{"template_name", "preset_name", "organization_name"} + createdPrebuildsDesc = prometheus.NewDesc( + "coderd_prebuilt_workspaces_created_total", + "Total number of prebuilt workspaces that have been created to meet the desired instance count of each "+ + "template preset.", + labels, + nil, + ) + failedPrebuildsDesc = prometheus.NewDesc( + "coderd_prebuilt_workspaces_failed_total", + "Total number of prebuilt workspaces that failed to build.", + labels, + nil, + ) + claimedPrebuildsDesc = prometheus.NewDesc( + "coderd_prebuilt_workspaces_claimed_total", + "Total number of prebuilt workspaces which were claimed by users. Claiming refers to creating a workspace "+ + "with a preset selected for which eligible prebuilt workspaces are available and one is reassigned to a user.", + labels, + nil, + ) + desiredPrebuildsDesc = prometheus.NewDesc( + "coderd_prebuilt_workspaces_desired", + "Target number of prebuilt workspaces that should be available for each template preset.", + labels, + nil, + ) + runningPrebuildsDesc = prometheus.NewDesc( + "coderd_prebuilt_workspaces_running", + "Current number of prebuilt workspaces that are in a running state. These workspaces have started "+ + "successfully but may not yet be claimable by users (see coderd_prebuilt_workspaces_eligible).", + labels, + nil, + ) + eligiblePrebuildsDesc = prometheus.NewDesc( + "coderd_prebuilt_workspaces_eligible", + "Current number of prebuilt workspaces that are eligible to be claimed by users. These are workspaces that "+ + "have completed their build process with their agent reporting 'ready' status.", + labels, + nil, + ) +) + +type MetricsCollector struct { + database database.Store + logger slog.Logger + snapshotter prebuilds.StateSnapshotter +} + +var _ prometheus.Collector = new(MetricsCollector) + +func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector { + return &MetricsCollector{ + database: db, + logger: logger.Named("prebuilds_metrics_collector"), + snapshotter: snapshotter, + } +} + +func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) { + descCh <- createdPrebuildsDesc + descCh <- failedPrebuildsDesc + descCh <- claimedPrebuildsDesc + descCh <- desiredPrebuildsDesc + descCh <- runningPrebuildsDesc + descCh <- eligiblePrebuildsDesc +} + +func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { + // nolint:gocritic // We need to set an authz context to read metrics from the db. + ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second) + defer cancel() + prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx) + if err != nil { + mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err)) + return + } + + for _, metric := range prebuildMetrics { + metricsCh <- prometheus.MustNewConstMetric(createdPrebuildsDesc, prometheus.CounterValue, float64(metric.CreatedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName) + metricsCh <- prometheus.MustNewConstMetric(failedPrebuildsDesc, prometheus.CounterValue, float64(metric.FailedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName) + metricsCh <- prometheus.MustNewConstMetric(claimedPrebuildsDesc, prometheus.CounterValue, float64(metric.ClaimedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName) + } + + snapshot, err := mc.snapshotter.SnapshotState(ctx, mc.database) + if err != nil { + mc.logger.Error(ctx, "failed to get latest prebuild state", slog.Error(err)) + return + } + + for _, preset := range snapshot.Presets { + if !preset.UsingActiveVersion { + continue + } + + presetSnapshot, err := snapshot.FilterByPreset(preset.ID) + if err != nil { + mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err)) + continue + } + state := presetSnapshot.CalculateState() + + metricsCh <- prometheus.MustNewConstMetric(desiredPrebuildsDesc, prometheus.GaugeValue, float64(state.Desired), preset.TemplateName, preset.Name, preset.OrganizationName) + metricsCh <- prometheus.MustNewConstMetric(runningPrebuildsDesc, prometheus.GaugeValue, float64(state.Actual), preset.TemplateName, preset.Name, preset.OrganizationName) + metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName) + } +} diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go new file mode 100644 index 0000000000000..859509ced6635 --- /dev/null +++ b/enterprise/coderd/prebuilds/metricscollector_test.go @@ -0,0 +1,331 @@ +package prebuilds_test + +import ( + "fmt" + "slices" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "tailscale.com/types/ptr" + + "github.com/prometheus/client_golang/prometheus" + prometheus_client "github.com/prometheus/client_model/go" + + "cdr.dev/slog/sloggers/slogtest" + "github.com/coder/quartz" + + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbgen" + "github.com/coder/coder/v2/coderd/database/dbtestutil" + agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds" + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/enterprise/coderd/prebuilds" + "github.com/coder/coder/v2/testutil" +) + +func TestMetricsCollector(t *testing.T) { + t.Parallel() + + if !dbtestutil.WillUsePostgres() { + t.Skip("this test requires postgres") + } + + type metricCheck struct { + name string + value *float64 + isCounter bool + } + + type testCase struct { + name string + transitions []database.WorkspaceTransition + jobStatuses []database.ProvisionerJobStatus + initiatorIDs []uuid.UUID + ownerIDs []uuid.UUID + metrics []metricCheck + templateDeleted []bool + eligible []bool + } + + tests := []testCase{ + { + name: "prebuild provisioned but not completed", + transitions: allTransitions, + jobStatuses: allJobStatusesExcept(database.ProvisionerJobStatusPending, database.ProvisionerJobStatusRunning, database.ProvisionerJobStatusCanceling), + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true}, + {"coderd_prebuilt_workspaces_claimed_total", ptr.To(0.0), true}, + {"coderd_prebuilt_workspaces_failed_total", ptr.To(0.0), true}, + {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_running", ptr.To(0.0), false}, + {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false}, + }, + templateDeleted: []bool{false}, + eligible: []bool{false}, + }, + { + name: "prebuild running", + transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart}, + jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded}, + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true}, + {"coderd_prebuilt_workspaces_claimed_total", ptr.To(0.0), true}, + {"coderd_prebuilt_workspaces_failed_total", ptr.To(0.0), true}, + {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_running", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false}, + }, + templateDeleted: []bool{false}, + eligible: []bool{false}, + }, + { + name: "prebuild failed", + transitions: allTransitions, + jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusFailed}, + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true}, + {"coderd_prebuilt_workspaces_failed_total", ptr.To(1.0), true}, + {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_running", ptr.To(0.0), false}, + {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false}, + }, + templateDeleted: []bool{false}, + eligible: []bool{false}, + }, + { + name: "prebuild eligible", + transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart}, + jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded}, + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true}, + {"coderd_prebuilt_workspaces_claimed_total", ptr.To(0.0), true}, + {"coderd_prebuilt_workspaces_failed_total", ptr.To(0.0), true}, + {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_running", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_eligible", ptr.To(1.0), false}, + }, + templateDeleted: []bool{false}, + eligible: []bool{true}, + }, + { + name: "prebuild ineligible", + transitions: allTransitions, + jobStatuses: allJobStatusesExcept(database.ProvisionerJobStatusSucceeded), + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true}, + {"coderd_prebuilt_workspaces_claimed_total", ptr.To(0.0), true}, + {"coderd_prebuilt_workspaces_failed_total", ptr.To(0.0), true}, + {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_running", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false}, + }, + templateDeleted: []bool{false}, + eligible: []bool{false}, + }, + { + name: "prebuild claimed", + transitions: allTransitions, + jobStatuses: allJobStatuses, + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{uuid.New()}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_created_total", ptr.To(1.0), true}, + {"coderd_prebuilt_workspaces_claimed_total", ptr.To(1.0), true}, + {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_running", ptr.To(0.0), false}, + {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false}, + }, + templateDeleted: []bool{false}, + eligible: []bool{false}, + }, + { + name: "workspaces that were not created by the prebuilds user are not counted", + transitions: allTransitions, + jobStatuses: allJobStatuses, + initiatorIDs: []uuid.UUID{uuid.New()}, + ownerIDs: []uuid.UUID{uuid.New()}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_desired", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_running", ptr.To(0.0), false}, + {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false}, + }, + templateDeleted: []bool{false}, + eligible: []bool{false}, + }, + { + name: "deleted templates never desire prebuilds", + transitions: allTransitions, + jobStatuses: allJobStatuses, + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_desired", ptr.To(0.0), false}, + }, + templateDeleted: []bool{true}, + eligible: []bool{false}, + }, + { + name: "running prebuilds for deleted templates are still counted, so that they can be deleted", + transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart}, + jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded}, + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + metrics: []metricCheck{ + {"coderd_prebuilt_workspaces_running", ptr.To(1.0), false}, + {"coderd_prebuilt_workspaces_eligible", ptr.To(0.0), false}, + }, + templateDeleted: []bool{true}, + eligible: []bool{false}, + }, + } + for _, test := range tests { + test := test // capture for parallel + for _, transition := range test.transitions { + transition := transition // capture for parallel + for _, jobStatus := range test.jobStatuses { + jobStatus := jobStatus // capture for parallel + for _, initiatorID := range test.initiatorIDs { + initiatorID := initiatorID // capture for parallel + for _, ownerID := range test.ownerIDs { + ownerID := ownerID // capture for parallel + for _, templateDeleted := range test.templateDeleted { + templateDeleted := templateDeleted // capture for parallel + for _, eligible := range test.eligible { + eligible := eligible // capture for parallel + t.Run(fmt.Sprintf("%v/transition:%s/jobStatus:%s", test.name, transition, jobStatus), func(t *testing.T) { + t.Parallel() + + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) + t.Cleanup(func() { + if t.Failed() { + t.Logf("failed to run test: %s", test.name) + t.Logf("transition: %s", transition) + t.Logf("jobStatus: %s", jobStatus) + t.Logf("initiatorID: %s", initiatorID) + t.Logf("ownerID: %s", ownerID) + t.Logf("templateDeleted: %t", templateDeleted) + } + }) + clock := quartz.NewMock(t) + db, pubsub := dbtestutil.NewDB(t) + reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry()) + ctx := testutil.Context(t, testutil.WaitLong) + + createdUsers := []uuid.UUID{agplprebuilds.SystemUserID} + for _, user := range slices.Concat(test.ownerIDs, test.initiatorIDs) { + if !slices.Contains(createdUsers, user) { + dbgen.User(t, db, database.User{ + ID: user, + }) + createdUsers = append(createdUsers, user) + } + } + + collector := prebuilds.NewMetricsCollector(db, logger, reconciler) + registry := prometheus.NewPedanticRegistry() + registry.Register(collector) + + numTemplates := 2 + for i := 0; i < numTemplates; i++ { + org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted) + templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, org.ID, ownerID, template.ID) + preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String()) + workspace := setupTestDBWorkspace( + t, clock, db, pubsub, + transition, jobStatus, org.ID, preset, template.ID, templateVersionID, initiatorID, ownerID, + ) + setupTestDBWorkspaceAgent(t, db, workspace.ID, eligible) + } + + metricsFamilies, err := registry.Gather() + require.NoError(t, err) + + templates, err := db.GetTemplates(ctx) + require.NoError(t, err) + require.Equal(t, numTemplates, len(templates)) + + for _, template := range templates { + org, err := db.GetOrganizationByID(ctx, template.OrganizationID) + require.NoError(t, err) + templateVersions, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{ + TemplateID: template.ID, + }) + require.NoError(t, err) + require.Equal(t, 1, len(templateVersions)) + + presets, err := db.GetPresetsByTemplateVersionID(ctx, templateVersions[0].ID) + require.NoError(t, err) + require.Equal(t, 1, len(presets)) + + for _, preset := range presets { + preset := preset // capture for parallel + labels := map[string]string{ + "template_name": template.Name, + "preset_name": preset.Name, + "organization_name": org.Name, + } + + for _, check := range test.metrics { + metric := findMetric(metricsFamilies, check.name, labels) + if check.value == nil { + continue + } + + require.NotNil(t, metric, "metric %s should exist", check.name) + + if check.isCounter { + require.Equal(t, *check.value, metric.GetCounter().GetValue(), "counter %s value mismatch", check.name) + } else { + require.Equal(t, *check.value, metric.GetGauge().GetValue(), "gauge %s value mismatch", check.name) + } + } + } + } + }) + } + } + } + } + } + } + } +} + +func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, labels map[string]string) *prometheus_client.Metric { + for _, metricFamily := range metricsFamilies { + if metricFamily.GetName() != name { + continue + } + + for _, metric := range metricFamily.GetMetric() { + labelPairs := metric.GetLabel() + + // Convert label pairs to map for easier lookup + metricLabels := make(map[string]string, len(labelPairs)) + for _, label := range labelPairs { + metricLabels[label.GetName()] = label.GetValue() + } + + // Check if all requested labels match + for wantName, wantValue := range labels { + if metricLabels[wantName] != wantValue { + continue + } + } + + return metric + } + } + return nil +} diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index 081b4223a93c4..134365b65766b 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -9,6 +9,7 @@ import ( "time" "github.com/hashicorp/go-multierror" + "github.com/prometheus/client_golang/prometheus" "github.com/coder/quartz" @@ -31,11 +32,13 @@ import ( ) type StoreReconciler struct { - store database.Store - cfg codersdk.PrebuildsConfig - pubsub pubsub.Pubsub - logger slog.Logger - clock quartz.Clock + store database.Store + cfg codersdk.PrebuildsConfig + pubsub pubsub.Pubsub + logger slog.Logger + clock quartz.Clock + registerer prometheus.Registerer + metrics *MetricsCollector cancelFn context.CancelCauseFunc running atomic.Bool @@ -45,21 +48,30 @@ type StoreReconciler struct { var _ prebuilds.ReconciliationOrchestrator = &StoreReconciler{} -func NewStoreReconciler( - store database.Store, +func NewStoreReconciler(store database.Store, ps pubsub.Pubsub, cfg codersdk.PrebuildsConfig, logger slog.Logger, clock quartz.Clock, + registerer prometheus.Registerer, ) *StoreReconciler { - return &StoreReconciler{ - store: store, - pubsub: ps, - logger: logger, - cfg: cfg, - clock: clock, - done: make(chan struct{}, 1), + reconciler := &StoreReconciler{ + store: store, + pubsub: ps, + logger: logger, + cfg: cfg, + clock: clock, + registerer: registerer, + done: make(chan struct{}, 1), } + + reconciler.metrics = NewMetricsCollector(store, logger, reconciler) + if err := registerer.Register(reconciler.metrics); err != nil { + // If the registerer fails to register the metrics collector, it's not fatal. + logger.Error(context.Background(), "failed to register prometheus metrics", slog.Error(err)) + } + + return reconciler } func (c *StoreReconciler) Run(ctx context.Context) { @@ -128,6 +140,17 @@ func (c *StoreReconciler) Stop(ctx context.Context, cause error) { return } + // Unregister the metrics collector. + if c.metrics != nil && c.registerer != nil { + if !c.registerer.Unregister(c.metrics) { + // The API doesn't allow us to know why the de-registration failed, but it's not very consequential. + // The only time this would be an issue is if the premium license is removed, leading to the feature being + // disabled (and consequently this Stop method being called), and then adding a new license which enables the + // feature again. If the metrics cannot be registered, it'll log an error from NewStoreReconciler. + c.logger.Warn(context.Background(), "failed to unregister metrics collector") + } + } + // If the reconciler is not running, there's nothing else to do. if !c.running.Load() { return diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 5c1ffe993ec42..9783b215f185b 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -8,6 +8,9 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" + + "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/coderd/util/slice" "github.com/google/uuid" @@ -45,7 +48,7 @@ func TestNoReconciliationActionsIfNoPresets(t *testing.T) { ReconciliationInterval: serpent.Duration(testutil.WaitLong), } logger := testutil.Logger(t) - controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t)) + controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry()) // given a template version with no presets org := dbgen.Organization(t, db, database.Organization{}) @@ -90,7 +93,7 @@ func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) { ReconciliationInterval: serpent.Duration(testutil.WaitLong), } logger := testutil.Logger(t) - controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t)) + controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry()) // given there are presets, but no prebuilds org := dbgen.Organization(t, db, database.Organization{}) @@ -317,7 +320,7 @@ func TestPrebuildReconciliation(t *testing.T) { t, &slogtest.Options{IgnoreErrors: true}, ).Leveled(slog.LevelDebug) db, pubSub := dbtestutil.NewDB(t) - controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t)) + controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry()) ownerID := uuid.New() dbgen.User(t, db, database.User{ @@ -419,7 +422,7 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) { t, &slogtest.Options{IgnoreErrors: true}, ).Leveled(slog.LevelDebug) db, pubSub := dbtestutil.NewDB(t) - controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t)) + controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry()) ownerID := uuid.New() dbgen.User(t, db, database.User{ @@ -503,7 +506,7 @@ func TestInvalidPreset(t *testing.T) { t, &slogtest.Options{IgnoreErrors: true}, ).Leveled(slog.LevelDebug) db, pubSub := dbtestutil.NewDB(t) - controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t)) + controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry()) ownerID := uuid.New() dbgen.User(t, db, database.User{ @@ -575,7 +578,7 @@ func TestRunLoop(t *testing.T) { t, &slogtest.Options{IgnoreErrors: true}, ).Leveled(slog.LevelDebug) db, pubSub := dbtestutil.NewDB(t) - reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock) + reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, prometheus.NewRegistry()) ownerID := uuid.New() dbgen.User(t, db, database.User{ @@ -705,7 +708,7 @@ func TestFailedBuildBackoff(t *testing.T) { t, &slogtest.Options{IgnoreErrors: true}, ).Leveled(slog.LevelDebug) db, ps := dbtestutil.NewDB(t) - reconciler := prebuilds.NewStoreReconciler(db, ps, cfg, logger, clock) + reconciler := prebuilds.NewStoreReconciler(db, ps, cfg, logger, clock, prometheus.NewRegistry()) // Given: an active template version with presets and prebuilds configured. const desiredInstances = 2 @@ -820,7 +823,7 @@ func TestReconciliationLock(t *testing.T) { codersdk.PrebuildsConfig{}, slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug), quartz.NewMock(t), - ) + prometheus.NewRegistry()) reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error { lockObtained := mutex.TryLock() // As long as the postgres lock is held, this mutex should always be unlocked when we get here. @@ -1009,6 +1012,30 @@ func setupTestDBWorkspace( return workspace } +// nolint:revive // It's a control flag, but this is a test. +func setupTestDBWorkspaceAgent(t *testing.T, db database.Store, workspaceID uuid.UUID, eligible bool) database.WorkspaceAgent { + build, err := db.GetLatestWorkspaceBuildByWorkspaceID(t.Context(), workspaceID) + require.NoError(t, err) + + res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID}) + agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ + ResourceID: res.ID, + }) + + // A prebuilt workspace is considered eligible when its agent is in a "ready" lifecycle state. + // i.e. connected to the control plane and all startup scripts have run. + if eligible { + require.NoError(t, db.UpdateWorkspaceAgentLifecycleStateByID(t.Context(), database.UpdateWorkspaceAgentLifecycleStateByIDParams{ + ID: agent.ID, + LifecycleState: database.WorkspaceAgentLifecycleStateReady, + StartedAt: sql.NullTime{Time: dbtime.Now().Add(-time.Minute), Valid: true}, + ReadyAt: sql.NullTime{Time: dbtime.Now(), Valid: true}, + })) + } + + return agent +} + var allTransitions = []database.WorkspaceTransition{ database.WorkspaceTransitionStart, database.WorkspaceTransitionStop, @@ -1024,4 +1051,8 @@ var allJobStatuses = []database.ProvisionerJobStatus{ database.ProvisionerJobStatusCanceling, } -// TODO (sasswart): test mutual exclusion +func allJobStatusesExcept(except ...database.ProvisionerJobStatus) []database.ProvisionerJobStatus { + return slice.Filter(except, func(status database.ProvisionerJobStatus) bool { + return !slice.Contains(allJobStatuses, status) + }) +} pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy