Skip to content

Commit 9c3484a

Browse files
fix: reimplement reporting of preset-hard-limited metric
1 parent e8c75eb commit 9c3484a

File tree

4 files changed

+64
-45
lines changed

4 files changed

+64
-45
lines changed

coderd/prebuilds/global_snapshot.go

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ import (
1212

1313
// GlobalSnapshot represents a full point-in-time snapshot of state relating to prebuilds across all templates.
1414
type GlobalSnapshot struct {
15-
Presets []database.GetTemplatePresetsWithPrebuildsRow
16-
RunningPrebuilds []database.GetRunningPrebuiltWorkspacesRow
17-
PrebuildsInProgress []database.CountInProgressPrebuildsRow
18-
Backoffs []database.GetPresetsBackoffRow
19-
HardLimitedPresets []database.GetPresetsAtFailureLimitRow
15+
Presets []database.GetTemplatePresetsWithPrebuildsRow
16+
RunningPrebuilds []database.GetRunningPrebuiltWorkspacesRow
17+
PrebuildsInProgress []database.CountInProgressPrebuildsRow
18+
Backoffs []database.GetPresetsBackoffRow
19+
HardLimitedPresetsMap map[uuid.UUID]database.GetPresetsAtFailureLimitRow
2020
}
2121

2222
func NewGlobalSnapshot(
@@ -26,12 +26,17 @@ func NewGlobalSnapshot(
2626
backoffs []database.GetPresetsBackoffRow,
2727
hardLimitedPresets []database.GetPresetsAtFailureLimitRow,
2828
) GlobalSnapshot {
29+
hardLimitedPresetsMap := make(map[uuid.UUID]database.GetPresetsAtFailureLimitRow, len(hardLimitedPresets))
30+
for _, preset := range hardLimitedPresets {
31+
hardLimitedPresetsMap[preset.PresetID] = preset
32+
}
33+
2934
return GlobalSnapshot{
30-
Presets: presets,
31-
RunningPrebuilds: runningPrebuilds,
32-
PrebuildsInProgress: prebuildsInProgress,
33-
Backoffs: backoffs,
34-
HardLimitedPresets: hardLimitedPresets,
35+
Presets: presets,
36+
RunningPrebuilds: runningPrebuilds,
37+
PrebuildsInProgress: prebuildsInProgress,
38+
Backoffs: backoffs,
39+
HardLimitedPresetsMap: hardLimitedPresetsMap,
3540
}
3641
}
3742

@@ -66,9 +71,7 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err
6671
backoffPtr = &backoff
6772
}
6873

69-
_, isHardLimited := slice.Find(s.HardLimitedPresets, func(row database.GetPresetsAtFailureLimitRow) bool {
70-
return row.PresetID == preset.ID
71-
})
74+
_, isHardLimited := s.HardLimitedPresetsMap[preset.ID]
7275

7376
return &PresetSnapshot{
7477
Preset: preset,
@@ -80,6 +83,12 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err
8083
}, nil
8184
}
8285

86+
func (s GlobalSnapshot) IsHardLimited(presetID uuid.UUID) bool {
87+
_, isHardLimited := s.HardLimitedPresetsMap[presetID]
88+
89+
return isHardLimited
90+
}
91+
8392
// filterExpiredWorkspaces splits running workspaces into expired and non-expired
8493
// based on the preset's TTL.
8594
// If TTL is missing or zero, all workspaces are considered non-expired.

enterprise/coderd/prebuilds/metricscollector.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -280,16 +280,9 @@ func (k hardLimitedPresetKey) String() string {
280280
return fmt.Sprintf("%s:%s:%s", k.orgName, k.templateName, k.presetName)
281281
}
282282

283-
// nolint:revive // isHardLimited determines if the preset should be reported as hard-limited in Prometheus.
284-
func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, presetName string, isHardLimited bool) {
283+
func (mc *MetricsCollector) registerHardLimitedPresets(isPresetHardLimited map[hardLimitedPresetKey]bool) {
285284
mc.isPresetHardLimitedMu.Lock()
286285
defer mc.isPresetHardLimitedMu.Unlock()
287286

288-
key := hardLimitedPresetKey{orgName: orgName, templateName: templateName, presetName: presetName}
289-
290-
if isHardLimited {
291-
mc.isPresetHardLimited[key] = true
292-
} else {
293-
delete(mc.isPresetHardLimited, key)
294-
}
287+
mc.isPresetHardLimited = isPresetHardLimited
295288
}

enterprise/coderd/prebuilds/reconcile.go

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
256256
if err != nil {
257257
return xerrors.Errorf("determine current snapshot: %w", err)
258258
}
259+
260+
c.reportHardLimitedPresets(snapshot)
261+
259262
if len(snapshot.Presets) == 0 {
260263
logger.Debug(ctx, "no templates found with prebuilds configured")
261264
return nil
@@ -296,6 +299,41 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
296299
return err
297300
}
298301

302+
func (c *StoreReconciler) reportHardLimitedPresets(snapshot *prebuilds.GlobalSnapshot) {
303+
// presetsMap is a map from key (orgName:templateName:presetName) to list of corresponding presets.
304+
// Multiple versions of a preset can exist with the same orgName, templateName, and presetName,
305+
// because templates can have multiple versions — or deleted templates can share the same name.
306+
presetsMap := make(map[hardLimitedPresetKey][]database.GetTemplatePresetsWithPrebuildsRow)
307+
for _, preset := range snapshot.Presets {
308+
key := hardLimitedPresetKey{
309+
orgName: preset.OrganizationName,
310+
templateName: preset.TemplateName,
311+
presetName: preset.Name,
312+
}
313+
314+
presetsMap[key] = append(presetsMap[key], preset)
315+
}
316+
317+
// Report a preset as hard-limited only if all the following conditions are met:
318+
// - The preset is marked as hard-limited
319+
// - The preset is using the active version of its template, and the template has not been deleted
320+
//
321+
// The second condition is important because a hard-limited preset that has become outdated is no longer relevant.
322+
// Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it
323+
// as hard-limited to the admin.
324+
isPresetHardLimited := make(map[hardLimitedPresetKey]bool)
325+
for key, presets := range presetsMap {
326+
for _, preset := range presets {
327+
if preset.UsingActiveVersion && !preset.Deleted && snapshot.IsHardLimited(preset.ID) {
328+
isPresetHardLimited[key] = true
329+
break
330+
}
331+
}
332+
}
333+
334+
c.metrics.registerHardLimitedPresets(isPresetHardLimited)
335+
}
336+
299337
// SnapshotState captures the current state of all prebuilds across templates.
300338
func (c *StoreReconciler) SnapshotState(ctx context.Context, store database.Store) (*prebuilds.GlobalSnapshot, error) {
301339
if err := ctx.Err(); err != nil {
@@ -361,24 +399,6 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
361399
slog.F("preset_name", ps.Preset.Name),
362400
)
363401

364-
// Report a metric only if the preset uses the latest version of the template and the template is not deleted.
365-
// This avoids conflicts between metrics from old and new template versions.
366-
//
367-
// NOTE: Multiple versions of a preset can exist with the same orgName, templateName, and presetName,
368-
// because templates can have multiple versions — or deleted templates can share the same name.
369-
//
370-
// The safest approach is to report the metric only for the latest version of the preset.
371-
// When a new template version is released, the metric for the new preset should overwrite
372-
// the old value in Prometheus.
373-
//
374-
// However, there’s one edge case: if an admin creates a template, it becomes hard-limited,
375-
// then deletes the template and never creates another with the same name,
376-
// the old preset will continue to be reported as hard-limited —
377-
// even though it’s deleted. This will persist until `coderd` is restarted.
378-
if ps.Preset.UsingActiveVersion && !ps.Preset.Deleted {
379-
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited)
380-
}
381-
382402
// If the preset reached the hard failure limit for the first time during this iteration:
383403
// - Mark it as hard-limited in the database
384404
// - Send notifications to template admins

enterprise/coderd/prebuilds/reconcile_test.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,18 +1034,15 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
10341034
require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition)
10351035
require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition)
10361036

1037-
// The metric is still set to 1, even though the preset has become outdated.
1038-
// This happens because the old value hasn't been overwritten by a newer preset yet.
1037+
// Metric is deleted after preset became outdated.
10391038
mf, err = registry.Gather()
10401039
require.NoError(t, err)
10411040
metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
10421041
"template_name": template.Name,
10431042
"preset_name": preset.Name,
10441043
"org_name": org.Name,
10451044
})
1046-
require.NotNil(t, metric)
1047-
require.NotNil(t, metric.GetGauge())
1048-
require.EqualValues(t, 1, metric.GetGauge().GetValue())
1045+
require.Nil(t, metric)
10491046
})
10501047
}
10511048
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy