diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index 9995a7f389130..7403819a2d10b 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -704,3 +704,7 @@ workspace_prebuilds: # backoff. # (default: 1h0m0s, type: duration) reconciliation_backoff_lookback_period: 1h0m0s + # Maximum number of consecutive failed prebuilds before a preset hits the hard + # limit; disabled when set to zero. + # (default: 3, type: int) + failure_hard_limit: 3 diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index e98197d3b5bb2..7cee63e183e7e 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -14326,6 +14326,10 @@ const docTemplate = `{ "codersdk.PrebuildsConfig": { "type": "object", "properties": { + "failure_hard_limit": { + "description": "FailureHardLimit defines the maximum number of consecutive failed prebuild attempts allowed\nbefore a preset is considered to be in a hard limit state. When a preset hits this limit,\nno new prebuilds will be created until the limit is reset.\nFailureHardLimit is disabled when set to zero.", + "type": "integer" + }, "reconciliation_backoff_interval": { "description": "ReconciliationBackoffInterval specifies the amount of time to increase the backoff interval\nwhen errors occur during reconciliation.", "type": "integer" diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index fa103f55fbe9f..89a582091496f 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -12968,6 +12968,10 @@ "codersdk.PrebuildsConfig": { "type": "object", "properties": { + "failure_hard_limit": { + "description": "FailureHardLimit defines the maximum number of consecutive failed prebuild attempts allowed\nbefore a preset is considered to be in a hard limit state. 
When a preset hits this limit,\nno new prebuilds will be created until the limit is reset.\nFailureHardLimit is disabled when set to zero.", + "type": "integer" + }, "reconciliation_backoff_interval": { "description": "ReconciliationBackoffInterval specifies the amount of time to increase the backoff interval\nwhen errors occur during reconciliation.", "type": "integer" diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index ab3781452dd2d..a210599d17cc4 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -2226,6 +2226,15 @@ func (q *querier) GetPresetParametersByTemplateVersionID(ctx context.Context, ar return q.db.GetPresetParametersByTemplateVersionID(ctx, args) } +func (q *querier) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]database.GetPresetsAtFailureLimitRow, error) { + // GetPresetsAtFailureLimit returns a list of template version presets that have reached the hard failure limit. + // Request the same authorization permissions as GetPresetsBackoff, since the methods are similar. + if err := q.authorizeContext(ctx, policy.ActionViewInsights, rbac.ResourceTemplate.All()); err != nil { + return nil, err + } + return q.db.GetPresetsAtFailureLimit(ctx, hardLimit) +} + func (q *querier) GetPresetsBackoff(ctx context.Context, lookback time.Time) ([]database.GetPresetsBackoffRow, error) { // GetPresetsBackoff returns a list of template version presets along with metadata such as the number of failed prebuilds. 
if err := q.authorizeContext(ctx, policy.ActionViewInsights, rbac.ResourceTemplate.All()); err != nil { @@ -4201,6 +4210,24 @@ func (q *querier) UpdateOrganizationDeletedByID(ctx context.Context, arg databas return deleteQ(q.log, q.auth, q.db.GetOrganizationByID, deleteF)(ctx, arg.ID) } +func (q *querier) UpdatePresetPrebuildStatus(ctx context.Context, arg database.UpdatePresetPrebuildStatusParams) error { + preset, err := q.db.GetPresetByID(ctx, arg.PresetID) + if err != nil { + return err + } + + object := rbac.ResourceTemplate. + WithID(preset.TemplateID.UUID). + InOrg(preset.OrganizationID) + + err = q.authorizeContext(ctx, policy.ActionUpdate, object) + if err != nil { + return err + } + + return q.db.UpdatePresetPrebuildStatus(ctx, arg) +} + func (q *querier) UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg database.UpdateProvisionerDaemonLastSeenAtParams) error { if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerDaemon); err != nil { return err diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index e8b90afbc396d..703e51d739c47 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -4924,6 +4924,11 @@ func (s *MethodTestSuite) TestPrebuilds() { Asserts(rbac.ResourceWorkspace.All(), policy.ActionRead). ErrorsWithInMemDB(dbmem.ErrUnimplemented) })) + s.Run("GetPresetsAtFailureLimit", s.Subtest(func(_ database.Store, check *expects) { + check.Args(int64(0)). + Asserts(rbac.ResourceTemplate.All(), policy.ActionViewInsights). + ErrorsWithInMemDB(dbmem.ErrUnimplemented) + })) s.Run("GetPresetsBackoff", s.Subtest(func(_ database.Store, check *expects) { check.Args(time.Time{}). Asserts(rbac.ResourceTemplate.All(), policy.ActionViewInsights). 
@@ -4971,8 +4976,34 @@ func (s *MethodTestSuite) TestPrebuilds() { }, InvalidateAfterSecs: preset.InvalidateAfterSecs, OrganizationID: org.ID, + PrebuildStatus: database.PrebuildStatusHealthy, }) })) + s.Run("UpdatePresetPrebuildStatus", s.Subtest(func(db database.Store, check *expects) { + org := dbgen.Organization(s.T(), db, database.Organization{}) + user := dbgen.User(s.T(), db, database.User{}) + template := dbgen.Template(s.T(), db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + }) + templateVersion := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{ + TemplateID: uuid.NullUUID{ + UUID: template.ID, + Valid: true, + }, + OrganizationID: org.ID, + CreatedBy: user.ID, + }) + preset := dbgen.Preset(s.T(), db, database.InsertPresetParams{ + TemplateVersionID: templateVersion.ID, + }) + req := database.UpdatePresetPrebuildStatusParams{ + PresetID: preset.ID, + Status: database.PrebuildStatusHealthy, + } + check.Args(req). + Asserts(rbac.ResourceTemplate.WithID(template.ID).InOrg(org.ID), policy.ActionUpdate) + })) } func (s *MethodTestSuite) TestOAuth2ProviderApps() { diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 75c56b9c2324d..1a1455d83045b 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -4287,6 +4287,7 @@ func (q *FakeQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (da CreatedAt: preset.CreatedAt, DesiredInstances: preset.DesiredInstances, InvalidateAfterSecs: preset.InvalidateAfterSecs, + PrebuildStatus: preset.PrebuildStatus, TemplateID: tv.TemplateID, OrganizationID: tv.OrganizationID, }, nil @@ -4352,6 +4353,10 @@ func (q *FakeQuerier) GetPresetParametersByTemplateVersionID(_ context.Context, return parameters, nil } +func (q *FakeQuerier) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]database.GetPresetsAtFailureLimitRow, error) { + return nil, ErrUnimplemented +} + func (*FakeQuerier) GetPresetsBackoff(_ 
context.Context, _ time.Time) ([]database.GetPresetsBackoffRow, error) { return nil, ErrUnimplemented } @@ -9089,6 +9094,7 @@ func (q *FakeQuerier) InsertPreset(_ context.Context, arg database.InsertPresetP Int32: 0, Valid: true, }, + PrebuildStatus: database.PrebuildStatusHealthy, } q.presets = append(q.presets, preset) return preset, nil @@ -10917,6 +10923,25 @@ func (q *FakeQuerier) UpdateOrganizationDeletedByID(_ context.Context, arg datab return sql.ErrNoRows } +func (q *FakeQuerier) UpdatePresetPrebuildStatus(ctx context.Context, arg database.UpdatePresetPrebuildStatusParams) error { + err := validateDatabaseType(arg) + if err != nil { + return err + } + + q.mutex.RLock() + defer q.mutex.RUnlock() + + for _, preset := range q.presets { + if preset.ID == arg.PresetID { + preset.PrebuildStatus = arg.Status + return nil + } + } + + return xerrors.Errorf("preset %v does not exist", arg.PresetID) +} + func (q *FakeQuerier) UpdateProvisionerDaemonLastSeenAt(_ context.Context, arg database.UpdateProvisionerDaemonLastSeenAtParams) error { err := validateDatabaseType(arg) if err != nil { diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 47ec185915660..e35ec11b02453 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -1138,6 +1138,13 @@ func (m queryMetricsStore) GetPresetParametersByTemplateVersionID(ctx context.Co return r0, r1 } +func (m queryMetricsStore) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]database.GetPresetsAtFailureLimitRow, error) { + start := time.Now() + r0, r1 := m.s.GetPresetsAtFailureLimit(ctx, hardLimit) + m.queryLatencies.WithLabelValues("GetPresetsAtFailureLimit").Observe(time.Since(start).Seconds()) + return r0, r1 +} + func (m queryMetricsStore) GetPresetsBackoff(ctx context.Context, lookback time.Time) ([]database.GetPresetsBackoffRow, error) { start := time.Now() r0, r1 := m.s.GetPresetsBackoff(ctx, lookback) @@ 
-2692,6 +2699,13 @@ func (m queryMetricsStore) UpdateOrganizationDeletedByID(ctx context.Context, ar return r0 } +func (m queryMetricsStore) UpdatePresetPrebuildStatus(ctx context.Context, arg database.UpdatePresetPrebuildStatusParams) error { + start := time.Now() + r0 := m.s.UpdatePresetPrebuildStatus(ctx, arg) + m.queryLatencies.WithLabelValues("UpdatePresetPrebuildStatus").Observe(time.Since(start).Seconds()) + return r0 +} + func (m queryMetricsStore) UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg database.UpdateProvisionerDaemonLastSeenAtParams) error { start := time.Now() r0 := m.s.UpdateProvisionerDaemonLastSeenAt(ctx, arg) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index e3a9a14698e42..7a1fc0c4b2a6f 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -2328,6 +2328,21 @@ func (mr *MockStoreMockRecorder) GetPresetParametersByTemplateVersionID(ctx, tem return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPresetParametersByTemplateVersionID", reflect.TypeOf((*MockStore)(nil).GetPresetParametersByTemplateVersionID), ctx, templateVersionID) } +// GetPresetsAtFailureLimit mocks base method. +func (m *MockStore) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]database.GetPresetsAtFailureLimitRow, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetPresetsAtFailureLimit", ctx, hardLimit) + ret0, _ := ret[0].([]database.GetPresetsAtFailureLimitRow) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetPresetsAtFailureLimit indicates an expected call of GetPresetsAtFailureLimit. +func (mr *MockStoreMockRecorder) GetPresetsAtFailureLimit(ctx, hardLimit any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPresetsAtFailureLimit", reflect.TypeOf((*MockStore)(nil).GetPresetsAtFailureLimit), ctx, hardLimit) +} + // GetPresetsBackoff mocks base method. 
// UpdatePresetPrebuildStatus mocks base method.
//
// The invocation is handed to the gomock controller under the method name
// "UpdatePresetPrebuildStatus" together with ctx and arg. The first value the
// controller returns is type-asserted to error; the assertion result is
// discarded, so a value that is not an error yields a nil error.
func (m *MockStore) UpdatePresetPrebuildStatus(ctx context.Context, arg database.UpdatePresetPrebuildStatusParams) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "UpdatePresetPrebuildStatus", ctx, arg)
	ret0, _ := ret[0].(error)
	return ret0
}

// UpdatePresetPrebuildStatus indicates an expected call of UpdatePresetPrebuildStatus.
//
// ctx and arg are passed through unchanged to the controller's
// RecordCallWithMethodType, along with the reflected type of
// (*MockStore).UpdatePresetPrebuildStatus. The recorded *gomock.Call is returned.
func (mr *MockStoreMockRecorder) UpdatePresetPrebuildStatus(ctx, arg any) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdatePresetPrebuildStatus", reflect.TypeOf((*MockStore)(nil).UpdatePresetPrebuildStatus), ctx, arg)
}
func (m *MockStore) UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg database.UpdateProvisionerDaemonLastSeenAtParams) error { m.ctrl.T.Helper() diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 2f23b3ad4ce78..ec196405df2d3 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -153,6 +153,12 @@ CREATE TYPE port_share_protocol AS ENUM ( 'https' ); +CREATE TYPE prebuild_status AS ENUM ( + 'healthy', + 'hard_limited', + 'validation_failed' +); + CREATE TYPE provisioner_daemon_status AS ENUM ( 'offline', 'idle', @@ -1439,7 +1445,8 @@ CREATE TABLE template_version_presets ( name text NOT NULL, created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, desired_instances integer, - invalidate_after_secs integer DEFAULT 0 + invalidate_after_secs integer DEFAULT 0, + prebuild_status prebuild_status DEFAULT 'healthy'::prebuild_status NOT NULL ); CREATE TABLE template_version_terraform_values ( diff --git a/coderd/database/migrations/000328_prebuild_failure_limit_notification.down.sql b/coderd/database/migrations/000328_prebuild_failure_limit_notification.down.sql new file mode 100644 index 0000000000000..40697c7bbc3d2 --- /dev/null +++ b/coderd/database/migrations/000328_prebuild_failure_limit_notification.down.sql @@ -0,0 +1 @@ +DELETE FROM notification_templates WHERE id = '414d9331-c1fc-4761-b40c-d1f4702279eb'; diff --git a/coderd/database/migrations/000328_prebuild_failure_limit_notification.up.sql b/coderd/database/migrations/000328_prebuild_failure_limit_notification.up.sql new file mode 100644 index 0000000000000..403bd667abd28 --- /dev/null +++ b/coderd/database/migrations/000328_prebuild_failure_limit_notification.up.sql @@ -0,0 +1,25 @@ +INSERT INTO notification_templates +(id, name, title_template, body_template, "group", actions) +VALUES ('414d9331-c1fc-4761-b40c-d1f4702279eb', + 'Prebuild Failure Limit Reached', + E'There is a problem creating prebuilt workspaces', + $$ +The number of failed prebuild 
attempts has reached the hard limit for template **{{ .Labels.template }}** and preset **{{ .Labels.preset }}**. + +To resume prebuilds, fix the underlying issue and upload a new template version. + +Refer to the documentation for more details: +- [Troubleshooting templates](https://coder.com/docs/admin/templates/troubleshooting) +- [Troubleshooting of prebuilt workspaces](https://coder.com/docs/admin/templates/extending-templates/prebuilt-workspaces#administration-and-troubleshooting) +$$, + 'Template Events', + '[ + { + "label": "View failed prebuilt workspaces", + "url": "{{base_url}}/workspaces?filter=owner:prebuilds+status:failed+template:{{.Labels.template}}" + }, + { + "label": "View template version", + "url": "{{base_url}}/templates/{{.Labels.org}}/{{.Labels.template}}/versions/{{.Labels.template_version}}" + } + ]'::jsonb); diff --git a/coderd/database/migrations/000329_add_status_to_template_presets.down.sql b/coderd/database/migrations/000329_add_status_to_template_presets.down.sql new file mode 100644 index 0000000000000..8fe04f99cae33 --- /dev/null +++ b/coderd/database/migrations/000329_add_status_to_template_presets.down.sql @@ -0,0 +1,5 @@ +-- Remove the column from the table first (must happen before dropping the enum type) +ALTER TABLE template_version_presets DROP COLUMN prebuild_status; + +-- Then drop the enum type +DROP TYPE prebuild_status; diff --git a/coderd/database/migrations/000329_add_status_to_template_presets.up.sql b/coderd/database/migrations/000329_add_status_to_template_presets.up.sql new file mode 100644 index 0000000000000..019a246f73a87 --- /dev/null +++ b/coderd/database/migrations/000329_add_status_to_template_presets.up.sql @@ -0,0 +1,7 @@ +CREATE TYPE prebuild_status AS ENUM ( + 'healthy', -- Prebuilds are working as expected; this is the default, healthy state. + 'hard_limited', -- Prebuilds have failed repeatedly and hit the configured hard failure limit; won't be retried anymore. 
// PrebuildStatus is the Go representation of the prebuild_status enum.
type PrebuildStatus string

// The known PrebuildStatus values.
const (
	PrebuildStatusHealthy          PrebuildStatus = "healthy"
	PrebuildStatusHardLimited      PrebuildStatus = "hard_limited"
	PrebuildStatusValidationFailed PrebuildStatus = "validation_failed"
)

// Scan stores src in e. Only []byte and string sources are accepted; any other
// dynamic type yields an error and leaves e untouched. The value is not checked
// against the known constants.
func (e *PrebuildStatus) Scan(src interface{}) error {
	if raw, ok := src.([]byte); ok {
		*e = PrebuildStatus(raw)
		return nil
	}
	if text, ok := src.(string); ok {
		*e = PrebuildStatus(text)
		return nil
	}
	return fmt.Errorf("unsupported scan type for PrebuildStatus: %T", src)
}

// NullPrebuildStatus is a PrebuildStatus that may be NULL.
type NullPrebuildStatus struct {
	PrebuildStatus PrebuildStatus `json:"prebuild_status"`
	Valid          bool           `json:"valid"` // Valid is true if PrebuildStatus is not NULL
}

// Scan implements the Scanner interface.
// A nil value resets ns to the NULL state. Any other value marks ns as valid
// before delegating to PrebuildStatus.Scan, so Valid remains true even when
// that scan reports an error.
func (ns *NullPrebuildStatus) Scan(value interface{}) error {
	if value != nil {
		ns.Valid = true
		return ns.PrebuildStatus.Scan(value)
	}
	*ns = NullPrebuildStatus{}
	return nil
}

// Value implements the driver Valuer interface.
// It yields the status as a plain string when ns is valid and nil otherwise.
func (ns NullPrebuildStatus) Value() (driver.Value, error) {
	if ns.Valid {
		return string(ns.PrebuildStatus), nil
	}
	return nil, nil
}

// Valid reports whether e is one of the values listed by
// AllPrebuildStatusValues.
func (e PrebuildStatus) Valid() bool {
	for _, known := range AllPrebuildStatusValues() {
		if e == known {
			return true
		}
	}
	return false
}

// AllPrebuildStatusValues returns every known PrebuildStatus, in declaration
// order.
func AllPrebuildStatusValues() []PrebuildStatus {
	all := make([]PrebuildStatus, 0, 3)
	all = append(all,
		PrebuildStatusHealthy,
		PrebuildStatusHardLimited,
		PrebuildStatusValidationFailed,
	)
	return all
}
([]TemplateVersionPresetParameter, error) GetPresetParametersByTemplateVersionID(ctx context.Context, templateVersionID uuid.UUID) ([]TemplateVersionPresetParameter, error) + // GetPresetsAtFailureLimit groups workspace builds by preset ID. + // Each preset is associated with exactly one template version ID. + // For each preset, the query checks the last hard_limit builds. + // If all of them failed, the preset is considered to have hit the hard failure limit. + // The query returns a list of preset IDs that have reached this failure threshold. + // Only active template versions with configured presets are considered. + // For each preset, check the last hard_limit builds. + // If all of them failed, the preset is considered to have hit the hard failure limit. + GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]GetPresetsAtFailureLimitRow, error) // GetPresetsBackoff groups workspace builds by preset ID. // Each preset is associated with exactly one template version ID. 
// For each group, the query checks up to N of the most recent jobs that occurred within the @@ -568,6 +577,7 @@ type sqlcQuerier interface { UpdateOAuth2ProviderAppSecretByID(ctx context.Context, arg UpdateOAuth2ProviderAppSecretByIDParams) (OAuth2ProviderAppSecret, error) UpdateOrganization(ctx context.Context, arg UpdateOrganizationParams) (Organization, error) UpdateOrganizationDeletedByID(ctx context.Context, arg UpdateOrganizationDeletedByIDParams) error + UpdatePresetPrebuildStatus(ctx context.Context, arg UpdatePresetPrebuildStatusParams) error UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg UpdateProvisionerDaemonLastSeenAtParams) error UpdateProvisionerJobByID(ctx context.Context, arg UpdateProvisionerJobByIDParams) error UpdateProvisionerJobWithCancelByID(ctx context.Context, arg UpdateProvisionerJobWithCancelByIDParams) error diff --git a/coderd/database/querier_test.go b/coderd/database/querier_test.go index b2cc20c4894d5..5bafa58796b7a 100644 --- a/coderd/database/querier_test.go +++ b/coderd/database/querier_test.go @@ -4123,8 +4123,7 @@ func TestGetPresetsBackoff(t *testing.T) { }) tmpl1 := createTemplate(t, db, orgID, userID) - tmpl1V1 := createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) - _ = tmpl1V1 + createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) backoffs, err := db.GetPresetsBackoff(ctx, now.Add(-time.Hour)) require.NoError(t, err) @@ -4401,6 +4400,311 @@ func TestGetPresetsBackoff(t *testing.T) { }) } +func TestGetPresetsAtFailureLimit(t *testing.T) { + t.Parallel() + if !dbtestutil.WillUsePostgres() { + t.SkipNow() + } + + now := dbtime.Now() + hourBefore := now.Add(-time.Hour) + orgID := uuid.New() + userID := uuid.New() + + findPresetByTmplVersionID := func(hardLimitedPresets []database.GetPresetsAtFailureLimitRow, tmplVersionID uuid.UUID) *database.GetPresetsAtFailureLimitRow { + for _, preset := range hardLimitedPresets { + if preset.TemplateVersionID == tmplVersionID { + 
return &preset + } + } + + return nil + } + + testCases := []struct { + name string + // true - build is successful + // false - build is unsuccessful + buildSuccesses []bool + hardLimit int64 + expHitHardLimit bool + }{ + { + name: "failed build", + buildSuccesses: []bool{false}, + hardLimit: 1, + expHitHardLimit: true, + }, + { + name: "2 failed builds", + buildSuccesses: []bool{false, false}, + hardLimit: 1, + expHitHardLimit: true, + }, + { + name: "successful build", + buildSuccesses: []bool{true}, + hardLimit: 1, + expHitHardLimit: false, + }, + { + name: "last build is failed", + buildSuccesses: []bool{true, true, false}, + hardLimit: 1, + expHitHardLimit: true, + }, + { + name: "last build is successful", + buildSuccesses: []bool{false, false, true}, + hardLimit: 1, + expHitHardLimit: false, + }, + { + name: "last 3 builds are failed - hard limit is reached", + buildSuccesses: []bool{true, true, false, false, false}, + hardLimit: 3, + expHitHardLimit: true, + }, + { + name: "1 out of 3 last build is successful - hard limit is NOT reached", + buildSuccesses: []bool{false, false, true, false, false}, + hardLimit: 3, + expHitHardLimit: false, + }, + // hardLimit set to zero, implicitly disables the hard limit. 
+ { + name: "despite 5 failed builds, the hard limit is not reached because it's disabled.", + buildSuccesses: []bool{false, false, false, false, false}, + hardLimit: 0, + expHitHardLimit: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl := createTemplate(t, db, orgID, userID) + tmplV1 := createTmplVersionAndPreset(t, db, tmpl, tmpl.ActiveVersionID, now, nil) + for idx, buildSuccess := range tc.buildSuccesses { + createPrebuiltWorkspace(ctx, t, db, tmpl, tmplV1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: !buildSuccess, + createdAt: hourBefore.Add(time.Duration(idx) * time.Second), + }) + } + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, tc.hardLimit) + require.NoError(t, err) + + if !tc.expHitHardLimit { + require.Len(t, hardLimitedPresets, 0) + return + } + + require.Len(t, hardLimitedPresets, 1) + hardLimitedPreset := hardLimitedPresets[0] + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmplV1.preset.ID) + }) + } + + t.Run("Ignore Inactive Version", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl := createTemplate(t, db, orgID, userID) + tmplV1 := createTmplVersionAndPreset(t, db, tmpl, uuid.New(), now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl, tmplV1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + // Active Version + tmplV2 := createTmplVersionAndPreset(t, db, tmpl, tmpl.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl, tmplV2, orgID, now, 
&createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + createPrebuiltWorkspace(ctx, t, db, tmpl, tmplV2, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, 1) + require.NoError(t, err) + + require.Len(t, hardLimitedPresets, 1) + hardLimitedPreset := hardLimitedPresets[0] + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmplV2.preset.ID) + }) + + t.Run("Multiple Templates", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl1 := createTemplate(t, db, orgID, userID) + tmpl1V1 := createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + tmpl2 := createTemplate(t, db, orgID, userID) + tmpl2V1 := createTmplVersionAndPreset(t, db, tmpl2, tmpl2.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl2, tmpl2V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, 1) + + require.NoError(t, err) + + require.Len(t, hardLimitedPresets, 2) + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl1.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl1.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl1V1.preset.ID) + } + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl2.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl2.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl2V1.preset.ID) + } + }) + + t.Run("Multiple Templates, Versions and Workspace Builds", func(t 
*testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl1 := createTemplate(t, db, orgID, userID) + tmpl1V1 := createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + tmpl2 := createTemplate(t, db, orgID, userID) + tmpl2V1 := createTmplVersionAndPreset(t, db, tmpl2, tmpl2.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl2, tmpl2V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + createPrebuiltWorkspace(ctx, t, db, tmpl2, tmpl2V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + tmpl3 := createTemplate(t, db, orgID, userID) + tmpl3V1 := createTmplVersionAndPreset(t, db, tmpl3, uuid.New(), now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl3, tmpl3V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + tmpl3V2 := createTmplVersionAndPreset(t, db, tmpl3, tmpl3.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl3, tmpl3V2, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + createPrebuiltWorkspace(ctx, t, db, tmpl3, tmpl3V2, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + hardLimit := int64(2) + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, hardLimit) + require.NoError(t, err) + + require.Len(t, hardLimitedPresets, 3) + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl1.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl1.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl1V1.preset.ID) + } + { + hardLimitedPreset 
:= findPresetByTmplVersionID(hardLimitedPresets, tmpl2.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl2.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl2V1.preset.ID) + } + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl3.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl3.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl3V2.preset.ID) + } + }) + + t.Run("No Workspace Builds", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl1 := createTemplate(t, db, orgID, userID) + createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, 1) + require.NoError(t, err) + require.Nil(t, hardLimitedPresets) + }) + + t.Run("No Failed Workspace Builds", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl1 := createTemplate(t, db, orgID, userID) + tmpl1V1 := createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) + successfulJobOpts := createPrebuiltWorkspaceOpts{} + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &successfulJobOpts) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &successfulJobOpts) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &successfulJobOpts) + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, 1) + require.NoError(t, err) + require.Nil(t, hardLimitedPresets) + }) +} + func requireUsersMatch(t testing.TB, expected []database.User, found []database.GetUsersRow, msg string) { t.Helper() 
require.ElementsMatch(t, expected, database.ConvertUserRows(found), msg) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 99a8bf4603b57..ffd8ccb035206 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -6288,6 +6288,71 @@ func (q *sqlQuerier) GetPrebuildMetrics(ctx context.Context) ([]GetPrebuildMetri return items, nil } +const getPresetsAtFailureLimit = `-- name: GetPresetsAtFailureLimit :many +WITH filtered_builds AS ( + -- Only select builds which are for prebuild creations + SELECT wlb.template_version_id, wlb.created_at, tvp.id AS preset_id, wlb.job_status, tvp.desired_instances + FROM template_version_presets tvp + INNER JOIN workspace_latest_builds wlb ON wlb.template_version_preset_id = tvp.id + INNER JOIN workspaces w ON wlb.workspace_id = w.id + INNER JOIN template_versions tv ON wlb.template_version_id = tv.id + INNER JOIN templates t ON tv.template_id = t.id AND t.active_version_id = tv.id + WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. + AND wlb.transition = 'start'::workspace_transition + AND w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0' +), +time_sorted_builds AS ( + -- Group builds by preset, then sort each group by created_at. 
+ SELECT fb.template_version_id, fb.created_at, fb.preset_id, fb.job_status, fb.desired_instances, + ROW_NUMBER() OVER (PARTITION BY fb.preset_id ORDER BY fb.created_at DESC) as rn + FROM filtered_builds fb +) +SELECT + tsb.template_version_id, + tsb.preset_id +FROM time_sorted_builds tsb +WHERE tsb.rn <= $1::bigint + AND tsb.job_status = 'failed'::provisioner_job_status +GROUP BY tsb.template_version_id, tsb.preset_id +HAVING COUNT(*) = $1::bigint +` + +type GetPresetsAtFailureLimitRow struct { + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + PresetID uuid.UUID `db:"preset_id" json:"preset_id"` +} + +// GetPresetsAtFailureLimit groups workspace builds by preset ID. +// Each preset is associated with exactly one template version ID. +// For each preset, the query checks the last hard_limit builds. +// If all of them failed, the preset is considered to have hit the hard failure limit. +// The query returns a list of preset IDs that have reached this failure threshold. +// Only active template versions with configured presets are considered. +// For each preset, check the last hard_limit builds. +// If all of them failed, the preset is considered to have hit the hard failure limit. 
+func (q *sqlQuerier) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]GetPresetsAtFailureLimitRow, error) { + rows, err := q.db.QueryContext(ctx, getPresetsAtFailureLimit, hardLimit) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetPresetsAtFailureLimitRow + for rows.Next() { + var i GetPresetsAtFailureLimitRow + if err := rows.Scan(&i.TemplateVersionID, &i.PresetID); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const getPresetsBackoff = `-- name: GetPresetsBackoff :many WITH filtered_builds AS ( -- Only select builds which are for prebuild creations @@ -6438,6 +6503,7 @@ const getTemplatePresetsWithPrebuilds = `-- name: GetTemplatePresetsWithPrebuild SELECT t.id AS template_id, t.name AS template_name, + o.id AS organization_id, o.name AS organization_name, tv.id AS template_version_id, tv.name AS template_version_name, @@ -6445,6 +6511,7 @@ SELECT tvp.id, tvp.name, tvp.desired_instances AS desired_instances, + tvp.prebuild_status, t.deleted, t.deprecated != '' AS deprecated FROM templates t @@ -6457,17 +6524,19 @@ WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a pre ` type GetTemplatePresetsWithPrebuildsRow struct { - TemplateID uuid.UUID `db:"template_id" json:"template_id"` - TemplateName string `db:"template_name" json:"template_name"` - OrganizationName string `db:"organization_name" json:"organization_name"` - TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` - TemplateVersionName string `db:"template_version_name" json:"template_version_name"` - UsingActiveVersion bool `db:"using_active_version" json:"using_active_version"` - ID uuid.UUID `db:"id" json:"id"` - Name string `db:"name" json:"name"` - DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` - Deleted 
bool `db:"deleted" json:"deleted"` - Deprecated bool `db:"deprecated" json:"deprecated"` + TemplateID uuid.UUID `db:"template_id" json:"template_id"` + TemplateName string `db:"template_name" json:"template_name"` + OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` + OrganizationName string `db:"organization_name" json:"organization_name"` + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + TemplateVersionName string `db:"template_version_name" json:"template_version_name"` + UsingActiveVersion bool `db:"using_active_version" json:"using_active_version"` + ID uuid.UUID `db:"id" json:"id"` + Name string `db:"name" json:"name"` + DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` + PrebuildStatus PrebuildStatus `db:"prebuild_status" json:"prebuild_status"` + Deleted bool `db:"deleted" json:"deleted"` + Deprecated bool `db:"deprecated" json:"deprecated"` } // GetTemplatePresetsWithPrebuilds retrieves template versions with configured presets and prebuilds. 
@@ -6485,6 +6554,7 @@ func (q *sqlQuerier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templa if err := rows.Scan( &i.TemplateID, &i.TemplateName, + &i.OrganizationID, &i.OrganizationName, &i.TemplateVersionID, &i.TemplateVersionName, @@ -6492,6 +6562,7 @@ func (q *sqlQuerier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templa &i.ID, &i.Name, &i.DesiredInstances, + &i.PrebuildStatus, &i.Deleted, &i.Deprecated, ); err != nil { @@ -6509,21 +6580,22 @@ func (q *sqlQuerier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templa } const getPresetByID = `-- name: GetPresetByID :one -SELECT tvp.id, tvp.template_version_id, tvp.name, tvp.created_at, tvp.desired_instances, tvp.invalidate_after_secs, tv.template_id, tv.organization_id FROM +SELECT tvp.id, tvp.template_version_id, tvp.name, tvp.created_at, tvp.desired_instances, tvp.invalidate_after_secs, tvp.prebuild_status, tv.template_id, tv.organization_id FROM template_version_presets tvp INNER JOIN template_versions tv ON tvp.template_version_id = tv.id WHERE tvp.id = $1 ` type GetPresetByIDRow struct { - ID uuid.UUID `db:"id" json:"id"` - TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` - Name string `db:"name" json:"name"` - CreatedAt time.Time `db:"created_at" json:"created_at"` - DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` - InvalidateAfterSecs sql.NullInt32 `db:"invalidate_after_secs" json:"invalidate_after_secs"` - TemplateID uuid.NullUUID `db:"template_id" json:"template_id"` - OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` + ID uuid.UUID `db:"id" json:"id"` + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + Name string `db:"name" json:"name"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` + InvalidateAfterSecs sql.NullInt32 `db:"invalidate_after_secs" 
json:"invalidate_after_secs"` + PrebuildStatus PrebuildStatus `db:"prebuild_status" json:"prebuild_status"` + TemplateID uuid.NullUUID `db:"template_id" json:"template_id"` + OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` } func (q *sqlQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (GetPresetByIDRow, error) { @@ -6536,6 +6608,7 @@ func (q *sqlQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (Get &i.CreatedAt, &i.DesiredInstances, &i.InvalidateAfterSecs, + &i.PrebuildStatus, &i.TemplateID, &i.OrganizationID, ) @@ -6544,7 +6617,7 @@ func (q *sqlQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (Get const getPresetByWorkspaceBuildID = `-- name: GetPresetByWorkspaceBuildID :one SELECT - template_version_presets.id, template_version_presets.template_version_id, template_version_presets.name, template_version_presets.created_at, template_version_presets.desired_instances, template_version_presets.invalidate_after_secs + template_version_presets.id, template_version_presets.template_version_id, template_version_presets.name, template_version_presets.created_at, template_version_presets.desired_instances, template_version_presets.invalidate_after_secs, template_version_presets.prebuild_status FROM template_version_presets INNER JOIN workspace_builds ON workspace_builds.template_version_preset_id = template_version_presets.id @@ -6562,6 +6635,7 @@ func (q *sqlQuerier) GetPresetByWorkspaceBuildID(ctx context.Context, workspaceB &i.CreatedAt, &i.DesiredInstances, &i.InvalidateAfterSecs, + &i.PrebuildStatus, ) return i, err } @@ -6643,7 +6717,7 @@ func (q *sqlQuerier) GetPresetParametersByTemplateVersionID(ctx context.Context, const getPresetsByTemplateVersionID = `-- name: GetPresetsByTemplateVersionID :many SELECT - id, template_version_id, name, created_at, desired_instances, invalidate_after_secs + id, template_version_id, name, created_at, desired_instances, invalidate_after_secs, prebuild_status FROM 
template_version_presets WHERE @@ -6666,6 +6740,7 @@ func (q *sqlQuerier) GetPresetsByTemplateVersionID(ctx context.Context, template &i.CreatedAt, &i.DesiredInstances, &i.InvalidateAfterSecs, + &i.PrebuildStatus, ); err != nil { return nil, err } @@ -6696,7 +6771,7 @@ VALUES ( $4, $5, $6 -) RETURNING id, template_version_id, name, created_at, desired_instances, invalidate_after_secs +) RETURNING id, template_version_id, name, created_at, desired_instances, invalidate_after_secs, prebuild_status ` type InsertPresetParams struct { @@ -6725,6 +6800,7 @@ func (q *sqlQuerier) InsertPreset(ctx context.Context, arg InsertPresetParams) ( &i.CreatedAt, &i.DesiredInstances, &i.InvalidateAfterSecs, + &i.PrebuildStatus, ) return i, err } @@ -6773,6 +6849,22 @@ func (q *sqlQuerier) InsertPresetParameters(ctx context.Context, arg InsertPrese return items, nil } +const updatePresetPrebuildStatus = `-- name: UpdatePresetPrebuildStatus :exec +UPDATE template_version_presets +SET prebuild_status = $1 +WHERE id = $2 +` + +type UpdatePresetPrebuildStatusParams struct { + Status PrebuildStatus `db:"status" json:"status"` + PresetID uuid.UUID `db:"preset_id" json:"preset_id"` +} + +func (q *sqlQuerier) UpdatePresetPrebuildStatus(ctx context.Context, arg UpdatePresetPrebuildStatusParams) error { + _, err := q.db.ExecContext(ctx, updatePresetPrebuildStatus, arg.Status, arg.PresetID) + return err +} + const deleteOldProvisionerDaemons = `-- name: DeleteOldProvisionerDaemons :exec DELETE FROM provisioner_daemons WHERE ( (created_at < (NOW() - INTERVAL '7 days') AND last_seen_at IS NULL) OR diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index 8c27ddf62b7c3..9cd4321afec23 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -27,6 +27,7 @@ RETURNING w.id, w.name; SELECT t.id AS template_id, t.name AS template_name, + o.id AS organization_id, o.name AS organization_name, tv.id AS template_version_id, 
tv.name AS template_version_name, @@ -34,6 +35,7 @@ SELECT tvp.id, tvp.name, tvp.desired_instances AS desired_instances, + tvp.prebuild_status, t.deleted, t.deprecated != '' AS deprecated FROM templates t @@ -129,6 +131,42 @@ WHERE tsb.rn <= tsb.desired_instances -- Fetch the last N builds, where N is the AND created_at >= @lookback::timestamptz GROUP BY tsb.template_version_id, tsb.preset_id, fc.num_failed; +-- GetPresetsAtFailureLimit groups workspace builds by preset ID. +-- Each preset is associated with exactly one template version ID. +-- For each preset, the query checks the last hard_limit builds. +-- If all of them failed, the preset is considered to have hit the hard failure limit. +-- The query returns a list of preset IDs that have reached this failure threshold. +-- Only active template versions with configured presets are considered. +-- name: GetPresetsAtFailureLimit :many +WITH filtered_builds AS ( + -- Only select builds which are for prebuild creations + SELECT wlb.template_version_id, wlb.created_at, tvp.id AS preset_id, wlb.job_status, tvp.desired_instances + FROM template_version_presets tvp + INNER JOIN workspace_latest_builds wlb ON wlb.template_version_preset_id = tvp.id + INNER JOIN workspaces w ON wlb.workspace_id = w.id + INNER JOIN template_versions tv ON wlb.template_version_id = tv.id + INNER JOIN templates t ON tv.template_id = t.id AND t.active_version_id = tv.id + WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. + AND wlb.transition = 'start'::workspace_transition + AND w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0' +), +time_sorted_builds AS ( + -- Group builds by preset, then sort each group by created_at. 
+ SELECT fb.template_version_id, fb.created_at, fb.preset_id, fb.job_status, fb.desired_instances, + ROW_NUMBER() OVER (PARTITION BY fb.preset_id ORDER BY fb.created_at DESC) as rn + FROM filtered_builds fb +) +SELECT + tsb.template_version_id, + tsb.preset_id +FROM time_sorted_builds tsb +-- For each preset, check the last hard_limit builds. +-- If all of them failed, the preset is considered to have hit the hard failure limit. +WHERE tsb.rn <= @hard_limit::bigint + AND tsb.job_status = 'failed'::provisioner_job_status +GROUP BY tsb.template_version_id, tsb.preset_id +HAVING COUNT(*) = @hard_limit::bigint; + -- name: GetPrebuildMetrics :many SELECT t.name as template_name, diff --git a/coderd/database/queries/presets.sql b/coderd/database/queries/presets.sql index 6d5646a285b4a..2fb6722bc2c33 100644 --- a/coderd/database/queries/presets.sql +++ b/coderd/database/queries/presets.sql @@ -25,6 +25,11 @@ SELECT unnest(@values :: TEXT[]) RETURNING *; +-- name: UpdatePresetPrebuildStatus :exec +UPDATE template_version_presets +SET prebuild_status = @status +WHERE id = @preset_id; + -- name: GetPresetsByTemplateVersionID :many SELECT * diff --git a/coderd/notifications/events.go b/coderd/notifications/events.go index 35d9925055da5..0e88361b56f68 100644 --- a/coderd/notifications/events.go +++ b/coderd/notifications/events.go @@ -42,6 +42,11 @@ var ( TemplateWorkspaceResourceReplaced = uuid.MustParse("89d9745a-816e-4695-a17f-3d0a229e2b8d") ) +// Prebuilds-related events +var ( + PrebuildFailureLimitReached = uuid.MustParse("414d9331-c1fc-4761-b40c-d1f4702279eb") +) + // Notification-related events. 
var ( TemplateTestNotification = uuid.MustParse("c425f63e-716a-4bf4-ae24-78348f706c3f") diff --git a/coderd/notifications/notifications_test.go b/coderd/notifications/notifications_test.go index 8f8a3c82441e0..fab87af41deb9 100644 --- a/coderd/notifications/notifications_test.go +++ b/coderd/notifications/notifications_test.go @@ -1250,6 +1250,22 @@ func TestNotificationTemplates_Golden(t *testing.T) { }, }, }, + { + name: "PrebuildFailureLimitReached", + id: notifications.PrebuildFailureLimitReached, + payload: types.MessagePayload{ + UserName: "Bobby", + UserEmail: "bobby@coder.com", + UserUsername: "bobby", + Labels: map[string]string{ + "org": "cern", + "template": "docker", + "template_version": "angry_torvalds", + "preset": "particle-accelerator", + }, + Data: map[string]any{}, + }, + }, } // We must have a test case for every notification_template. This is enforced below: diff --git a/coderd/notifications/testdata/rendered-templates/smtp/PrebuildFailureLimitReached.html.golden b/coderd/notifications/testdata/rendered-templates/smtp/PrebuildFailureLimitReached.html.golden new file mode 100644 index 0000000000000..69f13b86ca71c --- /dev/null +++ b/coderd/notifications/testdata/rendered-templates/smtp/PrebuildFailureLimitReached.html.golden @@ -0,0 +1,112 @@ +From: system@coder.com +To: bobby@coder.com +Subject: There is a problem creating prebuilt workspaces +Message-Id: 02ee4935-73be-4fa1-a290-ff9999026b13@blush-whale-48 +Date: Fri, 11 Oct 2024 09:03:06 +0000 +Content-Type: multipart/alternative; boundary=bbe61b741255b6098bb6b3c1f41b885773df633cb18d2a3002b68e4bc9c4 +MIME-Version: 1.0 + +--bbe61b741255b6098bb6b3c1f41b885773df633cb18d2a3002b68e4bc9c4 +Content-Transfer-Encoding: quoted-printable +Content-Type: text/plain; charset=UTF-8 + +Hi Bobby, + +The number of failed prebuild attempts has reached the hard limit for templ= +ate docker and preset particle-accelerator. + +To resume prebuilds, fix the underlying issue and upload a new template ver= +sion. 
+ +Refer to the documentation for more details: + +Troubleshooting templates (https://coder.com/docs/admin/templates/troublesh= +ooting) +Troubleshooting of prebuilt workspaces (https://coder.com/docs/admin/templa= +tes/extending-templates/prebuilt-workspaces#administration-and-troubleshoot= +ing) + + +View failed prebuilt workspaces: http://test.com/workspaces?filter=3Downer:= +prebuilds+status:failed+template:docker + +View template version: http://test.com/templates/cern/docker/versions/angry= +_torvalds + +--bbe61b741255b6098bb6b3c1f41b885773df633cb18d2a3002b68e4bc9c4 +Content-Transfer-Encoding: quoted-printable +Content-Type: text/html; charset=UTF-8 + + + +
+ + +Hi Bobby,
+The number of failed prebuild attempts has reached the hard limi= +t for template docker and preset particle-accelera= +tor.
+ +To resume prebuilds, fix the underlying issue and upload a new template = +version.
+ +Refer to the documentation for more details:
+- Troubl=
+eshooting templates
+- Troubleshooting of pre=
+built workspaces
© 2024 Coder. All rights reserved - h= +ttp://test.com
+ + +Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: