Skip to content

Commit c4e4fe8

Browse files
fix(agent): start devcontainers through agentcontainers package (#18471)
Fixes coder/internal#706 Context for the implementation here coder/internal#706 (comment) Synchronously starts dev containers defined in terraform with our `DevcontainerCLI` abstraction, instead of piggybacking off of our `agentscripts` package. This gives us more control over logs, instead of being reliant on packages which may or may not exist in the user-provided image.
1 parent f6d9765 commit c4e4fe8

File tree

12 files changed

+304
-480
lines changed

12 files changed

+304
-480
lines changed

agent/agent.go

Lines changed: 77 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ type Options struct {
9191
Execer agentexec.Execer
9292
Devcontainers bool
9393
DevcontainerAPIOptions []agentcontainers.Option // Enable Devcontainers for these to be effective.
94+
Clock quartz.Clock
9495
}
9596

9697
type Client interface {
@@ -144,6 +145,9 @@ func New(options Options) Agent {
144145
if options.PortCacheDuration == 0 {
145146
options.PortCacheDuration = 1 * time.Second
146147
}
148+
if options.Clock == nil {
149+
options.Clock = quartz.NewReal()
150+
}
147151

148152
prometheusRegistry := options.PrometheusRegistry
149153
if prometheusRegistry == nil {
@@ -157,6 +161,7 @@ func New(options Options) Agent {
157161
hardCtx, hardCancel := context.WithCancel(context.Background())
158162
gracefulCtx, gracefulCancel := context.WithCancel(hardCtx)
159163
a := &agent{
164+
clock: options.Clock,
160165
tailnetListenPort: options.TailnetListenPort,
161166
reconnectingPTYTimeout: options.ReconnectingPTYTimeout,
162167
logger: options.Logger,
@@ -204,6 +209,7 @@ func New(options Options) Agent {
204209
}
205210

206211
type agent struct {
212+
clock quartz.Clock
207213
logger slog.Logger
208214
client Client
209215
exchangeToken func(ctx context.Context) (string, error)
@@ -273,7 +279,7 @@ type agent struct {
273279

274280
devcontainers bool
275281
containerAPIOptions []agentcontainers.Option
276-
containerAPI atomic.Pointer[agentcontainers.API] // Set by apiHandler.
282+
containerAPI *agentcontainers.API
277283
}
278284

279285
func (a *agent) TailnetConn() *tailnet.Conn {
@@ -330,6 +336,19 @@ func (a *agent) init() {
330336
// will not report anywhere.
331337
a.scriptRunner.RegisterMetrics(a.prometheusRegistry)
332338

339+
if a.devcontainers {
340+
containerAPIOpts := []agentcontainers.Option{
341+
agentcontainers.WithExecer(a.execer),
342+
agentcontainers.WithCommandEnv(a.sshServer.CommandEnv),
343+
agentcontainers.WithScriptLogger(func(logSourceID uuid.UUID) agentcontainers.ScriptLogger {
344+
return a.logSender.GetScriptLogger(logSourceID)
345+
}),
346+
}
347+
containerAPIOpts = append(containerAPIOpts, a.containerAPIOptions...)
348+
349+
a.containerAPI = agentcontainers.NewAPI(a.logger.Named("containers"), containerAPIOpts...)
350+
}
351+
333352
a.reconnectingPTYServer = reconnectingpty.NewServer(
334353
a.logger.Named("reconnecting-pty"),
335354
a.sshServer,
@@ -1141,15 +1160,18 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
11411160
}
11421161

11431162
var (
1144-
scripts = manifest.Scripts
1145-
scriptRunnerOpts []agentscripts.InitOption
1163+
scripts = manifest.Scripts
1164+
scriptRunnerOpts []agentscripts.InitOption
1165+
devcontainerScripts map[uuid.UUID]codersdk.WorkspaceAgentScript
11461166
)
11471167
if a.devcontainers {
1148-
var dcScripts []codersdk.WorkspaceAgentScript
1149-
scripts, dcScripts = agentcontainers.ExtractAndInitializeDevcontainerScripts(manifest.Devcontainers, scripts)
1150-
// See ExtractAndInitializeDevcontainerScripts for motivation
1151-
// behind running dcScripts as post start scripts.
1152-
scriptRunnerOpts = append(scriptRunnerOpts, agentscripts.WithPostStartScripts(dcScripts...))
1168+
a.containerAPI.Init(
1169+
agentcontainers.WithManifestInfo(manifest.OwnerName, manifest.WorkspaceName),
1170+
agentcontainers.WithDevcontainers(manifest.Devcontainers, scripts),
1171+
agentcontainers.WithSubAgentClient(agentcontainers.NewSubAgentClientFromAPI(a.logger, aAPI)),
1172+
)
1173+
1174+
scripts, devcontainerScripts = agentcontainers.ExtractDevcontainerScripts(manifest.Devcontainers, scripts)
11531175
}
11541176
err = a.scriptRunner.Init(scripts, aAPI.ScriptCompleted, scriptRunnerOpts...)
11551177
if err != nil {
@@ -1168,7 +1190,12 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
11681190
// finished (both start and post start). For instance, an
11691191
// autostarted devcontainer will be included in this time.
11701192
err := a.scriptRunner.Execute(a.gracefulCtx, agentscripts.ExecuteStartScripts)
1171-
err = errors.Join(err, a.scriptRunner.Execute(a.gracefulCtx, agentscripts.ExecutePostStartScripts))
1193+
1194+
for _, dc := range manifest.Devcontainers {
1195+
cErr := a.createDevcontainer(ctx, aAPI, dc, devcontainerScripts[dc.ID])
1196+
err = errors.Join(err, cErr)
1197+
}
1198+
11721199
dur := time.Since(start).Seconds()
11731200
if err != nil {
11741201
a.logger.Warn(ctx, "startup script(s) failed", slog.Error(err))
@@ -1187,14 +1214,6 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
11871214
}
11881215
a.metrics.startupScriptSeconds.WithLabelValues(label).Set(dur)
11891216
a.scriptRunner.StartCron()
1190-
1191-
// If the container API is enabled, trigger an immediate refresh
1192-
// for quick sub agent injection.
1193-
if cAPI := a.containerAPI.Load(); cAPI != nil {
1194-
if err := cAPI.RefreshContainers(ctx); err != nil {
1195-
a.logger.Error(ctx, "failed to refresh containers", slog.Error(err))
1196-
}
1197-
}
11981217
})
11991218
if err != nil {
12001219
return xerrors.Errorf("track conn goroutine: %w", err)
@@ -1204,6 +1223,38 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
12041223
}
12051224
}
12061225

1226+
func (a *agent) createDevcontainer(
1227+
ctx context.Context,
1228+
aAPI proto.DRPCAgentClient26,
1229+
dc codersdk.WorkspaceAgentDevcontainer,
1230+
script codersdk.WorkspaceAgentScript,
1231+
) (err error) {
1232+
var (
1233+
exitCode = int32(0)
1234+
startTime = a.clock.Now()
1235+
status = proto.Timing_OK
1236+
)
1237+
if err = a.containerAPI.CreateDevcontainer(dc.WorkspaceFolder, dc.ConfigPath); err != nil {
1238+
exitCode = 1
1239+
status = proto.Timing_EXIT_FAILURE
1240+
}
1241+
endTime := a.clock.Now()
1242+
1243+
if _, scriptErr := aAPI.ScriptCompleted(ctx, &proto.WorkspaceAgentScriptCompletedRequest{
1244+
Timing: &proto.Timing{
1245+
ScriptId: script.ID[:],
1246+
Start: timestamppb.New(startTime),
1247+
End: timestamppb.New(endTime),
1248+
ExitCode: exitCode,
1249+
Stage: proto.Timing_START,
1250+
Status: status,
1251+
},
1252+
}); scriptErr != nil {
1253+
a.logger.Warn(ctx, "reporting script completed failed", slog.Error(scriptErr))
1254+
}
1255+
return err
1256+
}
1257+
12071258
// createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates
12081259
// the tailnet using the information in the manifest
12091260
func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient26) error {
@@ -1227,7 +1278,6 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co
12271278
// agent API.
12281279
network, err = a.createTailnet(
12291280
a.gracefulCtx,
1230-
aAPI,
12311281
manifest.AgentID,
12321282
manifest.DERPMap,
12331283
manifest.DERPForceWebSockets,
@@ -1262,9 +1312,9 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co
12621312
network.SetBlockEndpoints(manifest.DisableDirectConnections)
12631313

12641314
// Update the subagent client if the container API is available.
1265-
if cAPI := a.containerAPI.Load(); cAPI != nil {
1315+
if a.containerAPI != nil {
12661316
client := agentcontainers.NewSubAgentClientFromAPI(a.logger, aAPI)
1267-
cAPI.UpdateSubAgentClient(client)
1317+
a.containerAPI.UpdateSubAgentClient(client)
12681318
}
12691319
}
12701320
return nil
@@ -1382,7 +1432,6 @@ func (a *agent) trackGoroutine(fn func()) error {
13821432

13831433
func (a *agent) createTailnet(
13841434
ctx context.Context,
1385-
aAPI proto.DRPCAgentClient26,
13861435
agentID uuid.UUID,
13871436
derpMap *tailcfg.DERPMap,
13881437
derpForceWebSockets, disableDirectConnections bool,
@@ -1515,10 +1564,7 @@ func (a *agent) createTailnet(
15151564
}()
15161565
if err = a.trackGoroutine(func() {
15171566
defer apiListener.Close()
1518-
apiHandler, closeAPIHAndler := a.apiHandler(aAPI)
1519-
defer func() {
1520-
_ = closeAPIHAndler()
1521-
}()
1567+
apiHandler := a.apiHandler()
15221568
server := &http.Server{
15231569
BaseContext: func(net.Listener) context.Context { return ctx },
15241570
Handler: apiHandler,
@@ -1532,7 +1578,6 @@ func (a *agent) createTailnet(
15321578
case <-ctx.Done():
15331579
case <-a.hardCtx.Done():
15341580
}
1535-
_ = closeAPIHAndler()
15361581
_ = server.Close()
15371582
}()
15381583

@@ -1871,6 +1916,12 @@ func (a *agent) Close() error {
18711916
a.logger.Error(a.hardCtx, "script runner close", slog.Error(err))
18721917
}
18731918

1919+
if a.containerAPI != nil {
1920+
if err := a.containerAPI.Close(); err != nil {
1921+
a.logger.Error(a.hardCtx, "container API close", slog.Error(err))
1922+
}
1923+
}
1924+
18741925
// Wait for the graceful shutdown to complete, but don't wait forever so
18751926
// that we don't break user expectations.
18761927
go func() {

agent/agentcontainers/api.go

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ func WithDevcontainers(devcontainers []codersdk.WorkspaceAgentDevcontainer, scri
207207
api.devcontainerNames = make(map[string]bool, len(devcontainers))
208208
api.devcontainerLogSourceIDs = make(map[string]uuid.UUID)
209209
for _, dc := range devcontainers {
210+
if dc.Status == "" {
211+
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStarting
212+
}
213+
210214
api.knownDevcontainers[dc.WorkspaceFolder] = dc
211215
api.devcontainerNames[dc.Name] = true
212216
for _, script := range scripts {
@@ -265,8 +269,6 @@ func NewAPI(logger slog.Logger, options ...Option) *API {
265269
api := &API{
266270
ctx: ctx,
267271
cancel: cancel,
268-
watcherDone: make(chan struct{}),
269-
updaterDone: make(chan struct{}),
270272
initialUpdateDone: make(chan struct{}),
271273
updateTrigger: make(chan chan error),
272274
updateInterval: defaultUpdateInterval,
@@ -315,10 +317,28 @@ func NewAPI(logger slog.Logger, options ...Option) *API {
315317
api.subAgentClient.Store(&c)
316318
}
317319

320+
return api
321+
}
322+
323+
// Init applies a final set of options to the API and then
324+
// begins the watcherLoop and updaterLoop. This function
325+
// must only be called once.
326+
func (api *API) Init(opts ...Option) {
327+
api.mu.Lock()
328+
defer api.mu.Unlock()
329+
if api.closed {
330+
return
331+
}
332+
333+
for _, opt := range opts {
334+
opt(api)
335+
}
336+
337+
api.watcherDone = make(chan struct{})
338+
api.updaterDone = make(chan struct{})
339+
318340
go api.watcherLoop()
319341
go api.updaterLoop()
320-
321-
return api
322342
}
323343

324344
func (api *API) watcherLoop() {
@@ -909,8 +929,9 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques
909929
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStarting
910930
dc.Container = nil
911931
api.knownDevcontainers[dc.WorkspaceFolder] = dc
912-
api.asyncWg.Add(1)
913-
go api.recreateDevcontainer(dc, configPath)
932+
go func() {
933+
_ = api.CreateDevcontainer(dc.WorkspaceFolder, configPath, WithRemoveExistingContainer())
934+
}()
914935

915936
api.mu.Unlock()
916937

@@ -920,15 +941,29 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques
920941
})
921942
}
922943

923-
// recreateDevcontainer should run in its own goroutine and is responsible for
944+
// createDevcontainer should run in its own goroutine and is responsible for
924945
// recreating a devcontainer based on the provided devcontainer configuration.
925946
// It updates the devcontainer status and logs the process. The configPath is
926947
// passed as a parameter for the odd chance that the container being recreated
927948
// has a different config file than the one stored in the devcontainer state.
928949
// The devcontainer state must be set to starting and the asyncWg must be
929950
// incremented before calling this function.
930-
func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, configPath string) {
951+
func (api *API) CreateDevcontainer(workspaceFolder, configPath string, opts ...DevcontainerCLIUpOptions) error {
952+
api.mu.Lock()
953+
if api.closed {
954+
api.mu.Unlock()
955+
return nil
956+
}
957+
958+
dc, found := api.knownDevcontainers[workspaceFolder]
959+
if !found {
960+
api.mu.Unlock()
961+
return xerrors.Errorf("devcontainer not found")
962+
}
963+
964+
api.asyncWg.Add(1)
931965
defer api.asyncWg.Done()
966+
api.mu.Unlock()
932967

933968
var (
934969
err error
@@ -969,12 +1004,15 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con
9691004

9701005
logger.Debug(ctx, "starting devcontainer recreation")
9711006

972-
_, err = api.dccli.Up(ctx, dc.WorkspaceFolder, configPath, WithUpOutput(infoW, errW), WithRemoveExistingContainer())
1007+
upOptions := []DevcontainerCLIUpOptions{WithUpOutput(infoW, errW)}
1008+
upOptions = append(upOptions, opts...)
1009+
1010+
_, err = api.dccli.Up(ctx, dc.WorkspaceFolder, configPath, upOptions...)
9731011
if err != nil {
9741012
// No need to log if the API is closing (context canceled), as this
9751013
// is expected behavior when the API is shutting down.
9761014
if !errors.Is(err, context.Canceled) {
977-
logger.Error(ctx, "devcontainer recreation failed", slog.Error(err))
1015+
logger.Error(ctx, "devcontainer creation failed", slog.Error(err))
9781016
}
9791017

9801018
api.mu.Lock()
@@ -983,10 +1021,11 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con
9831021
api.knownDevcontainers[dc.WorkspaceFolder] = dc
9841022
api.recreateErrorTimes[dc.WorkspaceFolder] = api.clock.Now("agentcontainers", "recreate", "errorTimes")
9851023
api.mu.Unlock()
986-
return
1024+
1025+
return xerrors.Errorf("start devcontainer: %w", err)
9871026
}
9881027

989-
logger.Info(ctx, "devcontainer recreated successfully")
1028+
logger.Info(ctx, "devcontainer created successfully")
9901029

9911030
api.mu.Lock()
9921031
dc = api.knownDevcontainers[dc.WorkspaceFolder]
@@ -1009,8 +1048,11 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con
10091048
// Ensure an immediate refresh to accurately reflect the
10101049
// devcontainer state after recreation.
10111050
if err := api.RefreshContainers(ctx); err != nil {
1012-
logger.Error(ctx, "failed to trigger immediate refresh after devcontainer recreation", slog.Error(err))
1051+
logger.Error(ctx, "failed to trigger immediate refresh after devcontainer creation", slog.Error(err))
1052+
return xerrors.Errorf("refresh containers: %w", err)
10131053
}
1054+
1055+
return nil
10141056
}
10151057

10161058
// markDevcontainerDirty finds the devcontainer with the given config file path
@@ -1609,8 +1651,12 @@ func (api *API) Close() error {
16091651
err := api.watcher.Close()
16101652

16111653
// Wait for loops to finish.
1612-
<-api.watcherDone
1613-
<-api.updaterDone
1654+
if api.watcherDone != nil {
1655+
<-api.watcherDone
1656+
}
1657+
if api.updaterDone != nil {
1658+
<-api.updaterDone
1659+
}
16141660

16151661
// Wait for all async tasks to complete.
16161662
api.asyncWg.Wait()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy