Skip to content

Commit 1d0fae8

Browse files
authored
fix(coderd): prevent lost messages in watchWorkspaceAgentMetadata (coder#7934)
* fix(codersdk): wait for subscription in WatchWorkspaceAgentMetadata
* fix(coderd): subscribe before sending initial metadata event
* test(coderd): add retries to TestWorkspaceAgent_Metadata to avoid flake
1 parent 518300a commit 1d0fae8

File tree

3 files changed

+51
-17
lines changed

3 files changed

+51
-17
lines changed

coderd/workspaceagents.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1434,17 +1434,15 @@ func (api *API) watchWorkspaceAgentMetadata(rw http.ResponseWriter, r *http.Requ
14341434
})
14351435
}
14361436

1437-
// Send initial metadata.
1438-
sendMetadata(true)
1439-
14401437
// We debounce metadata updates to avoid overloading the frontend when
14411438
// an agent is sending a lot of updates.
14421439
pubsubDebounce := debounce.New(time.Second)
14431440
if flag.Lookup("test.v") != nil {
14441441
pubsubDebounce = debounce.New(time.Millisecond * 100)
14451442
}
14461443

1447-
// Send metadata on updates.
1444+
// Send metadata on updates, we must ensure subscription before sending
1445+
// initial metadata to guarantee that events in-between are not missed.
14481446
cancelSub, err := api.Pubsub.Subscribe(watchWorkspaceAgentMetadataChannel(workspaceAgent.ID), func(_ context.Context, _ []byte) {
14491447
pubsubDebounce(func() {
14501448
sendMetadata(true)
@@ -1456,12 +1454,14 @@ func (api *API) watchWorkspaceAgentMetadata(rw http.ResponseWriter, r *http.Requ
14561454
}
14571455
defer cancelSub()
14581456

1457+
// Send initial metadata.
1458+
sendMetadata(true)
1459+
14591460
for {
14601461
select {
14611462
case <-senderClosed:
14621463
return
14631464
case <-refreshTicker.C:
1464-
break
14651465
}
14661466

14671467
// Avoid spamming the DB with reads we know there are no updates. We want

coderd/workspaceagents_test.go

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,11 +1268,6 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
12681268

12691269
var update []codersdk.WorkspaceAgentMetadata
12701270

1271-
check := func(want codersdk.WorkspaceAgentMetadataResult, got codersdk.WorkspaceAgentMetadata) {
1272-
require.Equal(t, want.Value, got.Result.Value)
1273-
require.Equal(t, want.Error, got.Result.Error)
1274-
}
1275-
12761271
wantMetadata1 := codersdk.WorkspaceAgentMetadataResult{
12771272
CollectedAt: time.Now(),
12781273
Value: "bar",
@@ -1285,32 +1280,53 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
12851280

12861281
recvUpdate := func() []codersdk.WorkspaceAgentMetadata {
12871282
select {
1283+
case <-ctx.Done():
1284+
t.Fatalf("context done: %v", ctx.Err())
12881285
case err := <-errors:
12891286
t.Fatalf("error watching metadata: %v", err)
1290-
return nil
12911287
case update := <-updates:
12921288
return update
12931289
}
1290+
return nil
1291+
}
1292+
1293+
check := func(want codersdk.WorkspaceAgentMetadataResult, got codersdk.WorkspaceAgentMetadata, retry bool) {
1294+
// We can't trust the order of the updates due to timers and debounces,
1295+
// so let's check a few times more.
1296+
for i := 0; retry && i < 2 && (want.Value != got.Result.Value || want.Error != got.Result.Error); i++ {
1297+
update = recvUpdate()
1298+
for _, m := range update {
1299+
if m.Description.Key == got.Description.Key {
1300+
got = m
1301+
break
1302+
}
1303+
}
1304+
}
1305+
ok1 := assert.Equal(t, want.Value, got.Result.Value)
1306+
ok2 := assert.Equal(t, want.Error, got.Result.Error)
1307+
if !ok1 || !ok2 {
1308+
require.FailNow(t, "check failed")
1309+
}
12941310
}
12951311

12961312
update = recvUpdate()
12971313
require.Len(t, update, 3)
1298-
check(wantMetadata1, update[0])
1314+
check(wantMetadata1, update[0], false)
12991315
// The second metadata result is not yet posted.
13001316
require.Zero(t, update[1].Result.CollectedAt)
13011317

13021318
wantMetadata2 := wantMetadata1
13031319
post("foo2", wantMetadata2)
13041320
update = recvUpdate()
13051321
require.Len(t, update, 3)
1306-
check(wantMetadata1, update[0])
1307-
check(wantMetadata2, update[1])
1322+
check(wantMetadata1, update[0], true)
1323+
check(wantMetadata2, update[1], true)
13081324

13091325
wantMetadata1.Error = "error"
13101326
post("foo1", wantMetadata1)
13111327
update = recvUpdate()
13121328
require.Len(t, update, 3)
1313-
check(wantMetadata1, update[0])
1329+
check(wantMetadata1, update[0], true)
13141330

13151331
const maxValueLen = 32 << 10
13161332
tooLongValueMetadata := wantMetadata1
@@ -1319,6 +1335,9 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
13191335
tooLongValueMetadata.CollectedAt = time.Now()
13201336
post("foo3", tooLongValueMetadata)
13211337
got := recvUpdate()[2]
1338+
for i := 0; i < 2 && len(got.Result.Value) != maxValueLen; i++ {
1339+
got = recvUpdate()[2]
1340+
}
13221341
require.Len(t, got.Result.Value, maxValueLen)
13231342
require.NotEmpty(t, got.Result.Error)
13241343

codersdk/workspaceagents.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)
304304

305305
metadataChan := make(chan []WorkspaceAgentMetadata, 256)
306306

307+
ready := make(chan struct{})
307308
watch := func() error {
308309
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/workspaceagents/%s/watch-metadata", id), nil)
309310
if err != nil {
@@ -316,19 +317,24 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)
316317
nextEvent := ServerSentEventReader(ctx, res.Body)
317318
defer res.Body.Close()
318319

320+
firstEvent := true
319321
for {
320322
select {
321323
case <-ctx.Done():
322324
return ctx.Err()
323325
default:
324-
break
325326
}
326327

327328
sse, err := nextEvent()
328329
if err != nil {
329330
return err
330331
}
331332

333+
if firstEvent {
334+
close(ready) // Only close ready after the first event is received.
335+
firstEvent = false
336+
}
337+
332338
b, ok := sse.Data.([]byte)
333339
if !ok {
334340
return xerrors.Errorf("unexpected data type: %T", sse.Data)
@@ -358,9 +364,18 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)
358364
errorChan := make(chan error, 1)
359365
go func() {
360366
defer close(errorChan)
361-
errorChan <- watch()
367+
err := watch()
368+
select {
369+
case <-ready:
370+
default:
371+
close(ready) // Error before first event.
372+
}
373+
errorChan <- err
362374
}()
363375

376+
// Wait until first event is received and the subscription is registered.
377+
<-ready
378+
364379
return metadataChan, errorChan
365380
}
366381

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy