Skip to content

Commit

Permalink
adding daisy chain transfers and configurable retries for terminating…
Browse files Browse the repository at this point in the history
… messages (#4)
  • Loading branch information
rayjanoka authored Feb 27, 2023
1 parent 2f3b9f4 commit 04c5e8c
Show file tree
Hide file tree
Showing 14 changed files with 127 additions and 47 deletions.
6 changes: 6 additions & 0 deletions CONFIGURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,30 @@ apiVersion: v1

logLevel: info
shutdownWait: 30s
skipEventBucketValidation: true
skipLifecycleExpired: true
maxRetries: 5
msgTimeout: 15m
excludePaths:
copyObject:
- ^\w{3}/(\w+)/\1\.tar\.zst$
removeObject:
- ^\w{3}/(\w+)/\1\.tar\.zst$
waitForMatchingETag: false
```
| Setting | Description |
|-----------------------------|-------------------------------------------------------------------------------------------------------------------------|
| `apiVersion` | config file api version (required) |
| `logLevel` | set the log level (default: info) |
| `shutdownWait` | time to wait for running transfers to complete before exiting |
| `skipEventBucketValidation` | don't check if the event's bucket name and source bucket name match |
| `skipLifecycleExpired` | don't propagate deletes initiated by the minio lifecycle expiration |
| `maxRetries` | the max number of attempts when either the copy's source object or the object to be deleted is missing, after which the message is terminated |
| `msgTimeout` | the max duration for a transfer; this covers the jetstream stream message ack timeout and the internal transfer context timeout |
| `excludePaths.copyObject` | list of paths as regex patterns to exclude from copy operations (pcre support) |
| `excludePaths.removeObject` | list of paths as regex patterns to exclude from delete operations (pcre support) |
| `waitForMatchingETag` | when copying files, retry (nak) the message until the source object's ETag matches the ETag in the event |


### JetStream Options
Expand Down
39 changes: 21 additions & 18 deletions archie/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,24 @@ import (
)

type Archiver struct {
DestBucket string
DestClient client.Client
DestName string
DestPartSize uint64
DestThreads uint
FetchDone chan string
HealthCheckDisabled bool
IsOffline bool
MsgTimeout string
SkipLifecycleExpired bool
SrcBucket string
SrcClient client.Client
SrcName string
WaitGroup *sync.WaitGroup
ExcludePaths struct {
DestBucket string
DestClient client.Client
DestName string
DestPartSize uint64
DestThreads uint
FetchDone chan string
HealthCheckDisabled bool
IsOffline bool
MaxRetries uint64
MsgTimeout string
SkipEventBucketValidation bool
SkipLifecycleExpired bool
SrcBucket string
SrcClient client.Client
SrcName string
WaitForMatchingETag bool
WaitGroup *sync.WaitGroup
ExcludePaths struct {
CopyObject []*pcre.Regexp
RemoveObject []*pcre.Regexp
}
Expand All @@ -34,7 +37,7 @@ const (
Nak
SkipAck
Term
FiveNakThenTerm
NakThenTerm
None
)

Expand All @@ -46,8 +49,8 @@ func (s AckType) String() string {
return "nak"
case Term:
return "term"
case FiveNakThenTerm:
return "5nak_then_term"
case NakThenTerm:
return "nak_then_term"
case None:
return "none"
}
Expand Down
20 changes: 17 additions & 3 deletions archie/copyObject.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ package archie

import (
"archie/client"
"archie/event"
"context"
"fmt"
"github.com/nats-io/nats.go"
"github.com/rs/zerolog"
"time"
)

func (a *Archiver) copyObject(ctx context.Context, mLog zerolog.Logger, eventObjKey string, msg *nats.Msg) (error, string, AckType) {
func (a *Archiver) copyObject(ctx context.Context, mLog zerolog.Logger, eventObjKey string, msg *nats.Msg, record event.Record) (error, string, AckType) {
metadata, _ := msg.Metadata()

for _, excludedPathRegexp := range a.ExcludePaths.CopyObject {
Expand Down Expand Up @@ -38,15 +40,23 @@ func (a *Archiver) copyObject(ctx context.Context, mLog zerolog.Logger, eventObj
if err != nil {
if err.Error() == "The specified key does not exist." {
// minio error
return err, "Failed to Stat the source object", FiveNakThenTerm
return err, "Failed to Stat the source object", NakThenTerm
} else if err.Error() == "storage: object doesn't exist" {
// gcs error
return err, "Failed to Stat the source object", FiveNakThenTerm
return err, "Failed to Stat the source object", NakThenTerm
} else {
return err, "Failed to Stat the source object", Nak
}
}

if a.WaitForMatchingETag {
if srcStat.ETag != record.S3.Object.ETag {
return fmt.Errorf(
"mismatch of ETags from the event (%s) and source (%s)", record.S3.Object.ETag, srcStat.ETag,
), "ETag mismatch", Nak
}
}

mLog.Info().
Int64("size", srcStat.Size).
Str("hSize", size(srcStat.Size)).
Expand All @@ -60,6 +70,10 @@ func (a *Archiver) copyObject(ctx context.Context, mLog zerolog.Logger, eventObj
PartSize: destPartSizeBytes,
}

if record.S3.Object.ETag != "" {
putOpts.ETag = record.S3.Object.ETag
}

start = time.Now()
_, err = a.DestClient.PutObject(ctx, a.DestBucket, eventObjKey, srcObject.GetReader(), srcStat.Size, putOpts)
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions archie/message.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func (a *Archiver) message(ctx context.Context, msg *nats.Msg) {
// message type router
switch eventType {
case "s3:ObjectCreated":
err, execContext, ack = a.copyObject(ctx, mLog, eventObjKey, msg)
err, execContext, ack = a.copyObject(ctx, mLog, eventObjKey, msg, eventRecord)
if err != nil {
s3ErrMsg, s3ErrCode = logS3Error(err, execContext, &mLog)
}
Expand Down Expand Up @@ -133,8 +133,8 @@ func (a *Archiver) message(ctx context.Context, msg *nats.Msg) {
case Nak:
sendNakSignal(msg, &mLog)
a.cleanupAndCountMessagesProcessedMetric("failed", s3ErrMsg, s3ErrCode, event.EventName, eventType)
case FiveNakThenTerm:
maxDelivered := uint64(5)
case NakThenTerm:
maxDelivered := a.MaxRetries - 1
if metadata.NumDelivered > maxDelivered {
mLog.Error().Uint64("numDelivered", metadata.NumDelivered).Msg("Reached max delivered")
termErr := sendTermSignal(msg, &mLog)
Expand Down
6 changes: 4 additions & 2 deletions archie/messageValidation.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ func (a *Archiver) validateEventName(event event.Minio) error {

// validate src bucket name in config matches event bucket name
func (a *Archiver) validateEventBucket(eventBucket string) error {
if a.SrcBucket != eventBucket {
return fmt.Errorf("event bucket (%s) doesn't match configured source bucket (%s)", eventBucket, a.SrcBucket)
if !a.SkipEventBucketValidation {
if a.SrcBucket != eventBucket {
return fmt.Errorf("event bucket (%s) doesn't match configured source bucket (%s)", eventBucket, a.SrcBucket)
}
}
return nil
}
4 changes: 2 additions & 2 deletions archie/removeObject.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ func (a *Archiver) removeObject(ctx context.Context, mLog zerolog.Logger, eventO
if err != nil {
if err.Error() == "The specified key does not exist." {
// minio error
return err, "Failed to RemoveObject from destination bucket", FiveNakThenTerm
return err, "Failed to RemoveObject from destination bucket", NakThenTerm
} else if err.Error() == "storage: object doesn't exist" {
// gcs error
return err, "Failed to RemoveObject from destination bucket", FiveNakThenTerm
return err, "Failed to RemoveObject from destination bucket", NakThenTerm
} else {
return err, "Failed to RemoveObject from destination bucket", Nak
}
Expand Down
2 changes: 2 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ type Object interface {

// PutOptions carries the optional settings handed to a client's PutObject call.
type PutOptions struct {
// ContentType is presumably the content type to set on the uploaded object — confirm against the client implementations.
ContentType string
// ETag is the ETag taken from the event record; copyObject sets it only when the
// event carries a non-empty value, and the Minio client stores it as user
// metadata under the key "eTag" rather than as the object's own ETag.
ETag string
// NumThreads is presumably the number of parallel upload threads — confirm against the client implementations.
NumThreads uint
// PartSize is forwarded as the part size in the Minio client's put options.
PartSize uint64
}

// ObjectInfo is the backend-independent result of an Object's Stat call; both
// the GCS and Minio implementations fill it from their own stat/attrs result.
type ObjectInfo struct {
// ContentType is the content type reported by the backend for the object.
ContentType string
// ETag is the ETag reported by the backend; copyObject compares it with the
// event's ETag when WaitForMatchingETag is enabled.
ETag string
// Size is the object size reported by the backend (logged by copyObject and
// passed to PutObject as the upload size).
Size int64
}

Expand Down
2 changes: 1 addition & 1 deletion client/gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func (o *GCSObject) Stat(ctx context.Context) (*ObjectInfo, error) {
if err != nil {
return nil, err
}
return &ObjectInfo{Size: obj.Size, ContentType: obj.ContentType}, nil
return &ObjectInfo{Size: obj.Size, ContentType: obj.ContentType, ETag: obj.Etag}, nil
}

func (o *GCSObject) GetReader() io.Reader {
Expand Down
7 changes: 6 additions & 1 deletion client/minio.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ func (m *Minio) PutObject(ctx context.Context, bucket string, key string, reader
PartSize: opts.PartSize,
}

if opts.ETag != "" {
putOpts.UserMetadata = map[string]string{
"eTag": opts.ETag,
}
}
_, err := m.client.PutObject(ctx, bucket, key, reader, objectSize, putOpts)
if err != nil {
return UploadInfo{}, err
Expand Down Expand Up @@ -107,7 +112,7 @@ func (o *MinioObject) Stat(ctx context.Context) (*ObjectInfo, error) {
if err != nil {
return nil, err
}
return &ObjectInfo{Size: srcStat.Size, ContentType: srcStat.ContentType}, nil
return &ObjectInfo{Size: srcStat.Size, ContentType: srcStat.ContentType, ETag: srcStat.ETag}, nil
}

func (o *MinioObject) GetReader() io.Reader {
Expand Down
11 changes: 7 additions & 4 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ package main
type Config struct {
ApiVersion string `fig:"apiVersion" validate:"required"`

LogLevel string `fig:"logLevel" default:"info"`
MsgTimeout string `fig:"msgTimeout" default:"30m"`
ShutdownWait string `fig:"shutdownWait" default:"0s"`
SkipLifecycleExpired bool `fig:"skipLifecycleExpired"`
LogLevel string `fig:"logLevel" default:"info"`
MaxRetries uint64 `fig:"maxRetries" default:"5"`
MsgTimeout string `fig:"msgTimeout" default:"30m"`
ShutdownWait string `fig:"shutdownWait" default:"0s"`
SkipEventBucketValidation bool `fig:"skipEventBucketValidation"`
SkipLifecycleExpired bool `fig:"skipLifecycleExpired"`
WaitForMatchingETag bool `fig:"waitForMatchingETag"`

Src struct {
AccessKey string `fig:"accessKey"`
Expand Down
4 changes: 2 additions & 2 deletions helm/archie/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.3.3
version: 0.4.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.3.0"
appVersion: "0.4.0"
6 changes: 6 additions & 0 deletions helm/archie/templates/secret.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ stringData:
logLevel: {{ .Values.archie.logLevel }}
shutdownWait: {{ $shutdownWaitDuration }}
skipLifecycleExpired: {{ .Values.archie.skipLifecycleExpired }}
skipEventBucketValidation: {{ .Values.archie.skipEventBucketValidation }}
waitForMatchingETag: {{ .Values.archie.waitForMatchingETag }}
{{- if .Values.archie.maxRetries }}
maxRetries: {{ .Values.archie.maxRetries }}
{{- end }}
{{- if .Values.archie.msgTimeout }}
msgTimeout: {{ .Values.archie.msgTimeout }}
Expand Down
3 changes: 3 additions & 0 deletions helm/archie/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ image:

archie:
logLevel: info # or "debug" or "trace"
maxRetries: 5 # for all terminating actions
msgTimeout: 30m
shutdownWait: 30 # seconds
skipEventBucketValidation: false
skipLifecycleExpired: false
# pcre regex matching
#excludePaths:
Expand Down Expand Up @@ -48,6 +50,7 @@ archie:
natsMessagesDeliveredLowThreshold: 30
natsMessagesPendingThreshold: 20000
natsMessagesRedeliveredPercentageThreshold: 2
waitForMatchingETag: false

jetstream:
url: nats://localhost:4222
Expand Down
58 changes: 47 additions & 11 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"archie/archie"
"archie/client"
"context"
"encoding/json"
"flag"
"github.com/kkyr/fig"
"github.com/rs/zerolog"
Expand Down Expand Up @@ -88,19 +89,54 @@ func main() {
msgCancel()
}()

// log all config settings
redactedCfg := cfg

if redactedCfg.Src.AccessKey != "" {
redactedCfg.Src.AccessKey = "REDACTED"
}
if redactedCfg.Src.SecretKey != "" {
redactedCfg.Src.SecretKey = "REDACTED"
}
if redactedCfg.Src.GoogleCredentials != "" {
redactedCfg.Src.GoogleCredentials = "REDACTED"
}
if redactedCfg.Dest.AccessKey != "" {
redactedCfg.Dest.AccessKey = "REDACTED"
}
if redactedCfg.Dest.SecretKey != "" {
redactedCfg.Dest.SecretKey = "REDACTED"
}
if redactedCfg.Dest.GoogleCredentials != "" {
redactedCfg.Dest.GoogleCredentials = "REDACTED"
}
if redactedCfg.Jetstream.Password != "" {
redactedCfg.Jetstream.Password = "REDACTED"
}

redactedCfgJSON, err := json.Marshal(redactedCfg)
if err != nil {
log.Fatal().Err(err).Msg("Failed to marshal config to json")
}

log.Info().RawJSON("cfg", redactedCfgJSON).Msg("Startup configuration")

// archiver
a := archie.Archiver{
DestBucket: cfg.Dest.Bucket,
DestName: cfg.Dest.Name,
DestPartSize: cfg.Dest.PartSize,
DestThreads: cfg.Dest.Threads,
FetchDone: make(chan string, 1),
HealthCheckDisabled: cfg.HealthCheck.Disabled,
MsgTimeout: cfg.MsgTimeout,
SkipLifecycleExpired: cfg.SkipLifecycleExpired,
SrcBucket: cfg.Src.Bucket,
SrcName: cfg.Src.Name,
WaitGroup: &sync.WaitGroup{},
DestBucket: cfg.Dest.Bucket,
DestName: cfg.Dest.Name,
DestPartSize: cfg.Dest.PartSize,
DestThreads: cfg.Dest.Threads,
FetchDone: make(chan string, 1),
HealthCheckDisabled: cfg.HealthCheck.Disabled,
MaxRetries: cfg.MaxRetries,
MsgTimeout: cfg.MsgTimeout,
SkipEventBucketValidation: cfg.SkipEventBucketValidation,
SkipLifecycleExpired: cfg.SkipLifecycleExpired,
SrcBucket: cfg.Src.Bucket,
SrcName: cfg.Src.Name,
WaitForMatchingETag: cfg.WaitForMatchingETag,
WaitGroup: &sync.WaitGroup{},
ExcludePaths: struct {
CopyObject []*pcre.Regexp
RemoveObject []*pcre.Regexp
Expand Down

0 comments on commit 04c5e8c

Please sign in to comment.
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy