From 6ee816b9edf4f8c33eff707d4cf4f66c67246c56 Mon Sep 17 00:00:00 2001 From: Andrei Kurilov <18027129+akurilov@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:20:43 +0300 Subject: [PATCH] feat: conv tuning --- api/smtp/session.go | 13 +++++++++--- config/config.go | 10 +++------ helm/int-email/templates/deployment.yaml | 8 ++------ helm/int-email/values.yaml | 1 + main.go | 8 +++----- service/converter/service.go | 26 ++++++++++-------------- service/converter/service_test.go | 21 ++++++++++++++++++- service/service_test.go | 1 + 8 files changed, 51 insertions(+), 37 deletions(-) diff --git a/api/smtp/session.go b/api/smtp/session.go index 6bbf89f..25a9c7c 100644 --- a/api/smtp/session.go +++ b/api/smtp/session.go @@ -20,12 +20,19 @@ type session struct { } func newSession(rcptsPublish, rcptsInternal map[string]bool, dataLimit int64, svc service.Service) smtp.Session { - return &session{ - rcptsPublish: rcptsPublish, - rcptsInternal: rcptsInternal, + s := &session{ + rcptsPublish: make(map[string]bool), + rcptsInternal: make(map[string]bool), dataLimit: dataLimit, svc: svc, } + for r := range rcptsPublish { + s.rcptsPublish[strings.ToLower(r)] = true + } + for r := range rcptsInternal { + s.rcptsInternal[strings.ToLower(r)] = true + } + return s } func (s *session) Reset() { diff --git a/config/config.go b/config/config.go index c9486aa..5f191d4 100644 --- a/config/config.go +++ b/config/config.go @@ -18,7 +18,8 @@ type ApiConfig struct { Host string `envconfig:"API_SMTP_HOST" required:"true"` Port uint16 `envconfig:"API_SMTP_PORT" default:"465" required:"true"` Data struct { - Limit uint32 `envconfig:"API_SMTP_DATA_LIMIT" default:"1048576" required:"true"` + Limit uint32 `envconfig:"API_SMTP_DATA_LIMIT" default:"1048576" required:"true"` + TruncUrlQueries bool `envconfig:"API_SMTP_DATA_TRUNC_URL_QUERIES" default:"false"` } Recipients struct { Publish []string `envconfig:"API_SMTP_RECIPIENTS_PUBLISH" required:"true"` @@ -38,12 +39,7 @@ type ApiConfig struct { } Group string `envconfig:"API_GROUP" default:"default" required:"true"` EventType EventTypeConfig - Interests struct { - Uri string `envconfig:"API_INTERESTS_URI" required:"true" default:"subscriptions-proxy:50051"` - DetailsUriPrefix string `envconfig:"API_INTERESTS_DETAILS_URI_PREFIX" required:"true" default:"https://awakari.com/sub-details.html?id="` - } - Reader ReaderConfig - Writer struct { + Writer struct { Backoff time.Duration `envconfig:"API_WRITER_BACKOFF" default:"10s" required:"true"` BatchSize uint32 `envconfig:"API_WRITER_BATCH_SIZE" default:"16" required:"true"` Cache WriterCacheConfig diff --git a/helm/int-email/templates/deployment.yaml b/helm/int-email/templates/deployment.yaml index d07801c..8ac897c 100644 --- a/helm/int-email/templates/deployment.yaml +++ b/helm/int-email/templates/deployment.yaml @@ -65,10 +65,6 @@ spec: value: "{{ .Values.tls.client.auth.type }}" - name: API_GROUP value: "{{ .Values.api.group }}" - - name: API_INTERESTS_URI - value: "{{ .Values.api.interests.uri }}" - - name: API_INTERESTS_DETAILS_URI_PREFIX - value: "{{ .Values.api.interests.detailsUriPrefix }}" - name: API_WRITER_BACKOFF value: "{{ .Values.api.writer.backoff }}" - name: API_WRITER_BATCH_SIZE @@ -88,12 +84,12 @@ spec: key: "{{ .Values.api.writer.internal.name }}" - name: API_WRITER_INTERNAL_RATE_LIMIT_PER_MINUTE value: "{{ .Values.api.writer.internal.rateLimit.minute }}" - - name: API_READER_URI_EVT_BASE - value: "{{ .Values.api.reader.uriEvtBase }}" - name: LOG_LEVEL value: "{{ .Values.log.level }}" - name: API_EVENT_TYPE_SELF value: "{{ .Values.api.event.typ.self }}" + - name: API_SMTP_DATA_TRUNC_URL_QUERIES + value: "{{ .Values.api.smtp.data.truncUrlQueries }}" volumeMounts: - name: tls-certificates mountPath: /etc/smtp/tls # Mount the TLS secret here diff --git a/helm/int-email/values.yaml b/helm/int-email/values.yaml index c77b1d7..a367f89 100644 --- a/helm/int-email/values.yaml +++ b/helm/int-email/values.yaml @@ -89,6 +89,7 @@ api: smtp: data: limit: "1048576" + truncUrlQueries: true rcpt: names: "publish" limit: "100" diff --git a/main.go b/main.go index c342836..fa8c7c7 100644 --- a/main.go +++ b/main.go @@ -13,7 +13,6 @@ import ( "github.com/emersion/go-smtp" "log/slog" "os" - "strings" ) func main() { @@ -36,7 +35,6 @@ func main() { clientAwk, err = api. NewClientBuilder(). WriterUri(cfg.Api.Writer.Uri). - SubscriptionsUri(cfg.Api.Interests.Uri). Build() if err != nil { panic(fmt.Sprintf("failed to initialize the Awakari API client: %s", err)) @@ -49,16 +47,16 @@ func main() { rcptsPublish := map[string]bool{} for _, name := range cfg.Api.Smtp.Recipients.Publish { - rcptsPublish[strings.ToLower(name)] = true + rcptsPublish[name] = true } - svcConv := converter.NewConverter(cfg.Api.EventType.Self, util.HtmlPolicy(), cfg.Api.Writer.Internal, rcptsPublish) + svcConv := converter.NewConverter(cfg.Api.EventType.Self, util.HtmlPolicy(), cfg.Api.Writer.Internal, rcptsPublish, cfg.Api.Smtp.Data.TruncUrlQueries) svcConv = converter.NewLogging(svcConv, log) svc := service.NewService(svcConv, svcWriter, cfg.Api.Group) svc = service.NewLogging(svc, log) rcptsInternal := map[string]bool{} for _, name := range cfg.Api.Smtp.Recipients.Internal { - rcptsInternal[strings.ToLower(name)] = true + rcptsInternal[name] = true } b := apiSmtp.NewBackend(rcptsPublish, rcptsInternal, int64(cfg.Api.Smtp.Data.Limit), svc) b = apiSmtp.NewBackendLogging(b, log) diff --git a/service/converter/service.go b/service/converter/service.go index 1a19ab7..9de7701 100644 --- a/service/converter/service.go +++ b/service/converter/service.go @@ -25,6 +25,7 @@ type svc struct { htmlPolicy *bluemonday.Policy writerInternalCfg config.WriterInternalConfig rcptsPublish map[string]bool + truncUrlQuery bool } const ceKeyLenMax = 20 @@ -66,14 +67,15 @@ var headerWhiteList = map[string]bool{ "xreportabuse": true, "xvirusscanned": true, } -var reUrlTail = regexp.MustCompile(`\?[a-zA-Z0-9_\-]+=[a-zA-Z0-9_\-~.%&/#+]*`) +var reUrlQuery = regexp.MustCompile(`\?[a-zA-Z0-9_\-]+=[a-zA-Z0-9_\-~.%&/#+]*`) -func NewConverter(evtType string, htmlPolicy *bluemonday.Policy, writerInternalCfg config.WriterInternalConfig, rcptsPublish map[string]bool) Service { +func NewConverter(evtType string, htmlPolicy *bluemonday.Policy, writerInternalCfg config.WriterInternalConfig, rcptsPublish map[string]bool, truncUrlQuery bool) Service { return svc{ evtType: evtType, htmlPolicy: htmlPolicy, writerInternalCfg: writerInternalCfg, rcptsPublish: rcptsPublish, + truncUrlQuery: truncUrlQuery, } } @@ -204,7 +206,9 @@ func (c svc) convertBody(src *enmime.Envelope, dst *pb.CloudEvent, internal bool if err == nil { txt = src.HTML if !internal { - txt = reUrlTail.ReplaceAllString(txt, "\"") + if c.truncUrlQuery { + txt = reUrlQuery.ReplaceAllString(txt, "\"") + } txt = c.htmlPolicy.Sanitize(txt) } } @@ -311,18 +315,10 @@ func (c svc) convertAttachments(src *enmime.Envelope, dst *pb.CloudEvent, from s func (c svc) cleanRecipients(src string) (dst string) { dst = src for rcpt := range c.rcptsPublish { - if strings.Contains(dst, rcpt+"@") { - dst = strings.ReplaceAll(dst, rcpt+"@", "") - } - if strings.Contains(dst, strings.ToLower(rcpt)+"@") { - dst = strings.ReplaceAll(dst, strings.ToLower(rcpt)+"@", "") - } - if strings.Contains(dst, rcpt) { - dst = strings.ReplaceAll(dst, rcpt, "") - } - if strings.Contains(dst, strings.ToLower(rcpt)) { - dst = strings.ReplaceAll(dst, strings.ToLower(rcpt), "") - } + dst = strings.ReplaceAll(dst, rcpt+"@", "") + dst = strings.ReplaceAll(dst, strings.ToLower(rcpt)+"@", "") + dst = strings.ReplaceAll(dst, rcpt, "") + dst = strings.ReplaceAll(dst, strings.ToLower(rcpt), "") } return } diff --git a/service/converter/service_test.go b/service/converter/service_test.go index aef2320..1154da2 100644 --- a/service/converter/service_test.go +++ b/service/converter/service_test.go @@ -231,6 +231,7 @@ John`), map[string]bool{ "jane.smith": true, }, + false, ) conv = NewLogging(conv, slog.Default()) for k, c := range cases { @@ -253,7 +254,7 @@ John`), } } -func Test_handleHtml(t *testing.T) { +func TestSvc_handleHtml(t *testing.T) { if os.Getenv("CI") == "true" { t.Skip("Skipping test in CI environment") } @@ -269,3 +270,21 @@ func Test_handleHtml(t *testing.T) { assert.Nil(t, err) fmt.Printf("%+v\n", evt.Attributes) } + +func TestSvc_cleanRecipients(t *testing.T) { + if os.Getenv("CI") == "true" { + t.Skip("Skipping test in CI environment") + } + d, err := os.ReadFile("emaildata.html") + require.Nil(t, err) + conv := svc{ + htmlPolicy: util.HtmlPolicy(), + rcptsPublish: map[string]bool{ + "QaZxSw": true, + }, + } + src := string(d) + assert.True(t, strings.Contains(src, "QaZxSw")) + dst := conv.cleanRecipients(src) + assert.False(t, strings.Contains(dst, "QaZxSw")) +} diff --git a/service/service_test.go b/service/service_test.go index 08799bd..8dd6fd7 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -70,6 +70,7 @@ John`), bluemonday.NewPolicy(), config.WriterInternalConfig{}, map[string]bool{}, + false, ), log, ),