Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tlscommon and httpcommon diagnostics hooks #3587

Merged
merged 13 commits into from
Jun 24, 2024
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: enhancement

# Change summary; an 80ish-character description of the change.
summary: Add tlscommon and httpcommon diagnostic information

# Long description; in case the summary is not enough to describe the change,
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: |
Add tlscommon and httpcommon hooks for fleet-server to add API and
output TLS diagnostics files when diagnostics are collected as well as
an httpcommon diagnostics trace for fleet-server's connection to
Elasticsearch.

# Affected component; a word indicating the component this changeset affects.
component:

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/fleet-server/pull/3587

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
3 changes: 2 additions & 1 deletion internal/pkg/api/openapi.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions internal/pkg/config/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
package config

import (
"bytes"
"context"
"fmt"
"net"
"net/http"
Expand All @@ -16,8 +18,10 @@ import (
"time"

urlutil "github.com/elastic/elastic-agent-libs/kibana"
"github.com/elastic/elastic-agent-libs/transport/httpcommon"
"github.com/elastic/elastic-agent-libs/transport/tlscommon"
"github.com/elastic/go-elasticsearch/v8"
"github.com/rs/zerolog"
)

// The timeout would be driven by the server for long poll.
Expand Down Expand Up @@ -228,3 +232,48 @@ func makeURL(defaultScheme string, defaultPath string, rawURL string, defaultPor
addr.Host = host + ":" + port
return addr.String(), nil
}

func (c *Elasticsearch) DiagRequests(ctx context.Context) []byte {
pURL, err := httpcommon.NewProxyURIFromString(c.ProxyURL)
if err != nil {
zerolog.Ctx(ctx).Warn().Err(err).Msg("Unable to transform proxy_url to url.URL")
}
settings := httpcommon.HTTPTransportSettings{
TLS: c.TLS,
Timeout: c.Timeout,
Proxy: httpcommon.HTTPClientProxySettings{
Disable: c.ProxyDisable,
URL: pURL,
Headers: httpcommon.ProxyHeaders(c.ProxyHeaders),
},
}
headers := http.Header{}
for k, v := range c.Headers {
headers.Set(k, v)
}

reqs := make([]*http.Request, 0, len(c.Hosts))

var res bytes.Buffer
for _, host := range c.Hosts {
u, err := url.Parse(host)
if err != nil {
zerolog.Ctx(ctx).Warn().Err(err).Str("host", host).Msg("Unable to transform host to url.URL")
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
res.WriteString(fmt.Sprintf("Unable to transform host %q to url.URL: %v\n", host, err))
continue
}
if u.Scheme == "" {
u.Scheme = c.Protocol
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
if err != nil {
zerolog.Ctx(ctx).Warn().Err(err).Str("host", host).Msg("Unable to create request to host")
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
res.WriteString(fmt.Sprintf("Unable to create request to host %q: %v\n", host, err))
continue
}
req.Header = headers.Clone()
reqs = append(reqs, req)
}
res.Write(settings.DiagRequests(reqs)())
return res.Bytes()
}
18 changes: 18 additions & 0 deletions internal/pkg/config/output_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
package config

import (
"context"
"crypto/tls"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
Expand Down Expand Up @@ -382,3 +384,19 @@ func setTestEnv(t *testing.T, env map[string]string) {
t.Setenv(k, v)
}
}

// Test_Elasticsearch_DiagRequests points a default Elasticsearch config at a
// local test server that always answers 200 OK and checks that the diagnostics
// output is non-empty and reports the first request as successful.
func Test_Elasticsearch_DiagRequests(t *testing.T) {
	okHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	server := httptest.NewServer(okHandler)
	defer server.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	cfg := &Elasticsearch{}
	cfg.InitDefaults()
	cfg.Hosts = []string{server.URL}

	report := cfg.DiagRequests(ctx)
	require.NotEmpty(t, report)
	require.Contains(t, string(report), "request 0 successful.")
}
38 changes: 38 additions & 0 deletions internal/pkg/server/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,44 @@ func (a *Agent) Run(ctx context.Context) error {
}
return p
})
a.agent.RegisterDiagnosticHook("fleet-server api tls diag", "fleet-server's API TLS config", "fleet-server-api-tls.txt", "text/plain", func() []byte {
if a.srv == nil {
log.Warn().Msg("Diagnostics hook failure fleet-server is nil.")
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
return []byte(`Diagnostics hook failure fleet-server is nil`)
}
cfg := a.srv.GetConfig()
if cfg == nil || len(cfg.Inputs) == 0 {
log.Warn().Msg("Diagnostics hook failure config is nil.")
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
return []byte(`Diagnostics hook failure config is nil`)
}
return cfg.Inputs[0].Server.TLS.DiagCerts()()
})
a.agent.RegisterDiagnosticHook("fleet-server output tls diag", "fleet-server's output TLS config", "fleet-server-output-tls.txt", "text/plain", func() []byte {
if a.srv == nil {
log.Warn().Msg("Diagnostics hook failure fleet-server is nil.")
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
return []byte(`Diagnostics hook failure fleet-server is nil`)
}
cfg := a.srv.GetConfig()
if cfg == nil {
log.Warn().Msg("Diagnostics hook failure config is nil.")
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
return []byte(`Diagnostics hook failure config is nil`)
}
return cfg.Output.Elasticsearch.TLS.DiagCerts()()
})
a.agent.RegisterOptionalDiagnosticHook("CONN", "fleet-server output request diag", "fleet-server output request trace diagnostics", "fleet-server-output-request.txt", "text/plain", func() []byte {
if a.srv == nil {
log.Warn().Msg("Diagnostics hook failure fleet-server is nil.")
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
return []byte(`Diagnostics hook failure fleet-server is nil`)
}
cfg := a.srv.GetConfig()
if cfg == nil {
log.Warn().Msg("Diagnostics hook failure config is nil.")
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
return []byte(`Diagnostics hook failure config is nil`)
}
ctx, cancel := context.WithTimeout(ctx, time.Second*30) // TODO(michel-laterman): duration/timeout should be part of the diagnostics action from fleet-server (https://github.com/elastic/fleet-server/issues/3648) and the control protocol (https://github.com/elastic/elastic-agent-client/issues/113)
defer cancel()
return cfg.Output.Elasticsearch.DiagRequests(ctx)
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
})

subCtx, subCanceller := context.WithCancel(ctx)
defer subCanceller()
Expand Down
1 change: 1 addition & 0 deletions model/openapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ components:
type: string
enum:
- CPU
- CONN
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the discussion in elastic/elastic-agent#4880, we want to have the HTTP connection request diagnostics as an optional value that is enabled by default.

actionPolicyReassign:
description: The POLICY_REASSIGN action data.
type: object
Expand Down
3 changes: 2 additions & 1 deletion pkg/api/types.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading