-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add registry to Filebeat's diagnostic #41795
base: main
Are you sure you want to change the base?
Changes from 13 commits
862af71
d8d42a4
65fba59
4c83385
01774ed
0e36f68
f741dbe
a8c5f4f
df75570
d51565a
7a4ce56
74c042a
1ddc906
14c8f99
9d8b2a1
2ec1ae6
affd18b
82b42b8
464ea06
d44c609
2e3d85d
add8e21
608d841
2c45a4c
29f7f8e
f63dc6d
216a5d1
7f3e5ae
44c25ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
// Licensed to Elasticsearch B.V. under one or more contributor | ||
// license agreements. See the NOTICE file distributed with | ||
// this work for additional information regarding copyright | ||
// ownership. Elasticsearch B.V. licenses this file to you under | ||
// the Apache License, Version 2.0 (the "License"); you may | ||
// not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
package beater | ||
|
||
import ( | ||
"archive/tar" | ||
"bytes" | ||
"compress/gzip" | ||
"fmt" | ||
"io" | ||
"io/fs" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
|
||
"github.com/elastic/elastic-agent-libs/logp" | ||
"github.com/elastic/elastic-agent-libs/paths" | ||
) | ||
|
||
func gzipRegistry() []byte { | ||
logger := logp.L().Named("diagnostics") | ||
buf := bytes.Buffer{} | ||
dataPath := paths.Resolve(paths.Data, "") | ||
registryPath := filepath.Join(dataPath, "registry") | ||
f, err := os.CreateTemp("", "filebeat-registry-*.tar") | ||
if err != nil { | ||
logger.Errorw("cannot create temporary registry archive", "error.message", err) | ||
} | ||
// Close the file, we just need the empty file created to use it later | ||
f.Close() | ||
defer logger.Debug("finished gziping Filebeat's registry") | ||
|
||
defer func() { | ||
if err := os.Remove(f.Name()); err != nil { | ||
logp.L().Named("diagnostics").Warnf("cannot remove temporary registry archive '%s': '%s'", f.Name(), err) | ||
} | ||
}() | ||
|
||
logger.Debugf("temporary file '%s' created", f.Name()) | ||
if err := tarFolder(logger, registryPath, f.Name()); err != nil { | ||
logger.Errorw(fmt.Sprintf("cannot archive Filebeat's registry at '%s'", f.Name()), "error.message", err) | ||
} | ||
|
||
if err := gzipFile(logger, f.Name(), &buf); err != nil { | ||
logger.Errorw("cannot gzip Filebeat's registry", "error.message", err) | ||
} | ||
|
||
return buf.Bytes() | ||
} | ||
|
||
// gzipFile gzips src writing the compressed data to dst | ||
func gzipFile(logger *logp.Logger, src string, dst io.Writer) error { | ||
reader, err := os.Open(src) | ||
if err != nil { | ||
return fmt.Errorf("cannot open '%s': '%w'", src, err) | ||
} | ||
defer reader.Close() | ||
|
||
writer := gzip.NewWriter(dst) | ||
defer writer.Close() | ||
writer.Name = filepath.Base(src) | ||
|
||
if _, err := io.Copy(writer, reader); err != nil { | ||
if err != nil { | ||
return fmt.Errorf("cannot gzip file '%s': '%w'", src, err) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// tarFolder creates a tar archive from the folder src and stores it at dst. | ||
// | ||
// dst must be the full path with extension, e.g: /tmp/foo.tar | ||
// If src is not a folder an error is retruned | ||
func tarFolder(logger *logp.Logger, src, dst string) error { | ||
fullPath, err := filepath.Abs(src) | ||
if err != nil { | ||
return fmt.Errorf("cannot get full path from '%s': '%w'", src, err) | ||
} | ||
|
||
tarFile, err := os.Create(dst) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will be empty if there are no matches, can we avoid including an empty .tar file to avoid people looking into it and wondering if it is supposed to be empty? If the registry directory exists without a log or filestream input, the regexes will never match for agent Filebeat sub-processes that don't run either of those input types so this will be common. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I mentioned on #41795 (comment) the registry folder is always there and contain some data. Also other inputs can use the registry, from the top of my head: The tar file from this "empty" registry is 3.5kb, the gziped is 228 bytes, it's so little I don't see any problems of having it in the diagnostics. It also works as a sort of "snapshot" of the registry, even if empty, which is a helpful information to have. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't care about the impact of the size, rather the analysis impact of knowing when its worth decompressing and looking at A lot of inputs don't use the registry at all, my suspicion is people and tooling will waste time looking at the empty ones because there's no other way to know if its useful besides memorizing which inputs use the registry. |
||
if err != nil { | ||
return fmt.Errorf("cannot create tar file '%s': '%w'", dst, err) | ||
} | ||
defer tarFile.Close() | ||
|
||
tarWriter := tar.NewWriter(tarFile) | ||
defer tarWriter.Close() | ||
|
||
info, err := os.Stat(fullPath) | ||
if err != nil { | ||
return fmt.Errorf("cannot stat '%s': '%w'", fullPath, err) | ||
} | ||
|
||
if !info.IsDir() { | ||
return fmt.Errorf("'%s' is not a directory", fullPath) | ||
} | ||
baseDir := filepath.Base(src) | ||
|
||
logger.Debugf("starting to walk '%s'", fullPath) | ||
return filepath.Walk(fullPath, func(path string, info fs.FileInfo, prevErr error) error { | ||
// Stop if there is any errors | ||
if prevErr != nil { | ||
return prevErr | ||
} | ||
|
||
header, err := tar.FileInfoHeader(info, info.Name()) | ||
if err != nil { | ||
return fmt.Errorf("cannot create tar info header: '%w'", err) | ||
} | ||
header.Name = filepath.Join(baseDir, strings.TrimPrefix(path, src)) | ||
|
||
if err := tarWriter.WriteHeader(header); err != nil { | ||
return fmt.Errorf("cannot write tar header for '%s': '%w'", path, err) | ||
} | ||
|
||
if info.IsDir() { | ||
return nil | ||
} | ||
|
||
file, err := os.Open(path) | ||
if err != nil { | ||
return fmt.Errorf("cannot open '%s' for reading: '%w", path, err) | ||
} | ||
defer file.Close() | ||
|
||
logger.Debugf("adding '%s' to the tar archive", file.Name()) | ||
if _, err := io.Copy(tarWriter, file); err != nil { | ||
return fmt.Errorf("cannot read '%s': '%w'", path, err) | ||
} | ||
|
||
return nil | ||
}) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,6 +159,19 @@ func (r *runner) Stop() { | |
r.statusReporter = nil | ||
} | ||
|
||
// func (r *runner) Diagnostics() []diagnostics.DiagnosticSetup { | ||
// fmt.Println("================================================== Diagnostics called!") | ||
// setup := diagnostics.DiagnosticSetup{ | ||
// Name: "registry collector", | ||
// Description: "Collect Filebeat's registry", | ||
// Filename: "registry.tar.gz", | ||
// ContentType: "application/octet-stream", | ||
// Callback: getRegistry, | ||
// } | ||
|
||
// return []diagnostics.DiagnosticSetup{setup} | ||
// } | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need this commented out code. Correct? |
||
func configID(config *conf.C) (string, error) { | ||
tmp := struct { | ||
ID string `config:"id"` | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this always guaranteed to exist if there isn't a filestream or log input?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the registry is created very early during Filebeat's start up, even when using the benchmark input the registry folder,
meta.json
andlog.json
are created.Given that they exist in this very simple case, I rather have them in the diagnostics then trying to be clever and detect when the registry is actually being used by one of the inputs.