-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add registry to Filebeat's diagnostic #41795
base: main
Are you sure you want to change the base?
Changes from all commits
862af71
d8d42a4
65fba59
4c83385
01774ed
0e36f68
f741dbe
a8c5f4f
df75570
d51565a
7a4ce56
74c042a
1ddc906
14c8f99
9d8b2a1
2ec1ae6
affd18b
82b42b8
464ea06
d44c609
2e3d85d
add8e21
608d841
2c45a4c
29f7f8e
f63dc6d
216a5d1
7f3e5ae
44c25ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
// Licensed to Elasticsearch B.V. under one or more contributor | ||
// license agreements. See the NOTICE file distributed with | ||
// this work for additional information regarding copyright | ||
// ownership. Elasticsearch B.V. licenses this file to you under | ||
// the Apache License, Version 2.0 (the "License"); you may | ||
// not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
package beater | ||
|
||
import ( | ||
"archive/tar" | ||
"bytes" | ||
"compress/gzip" | ||
"fmt" | ||
"io" | ||
"io/fs" | ||
"os" | ||
"path/filepath" | ||
"regexp" | ||
"strings" | ||
|
||
"github.com/elastic/elastic-agent-libs/logp" | ||
"github.com/elastic/elastic-agent-libs/paths" | ||
) | ||
|
||
func getRegexpsForRegistryFiles() ([]*regexp.Regexp, error) { | ||
// We use regexps here because globs do not support specifying a character | ||
// range like we do in the checkpoint file. | ||
|
||
registryFileRegExps := []*regexp.Regexp{} | ||
preFilesList := [][]string{ | ||
[]string{"^registry$"}, | ||
[]string{"^registry", "filebeat$"}, | ||
[]string{"^registry", "filebeat", "meta\\.json$"}, | ||
[]string{"^registry", "filebeat", "log\\.json$"}, | ||
[]string{"^registry", "filebeat", "active\\.dat$"}, | ||
[]string{"^registry", "filebeat", "[[:digit:]]*\\.json$"}, | ||
} | ||
|
||
for _, lst := range preFilesList { | ||
var path string | ||
if filepath.Separator == '\\' { | ||
path = strings.Join(lst, `\\`) | ||
} else { | ||
path = filepath.Join(lst...) | ||
} | ||
|
||
// Compile the reg exp, if there is an error, stop and return. | ||
// There should be no error here as this code is tested in all | ||
// supported OSes, however to avoid a code path that leads to a | ||
// panic, we cannot use `regexp.MustCompile` and handle the error | ||
re, err := regexp.Compile(path) | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot compile reg exp: %w", err) | ||
} | ||
|
||
registryFileRegExps = append(registryFileRegExps, re) | ||
} | ||
|
||
return registryFileRegExps, nil | ||
} | ||
|
||
func gzipRegistry() []byte { | ||
logger := logp.L().Named("diagnostics") | ||
buf := bytes.Buffer{} | ||
dataPath := paths.Resolve(paths.Data, "") | ||
registryPath := filepath.Join(dataPath, "registry") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this always guaranteed to exist if there isn't a filestream or log input? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the registry is created very early during Filebeat's start up, even when using the benchmark input the registry folder, Given that they exist in this very simple case, I rather have them in the diagnostics then trying to be clever and detect when the registry is actually being used by one of the inputs. |
||
f, err := os.CreateTemp("", "filebeat-registry-*.tar") | ||
if err != nil { | ||
logger.Errorw("cannot create temporary registry archive", "error.message", err) | ||
} | ||
// Close the file, we just need the empty file created to use it later | ||
f.Close() | ||
defer logger.Debug("finished gziping Filebeat's registry") | ||
|
||
defer func() { | ||
if err := os.Remove(f.Name()); err != nil { | ||
logp.L().Named("diagnostics").Warnf("cannot remove temporary registry archive '%s': '%s'", f.Name(), err) | ||
} | ||
}() | ||
|
||
logger.Debugf("temporary file '%s' created", f.Name()) | ||
if err := tarFolder(logger, registryPath, f.Name()); err != nil { | ||
logger.Errorw(fmt.Sprintf("cannot archive Filebeat's registry at '%s'", f.Name()), "error.message", err) | ||
} | ||
|
||
if err := gzipFile(logger, f.Name(), &buf); err != nil { | ||
logger.Errorw("cannot gzip Filebeat's registry", "error.message", err) | ||
} | ||
|
||
// if the final file is too large, skip it | ||
if buf.Len() >= 20_000_000 { // 20 Mb | ||
logger.Warnf("registry is too large for diagnostics, %dmb bytes > 20mb", buf.Len()/1_000_000) | ||
return nil | ||
} | ||
|
||
return buf.Bytes() | ||
} | ||
|
||
// gzipFile gzips src writing the compressed data to dst | ||
func gzipFile(logger *logp.Logger, src string, dst io.Writer) error { | ||
reader, err := os.Open(src) | ||
if err != nil { | ||
return fmt.Errorf("cannot open '%s': '%w'", src, err) | ||
} | ||
defer reader.Close() | ||
|
||
writer := gzip.NewWriter(dst) | ||
defer writer.Close() | ||
writer.Name = filepath.Base(src) | ||
|
||
if _, err := io.Copy(writer, reader); err != nil { | ||
if err != nil { | ||
return fmt.Errorf("cannot gzip file '%s': '%w'", src, err) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// tarFolder creates a tar archive from the folder src and stores it at dst. | ||
// | ||
// dst must be the full path with extension, e.g: /tmp/foo.tar | ||
// If src is not a folder an error is retruned | ||
func tarFolder(logger *logp.Logger, src, dst string) error { | ||
fullPath, err := filepath.Abs(src) | ||
if err != nil { | ||
return fmt.Errorf("cannot get full path from '%s': '%w'", src, err) | ||
} | ||
|
||
tarFile, err := os.Create(dst) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will be empty if there are no matches, can we avoid including an empty .tar file to avoid people looking into it and wondering if it is supposed to be empty? If the registry directory exists without a log or filestream input, the regexes will never match for agent Filebeat sub-processes that don't run either of those input types so this will be common. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I mentioned on #41795 (comment) the registry folder is always there and contain some data. Also other inputs can use the registry, from the top of my head: The tar file from this "empty" registry is 3.5kb, the gziped is 228 bytes, it's so little I don't see any problems of having it in the diagnostics. It also works as a sort of "snapshot" of the registry, even if empty, which is a helpful information to have. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't care about the impact of the size, rather the analysis impact of knowing when its worth decompressing and looking at A lot of inputs don't use the registry at all, my suspicion is people and tooling will waste time looking at the empty ones because there's no other way to know if its useful besides memorizing which inputs use the registry. |
||
if err != nil { | ||
return fmt.Errorf("cannot create tar file '%s': '%w'", dst, err) | ||
} | ||
defer tarFile.Close() | ||
|
||
tarWriter := tar.NewWriter(tarFile) | ||
defer tarWriter.Close() | ||
|
||
info, err := os.Stat(fullPath) | ||
if err != nil { | ||
return fmt.Errorf("cannot stat '%s': '%w'", fullPath, err) | ||
} | ||
|
||
if !info.IsDir() { | ||
return fmt.Errorf("'%s' is not a directory", fullPath) | ||
} | ||
baseDir := filepath.Base(src) | ||
|
||
logger.Debugf("starting to walk '%s'", fullPath) | ||
|
||
// This error should never happen at runtime, if something | ||
// breaks it should break the tests and be fixed before a | ||
// release. We handle the error here to avoid a code path | ||
// that can end into a panic. | ||
registryFileRegExps, err := getRegexpsForRegistryFiles() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return filepath.Walk(fullPath, func(path string, info fs.FileInfo, prevErr error) error { | ||
// Stop if there is any errors | ||
if prevErr != nil { | ||
return prevErr | ||
} | ||
|
||
pathInTar := filepath.Join(baseDir, strings.TrimPrefix(path, src)) | ||
if !matchRegistyFiles(registryFileRegExps, pathInTar) { | ||
return nil | ||
} | ||
header, err := tar.FileInfoHeader(info, info.Name()) | ||
if err != nil { | ||
return fmt.Errorf("cannot create tar info header: '%w'", err) | ||
} | ||
header.Name = pathInTar | ||
|
||
if err := tarWriter.WriteHeader(header); err != nil { | ||
return fmt.Errorf("cannot write tar header for '%s': '%w'", path, err) | ||
} | ||
|
||
if info.IsDir() { | ||
return nil | ||
} | ||
|
||
file, err := os.Open(path) | ||
if err != nil { | ||
return fmt.Errorf("cannot open '%s' for reading: '%w", path, err) | ||
} | ||
defer file.Close() | ||
|
||
logger.Debugf("adding '%s' to the tar archive", file.Name()) | ||
if _, err := io.Copy(tarWriter, file); err != nil { | ||
return fmt.Errorf("cannot read '%s': '%w'", path, err) | ||
} | ||
|
||
return nil | ||
}) | ||
} | ||
|
||
func matchRegistyFiles(registryFileRegExps []*regexp.Regexp, path string) bool { | ||
for _, regExp := range registryFileRegExps { | ||
if regExp.MatchString(path) { | ||
return true | ||
} | ||
} | ||
return false | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Licensed to Elasticsearch B.V. under one or more contributor | ||
// license agreements. See the NOTICE file distributed with | ||
// this work for additional information regarding copyright | ||
// ownership. Elasticsearch B.V. licenses this file to you under | ||
// the Apache License, Version 2.0 (the "License"); you may | ||
// not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
package beater | ||
|
||
import ( | ||
"fmt" | ||
"path/filepath" | ||
"testing" | ||
) | ||
|
||
func TestMatchRegistryFiles(t *testing.T) { | ||
positiveMatches := []string{ | ||
filepath.Join("registry", "filebeat", "49855.json"), | ||
filepath.Join("registry", "filebeat", "active.dat"), | ||
filepath.Join("registry", "filebeat", "meta.json"), | ||
filepath.Join("registry", "filebeat", "log.json"), | ||
} | ||
negativeMatches := []string{ | ||
filepath.Join("registry", "filebeat", "bar.dat"), | ||
filepath.Join("registry", "filebeat", "log.txt"), | ||
filepath.Join("registry", "42.json"), | ||
filepath.Join("nop", "active.dat"), | ||
} | ||
registryFileRegExps, err := getRegexpsForRegistryFiles() | ||
if err != nil { | ||
t.Fatalf("cannot compile regexps for registry paths: %s", err) | ||
} | ||
|
||
testFn := func(t *testing.T, path string, match bool) { | ||
result := matchRegistyFiles(registryFileRegExps, path) | ||
if result != match { | ||
t.Errorf( | ||
"mathRegisryFiles('%s') should return %t, got %t instead", | ||
path, | ||
match, | ||
result) | ||
} | ||
} | ||
|
||
for _, path := range positiveMatches { | ||
t.Run(fmt.Sprintf("%s returns true", path), func(t *testing.T) { | ||
testFn(t, path, true) | ||
}) | ||
} | ||
|
||
for _, path := range negativeMatches { | ||
t.Run(fmt.Sprintf("%s returns false", path), func(t *testing.T) { | ||
testFn(t, path, false) | ||
}) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you avoid init()? This appears OK but we have constant problems with init in agentbeat and beats receivers, I wouldn't want this to encourage more init.
Can you just use the
os.PathSeparator
constant instead of doing this detection here? The path separator would be known at compile time based on the target OS.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I put it on a
init()
because it's just local variables for the specific purpose of archiving the registry and not having to re-do it every time a diagnostics is requested.I agree with avoiding using
init()
and any global state given all the issues it can (and is) bring. I believe it does not fall into this category, however, I can just re-do it every time it's needed, the performance implications are definitely negligible.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, because the detection here is to actually escape the path separator in the regexp. If I just use
filepath.Join
it won't be escaped in the final string and the regexp won't compile.