diff --git a/pkg/kubelet/kubelet_volumes.go b/pkg/kubelet/kubelet_volumes.go index da43701d1b416..5ba437ce19663 100644 --- a/pkg/kubelet/kubelet_volumes.go +++ b/pkg/kubelet/kubelet_volumes.go @@ -18,6 +18,8 @@ package kubelet import ( "fmt" + "io/ioutil" + "path/filepath" "syscall" v1 "k8s.io/api/core/v1" @@ -90,6 +92,57 @@ func (kl *Kubelet) newVolumeMounterFromPlugins(spec *volume.Spec, pod *v1.Pod, o return physicalMounter, nil } +// removeOrphanedPodVolumeDirs attempts to remove the pod volumes directory and +// its subdirectories. There should be no files left under normal conditions +// when this is called, so it effectively does a recursive rmdir instead of +// RemoveAll to ensure it only removes directories and not regular files. +func (kl *Kubelet) removeOrphanedPodVolumeDirs(uid types.UID) []error { + orphanVolumeErrors := []error{} + + // If there are still volume directories, attempt to rmdir them + volumePaths, err := kl.getPodVolumePathListFromDisk(uid) + if err != nil { + orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading volume dir from disk", uid, err)) + return orphanVolumeErrors + } + if len(volumePaths) > 0 { + for _, volumePath := range volumePaths { + if err := syscall.Rmdir(volumePath); err != nil { + orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but failed to rmdir() volume at path %v: %v", uid, volumePath, err)) + } else { + klog.Warningf("Cleaned up orphaned volume from pod %q at %s", uid, volumePath) + } + } + } + + // If there are any volume-subpaths, attempt to rmdir them + subpathVolumePaths, err := kl.getPodVolumeSubpathListFromDisk(uid) + if err != nil { + orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading of volume-subpaths dir from disk", uid, err)) + return orphanVolumeErrors + } + if len(subpathVolumePaths) > 0 { + for _, subpathVolumePath := range subpathVolumePaths { + if err := syscall.Rmdir(subpathVolumePath); err != nil { + orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but failed to rmdir() subpath at path %v: %v", uid, subpathVolumePath, err)) + } else { + klog.Warningf("Cleaned up orphaned volume subpath from pod %q at %s", uid, subpathVolumePath) + } + } + } + + // Remove any remaining subdirectories along with the volumes directory itself. + // Fail if any regular files are encountered. + podVolDir := kl.getPodVolumesDir(uid) + if err := removeall.RemoveDirsOneFilesystem(kl.mounter, podVolDir); err != nil { + orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred when trying to remove the volumes dir", uid, err)) + } else { + klog.Warningf("Cleaned up orphaned pod volumes dir from pod %q at %s", uid, podVolDir) + } + + return orphanVolumeErrors +} + // cleanupOrphanedPodDirs removes the volumes of pods that should not be // running and that have no containers running. Note that we roll up logs here since it runs in the main loop. func (kl *Kubelet) cleanupOrphanedPodDirs(pods []*v1.Pod, runningPods []*kubecontainer.Pod) error { @@ -122,55 +175,48 @@ func (kl *Kubelet) cleanupOrphanedPodDirs(pods []*v1.Pod, runningPods []*kubecon continue } - allVolumesCleanedUp := true - - // If there are still volume directories, attempt to rmdir them - volumePaths, err := kl.getPodVolumePathListFromDisk(uid) - if err != nil { - orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading volume dir from disk", uid, err)) + // Attempt to remove the pod volumes directory and its subdirs + podVolumeErrors := kl.removeOrphanedPodVolumeDirs(uid) + if len(podVolumeErrors) > 0 { + orphanVolumeErrors = append(orphanVolumeErrors, podVolumeErrors...) + // Not all volumes were removed, so don't clean up the pod directory yet. It is likely + // that there are still mountpoints or files left which could cause removal of the pod + // directory to fail below. + // Errors for all removal operations have already been recorded, so don't add another + // one here. continue } - if len(volumePaths) > 0 { - for _, volumePath := range volumePaths { - if err := syscall.Rmdir(volumePath); err != nil { - orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but failed to rmdir() volume at path %v: %v", uid, volumePath, err)) - allVolumesCleanedUp = false - } else { - klog.Warningf("Cleaned up orphaned volume from pod %q at %s", uid, volumePath) - } - } - } - // If there are any volume-subpaths, attempt to rmdir them - subpathVolumePaths, err := kl.getPodVolumeSubpathListFromDisk(uid) + // Call RemoveAllOneFilesystem for remaining subdirs under the pod directory + podDir := kl.getPodDir(uid) + podSubdirs, err := ioutil.ReadDir(podDir) if err != nil { - orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading of volume-subpaths dir from disk", uid, err)) + klog.Errorf("Could not read directory %q; err: %v", podDir, err) + orphanRemovalErrors = append(orphanRemovalErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred during reading the pod dir from disk", uid, err)) continue } - if len(subpathVolumePaths) > 0 { - for _, subpathVolumePath := range subpathVolumePaths { - if err := syscall.Rmdir(subpathVolumePath); err != nil { - orphanVolumeErrors = append(orphanVolumeErrors, fmt.Errorf("orphaned pod %q found, but failed to rmdir() subpath at path %v: %v", uid, subpathVolumePath, err)) - allVolumesCleanedUp = false - } else { - klog.Warningf("Cleaned up orphaned volume subpath from pod %q at %s", uid, subpathVolumePath) - } + for _, podSubdir := range podSubdirs { + podSubdirName := podSubdir.Name() + podSubdirPath := filepath.Join(podDir, podSubdirName) + // Never attempt RemoveAllOneFilesystem on the volumes directory, + // as this could lead to data loss in some situations. The volumes + // directory should have been removed by removeOrphanedPodVolumeDirs. + if podSubdirName == "volumes" { + err := fmt.Errorf("volumes subdir was found after it was removed") + klog.Errorf("Orphaned pod %q found, but failed to remove volumes subdir %q; err: %v", uid, podSubdirPath, err) + continue + } + if err := removeall.RemoveAllOneFilesystem(kl.mounter, podSubdirPath); err != nil { + klog.Errorf("Failed to remove orphaned pod %q subdir %q; err: %v", uid, podSubdirPath, err) + orphanRemovalErrors = append(orphanRemovalErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred when trying to remove subdir %q", uid, err, podSubdirPath)) } } - if !allVolumesCleanedUp { - // Not all volumes were removed, so don't clean up the pod directory yet. It is likely - // that there are still mountpoints left which could stall RemoveAllOneFilesystem which - // would otherwise be called below. - // Errors for all removal operations have already been recorded, so don't add another - // one here. - continue - } - + // Rmdir the pod dir, which should be empty if everything above was successful klog.V(3).Infof("Orphaned pod %q found, removing", uid) - if err := removeall.RemoveAllOneFilesystem(kl.mounter, kl.getPodDir(uid)); err != nil { + if err := syscall.Rmdir(podDir); err != nil { klog.Errorf("Failed to remove orphaned pod %q dir; err: %v", uid, err) - orphanRemovalErrors = append(orphanRemovalErrors, err) + orphanRemovalErrors = append(orphanRemovalErrors, fmt.Errorf("orphaned pod %q found, but error %v occurred when trying to remove the pod directory", uid, err)) } } diff --git a/pkg/kubelet/kubelet_volumes_linux_test.go b/pkg/kubelet/kubelet_volumes_linux_test.go index ea652eaa64a5d..bf5c629966e9c 100644 --- a/pkg/kubelet/kubelet_volumes_linux_test.go +++ b/pkg/kubelet/kubelet_volumes_linux_test.go @@ -104,6 +104,16 @@ func TestCleanupOrphanedPodDirs(t *testing.T) { return validateDirNotExists(podDir) }, }, + "pod-doesnot-exist-with-volume-subdir": { + prepareFunc: func(kubelet *Kubelet) error { + podDir := kubelet.getPodDir("pod1uid") + return os.MkdirAll(filepath.Join(podDir, "volumes/plugin/name/subdir"), 0750) + }, + validateFunc: func(kubelet *Kubelet) error { + podDir := kubelet.getPodDir("pod1uid") + return validateDirNotExists(filepath.Join(podDir, "volumes")) + }, + }, "pod-doesnot-exist-with-subpath": { prepareFunc: func(kubelet *Kubelet) error { podDir := kubelet.getPodDir("pod1uid") diff --git a/pkg/util/removeall/OWNERS b/pkg/util/removeall/OWNERS new file mode 100644 index 0000000000000..6b5f6ba5041c8 --- /dev/null +++ b/pkg/util/removeall/OWNERS @@ -0,0 +1,8 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +approvers: +- sig-storage-approvers +reviewers: +- sig-storage-reviewers +labels: +- sig/storage diff --git a/pkg/util/removeall/removeall.go b/pkg/util/removeall/removeall.go index 1d268e18ee645..7801ecffc9870 100644 --- a/pkg/util/removeall/removeall.go +++ b/pkg/util/removeall/removeall.go @@ -25,16 +25,16 @@ import ( "k8s.io/mount-utils" ) -// RemoveAllOneFilesystem removes path and any children it contains. -// It removes everything it can but returns the first error -// it encounters. If the path does not exist, RemoveAll +// RemoveAllOneFilesystemCommon removes the path and any children it contains, +// using the provided remove function. It removes everything it can but returns +// the first error it encounters. If the path does not exist, RemoveAll // returns nil (no error). // It makes sure it does not cross mount boundary, i.e. it does *not* remove // files from another filesystems. Like 'rm -rf --one-file-system'. // It is copied from RemoveAll() sources, with IsLikelyNotMountPoint -func RemoveAllOneFilesystem(mounter mount.Interface, path string) error { +func RemoveAllOneFilesystemCommon(mounter mount.Interface, path string, remove func(string) error) error { // Simple case: if Remove works, we're done. - err := os.Remove(path) + err := remove(path) if err == nil || os.IsNotExist(err) { return nil } @@ -48,7 +48,7 @@ func RemoveAllOneFilesystem(mounter mount.Interface, path string) error { return serr } if !dir.IsDir() { - // Not a directory; return the error from Remove. + // Not a directory; return the error from remove. return err } @@ -76,7 +76,7 @@ func RemoveAllOneFilesystem(mounter mount.Interface, path string) error { for { names, err1 := fd.Readdirnames(100) for _, name := range names { - err1 := RemoveAllOneFilesystem(mounter, path+string(os.PathSeparator)+name) + err1 := RemoveAllOneFilesystemCommon(mounter, path+string(os.PathSeparator)+name, remove) if err == nil { err = err1 } @@ -97,7 +97,7 @@ func RemoveAllOneFilesystem(mounter mount.Interface, path string) error { fd.Close() // Remove directory. - err1 := os.Remove(path) + err1 := remove(path) if err1 == nil || os.IsNotExist(err1) { return nil } @@ -106,3 +106,23 @@ func RemoveAllOneFilesystem(mounter mount.Interface, path string) error { } return err } + +// RemoveAllOneFilesystem removes the path and any children it contains, using +// the os.Remove function. It makes sure it does not cross mount boundaries, +// i.e. it returns an error rather than remove files from another filesystem. +// It removes everything it can but returns the first error it encounters. +// If the path does not exist, it returns nil (no error). +func RemoveAllOneFilesystem(mounter mount.Interface, path string) error { + return RemoveAllOneFilesystemCommon(mounter, path, os.Remove) +} + +// RemoveDirsOneFilesystem removes the path and any empty subdirectories it +// contains, using the syscall.Rmdir function. Unlike RemoveAllOneFilesystem, +// RemoveDirsOneFilesystem will remove only directories and returns an error if +// it encounters any files in the directory tree. It makes sure it does not +// cross mount boundaries, i.e. it returns an error rather than remove dirs +// from another filesystem. It removes everything it can but returns the first +// error it encounters. If the path does not exist, it returns nil (no error). +func RemoveDirsOneFilesystem(mounter mount.Interface, path string) error { + return RemoveAllOneFilesystemCommon(mounter, path, syscall.Rmdir) +} diff --git a/pkg/util/removeall/removeall_test.go b/pkg/util/removeall/removeall_test.go index 6b164d22b2c40..2b2a641af176f 100644 --- a/pkg/util/removeall/removeall_test.go +++ b/pkg/util/removeall/removeall_test.go @@ -138,3 +138,123 @@ func TestRemoveAllOneFilesystem(t *testing.T) { } } } + +func TestRemoveDirsOneFilesystem(t *testing.T) { + tests := []struct { + name string + // Items of the test directory. Directories end with "/". + // Directories starting with "mount" are considered to be mount points. + // Directories starting with "err" will cause an error in + // IsLikelyNotMountPoint. + items []string + expectError bool + }{ + { + "empty dir", + []string{}, + false, + }, + { + "non-mount-no-files", + []string{ + "dir/", + "dir/subdir1/", + "dir2/", + "dir2/subdir2/", + "dir2/subdir2/subdir3/", + "dir3/", + }, + false, + }, + { + "non-mount-with-files", + []string{ + "dir/", + "dir/file", + "dir2/", + "file2", + }, + true, + }, + { + "mount-no-files", + []string{ + "dir/", + "dir/subdir1/", + "dir2/", + "dir2/subdir2/", + "dir2/subdir2/subdir3/", + "mount/", + "mount/dir3/", + }, + true, + }, + { + "mount-with-files", + []string{ + "dir/", + "dir/file", + "dir2/", + "file2", + "mount/", + "mount/file3", + }, + true, + }, + { + "innermount", + []string{ + "dir/", + "dir/subdir1/", + "dir/dir2/", + "dir/dir2/subdir2/", + "dir/dir2/mount/", + "dir/dir2/mount/subdir3/", + }, + true, + }, + { + "error", + []string{ + "dir/", + "dir/subdir1/", + "dir2/", + "err/", + "err/subdir3/", + }, + true, + }, + } + + for _, test := range tests { + tmpDir, err := utiltesting.MkTmpdir("removeall-" + test.name + "-") + if err != nil { + t.Fatalf("Can't make a tmp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + // Create the directory structure + for _, item := range test.items { + if strings.HasSuffix(item, "/") { + item = strings.TrimRight(item, "/") + if err = os.Mkdir(path.Join(tmpDir, item), 0777); err != nil { + t.Fatalf("error creating %s: %v", item, err) + } + } else { + f, err := os.Create(path.Join(tmpDir, item)) + if err != nil { + t.Fatalf("error creating %s: %v", item, err) + } + f.Close() + } + } + + mounter := &fakeMounter{} + err = RemoveDirsOneFilesystem(mounter, tmpDir) + if err == nil && test.expectError { + t.Errorf("test %q failed: expected error and got none", test.name) + } + if err != nil && !test.expectError { + t.Errorf("test %q failed: unexpected error: %v", test.name, err) + } + } +}