Squashed all layers #3138

Open
wants to merge 8 commits into main
53 changes: 53 additions & 0 deletions internal/task/scope_tasks.go
@@ -0,0 +1,53 @@
package task

import (
"context"

"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/sbom"
)

func NewScopesTask() Task {
fn := func(_ context.Context, _ file.Resolver, builder sbomsync.Builder) error {
finalizeScope(builder)
return nil
}

return NewTask("scope-cataloger", fn)
}

func finalizeScope(builder sbomsync.Builder) {
accessor := builder.(sbomsync.Accessor)

	// remove all packages that don't exist in the final state of the image
packagesToDelete := packagesToRemove(accessor)
builder.DeletePackages(packagesToDelete...)
}

func packagesToRemove(accessor sbomsync.Accessor) []artifact.ID {
pkgsToDelete := make([]artifact.ID, 0)
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		// remove packages that don't exist in the final state of the image
pkgsToDelete = append(pkgsToDelete, getPackagesToDelete(s)...)
})
return pkgsToDelete
}

func getPackagesToDelete(s *sbom.SBOM) []artifact.ID {
pkgsToDelete := make([]artifact.ID, 0)
for p := range s.Artifacts.Packages.Enumerate() {
toDelete := true
for _, l := range p.Locations.ToSlice() {
if l.IsSquashedLayer {
toDelete = false
break
}
}
if toDelete {
pkgsToDelete = append(pkgsToDelete, p.ID())
}
}
return pkgsToDelete
}
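
To make the filtering rule above concrete, here is a hypothetical test sketch (not part of the diff): it builds one package whose only location is marked IsSquashedLayer and one whose location is not, then expects getPackagesToDelete to return only the latter. The IsSquashedLayer field comes from this PR; the helpers (file.NewLocationSet, pkg.NewCollection, Package.SetID, sbom.Artifacts) are taken from syft's existing API, and their exact use here is an assumption.

package task

import (
	"testing"

	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/sbom"
)

func TestGetPackagesToDelete_keepsSquashedLayerPackages(t *testing.T) {
	inSquash := file.Location{LocationMetadata: file.LocationMetadata{IsSquashedLayer: true}}
	lowerOnly := file.Location{LocationMetadata: file.LocationMetadata{IsSquashedLayer: false}}

	// a package with squashed-layer evidence should survive
	visible := pkg.Package{Name: "visible", Locations: file.NewLocationSet(inSquash)}
	visible.SetID()

	// a package only present in a lower layer should be scheduled for deletion
	buried := pkg.Package{Name: "buried", Locations: file.NewLocationSet(lowerOnly)}
	buried.SetID()

	s := sbom.SBOM{Artifacts: sbom.Artifacts{Packages: pkg.NewCollection(visible, buried)}}

	got := getPackagesToDelete(&s)
	if len(got) != 1 || got[0] != buried.ID() {
		t.Fatalf("expected only %q to be deleted, got %v", buried.Name, got)
	}
}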
16 changes: 16 additions & 0 deletions syft/create_sbom_config.go
@@ -164,6 +164,7 @@ func (c *CreateSBOMConfig) makeTaskGroups(src source.Description) ([][]task.Task

// generate package and file tasks based on the configuration
environmentTasks := c.environmentTasks()
scopeTasks := c.scopeTasks()
relationshipsTasks := c.relationshipTasks(src)
fileTasks := c.fileTasks()
pkgTasks, selectionEvidence, err := c.packageTasks(src)
@@ -179,6 +180,11 @@ func (c *CreateSBOMConfig) makeTaskGroups(src source.Description) ([][]task.Task
taskGroups = append(taskGroups, append(pkgTasks, fileTasks...))
}

	// all scope work must be done after all nodes (files and packages) have been cataloged and before the relationship tasks run
if source.ParseScope(c.Search.Scope.String()) == source.SquashWithAllLayersScope {
taskGroups = append(taskGroups, scopeTasks)
}

// all relationship work must be done after all nodes (files and packages) have been cataloged
if len(relationshipsTasks) > 0 {
taskGroups = append(taskGroups, relationshipsTasks)
@@ -306,6 +312,16 @@ func (c *CreateSBOMConfig) userPackageTasks(cfg task.CatalogingFactoryConfig) ([
return persistentPackageTasks, selectablePackageTasks, nil
}

// scopeTasks returns the set of tasks that should be run to generate additional scope information
func (c *CreateSBOMConfig) scopeTasks() []task.Task {
var tsks []task.Task

if t := task.NewScopesTask(); t != nil {
tsks = append(tsks, t)
}
return tsks
}

// relationshipTasks returns the set of tasks that should be run to generate additional relationships as well as
// prune existing relationships.
func (c *CreateSBOMConfig) relationshipTasks(src source.Description) []task.Task {
3 changes: 2 additions & 1 deletion syft/file/location.go
@@ -29,7 +29,8 @@ func (l LocationData) Reference() file.Reference {
}

type LocationMetadata struct {
Annotations map[string]string `json:"annotations,omitempty"` // Arbitrary key-value pairs that can be used to annotate a location
Annotations map[string]string `json:"annotations,omitempty"` // Arbitrary key-value pairs that can be used to annotate a location
	IsSquashedLayer bool `json:"-"` // true when the location was resolved from the squashed filesystem representation
}

func (m *LocationMetadata) merge(other LocationMetadata) error {
1 change: 1 addition & 0 deletions syft/format/syftjson/to_format_model.go
@@ -205,6 +205,7 @@ func toPackageModels(catalog *pkg.Collection, cfg EncoderConfig) []model.Package
for _, p := range catalog.Sorted() {
artifacts = append(artifacts, toPackageModel(p, cfg))
}

return artifacts
}

3 changes: 2 additions & 1 deletion syft/internal/fileresolver/container_image_all_layers.go
@@ -120,7 +120,8 @@ func (r *ContainerImageAllLayers) FilesByPath(paths ...string) ([]file.Location,
}

// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
// nolint:gocognit
//
//nolint:gocognit
func (r *ContainerImageAllLayers) FilesByGlob(patterns ...string) ([]file.Location, error) {
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
uniqueLocations := make([]file.Location, 0)
3 changes: 2 additions & 1 deletion syft/internal/fileresolver/container_image_squash.go
@@ -79,7 +79,8 @@ func (r *ContainerImageSquash) FilesByPath(paths ...string) ([]file.Location, er
}

// FilesByGlob returns all file.References that match the given path glob pattern within the squashed representation of the image.
// nolint:gocognit
//
//nolint:gocognit
func (r *ContainerImageSquash) FilesByGlob(patterns ...string) ([]file.Location, error) {
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
uniqueLocations := make([]file.Location, 0)
167 changes: 167 additions & 0 deletions syft/internal/fileresolver/container_image_squash_all_layers.go
@@ -0,0 +1,167 @@
package fileresolver

import (
"context"
"io"

"github.com/anchore/stereoscope/pkg/image"
"github.com/anchore/syft/syft/file"
)

var _ file.Resolver = (*ContainerImageSquashAllLayers)(nil)

// ContainerImageSquashAllLayers implements path and content access for the squash-with-all-layers scope of container image data sources.
type ContainerImageSquashAllLayers struct {
squashed *ContainerImageSquash
allLayers *ContainerImageAllLayers
}

// NewFromContainerImageSquashAllLayers returns a new resolver that combines the squashed and all-layers perspectives of the given image.
func NewFromContainerImageSquashAllLayers(img *image.Image) (*ContainerImageSquashAllLayers, error) {
squashed, err := NewFromContainerImageSquash(img)
if err != nil {
return nil, err
}

allLayers, err := NewFromContainerImageAllLayers(img)
if err != nil {
return nil, err
}

return &ContainerImageSquashAllLayers{
squashed: squashed,
allLayers: allLayers,
}, nil
}

// HasPath indicates if the given path exists in the underlying source.
func (i *ContainerImageSquashAllLayers) HasPath(path string) bool {
return i.squashed.HasPath(path)
}

// FilesByPath returns all file.References that match the given paths from any layer in the image.
func (i *ContainerImageSquashAllLayers) FilesByPath(paths ...string) ([]file.Location, error) {
squashedLocations, err := i.squashed.FilesByPath(paths...)
if err != nil {
return nil, err
}

allLayersLocations, err := i.allLayers.FilesByPath(paths...)
if err != nil {
return nil, err
}

var mergedLocations []file.Location
for _, l := range squashedLocations {
mergedLocations = append(mergedLocations, file.Location{
LocationData: l.LocationData,
LocationMetadata: file.LocationMetadata{
Annotations: l.Annotations,
IsSquashedLayer: true,
},
})
}

for _, l := range allLayersLocations {
mergedLocations = append(mergedLocations, file.Location{
LocationData: l.LocationData,
LocationMetadata: file.LocationMetadata{
Annotations: l.Annotations,
IsSquashedLayer: false,
},
})
}

return mergedLocations, nil
}

// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
func (i *ContainerImageSquashAllLayers) FilesByGlob(patterns ...string) ([]file.Location, error) {
squashedLocations, err := i.squashed.FilesByGlob(patterns...)
if err != nil {
return nil, err
}

allLayersLocations, err := i.allLayers.FilesByGlob(patterns...)
if err != nil {
return nil, err
}

var mergedLocations []file.Location
for _, l := range squashedLocations {
mergedLocations = append(mergedLocations, file.Location{
LocationData: l.LocationData,
LocationMetadata: file.LocationMetadata{
Annotations: l.Annotations,
IsSquashedLayer: true,
},
})
}

for _, l := range allLayersLocations {
mergedLocations = append(mergedLocations, file.Location{
LocationData: l.LocationData,
LocationMetadata: file.LocationMetadata{
Annotations: l.Annotations,
IsSquashedLayer: false,
},
})
}

return mergedLocations, nil
}

// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
// This is helpful when attempting to find a file that is in the same layer or lower as another file.
func (i *ContainerImageSquashAllLayers) RelativeFileByPath(location file.Location, path string) *file.Location {
return i.squashed.RelativeFileByPath(location, path)
}

// FileContentsByLocation fetches file contents for a single file reference, regardless of the source layer.
// If the path does not exist an error is returned.
func (i *ContainerImageSquashAllLayers) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
return i.squashed.FileContentsByLocation(location)
}

func (i *ContainerImageSquashAllLayers) FilesByMIMEType(types ...string) ([]file.Location, error) {
squashedLocations, err := i.squashed.FilesByMIMEType(types...)
if err != nil {
return nil, err
}

allLayersLocations, err := i.allLayers.FilesByMIMEType(types...)
if err != nil {
return nil, err
}

var mergedLocations []file.Location
for _, l := range squashedLocations {
mergedLocations = append(mergedLocations, file.Location{
LocationData: l.LocationData,
LocationMetadata: file.LocationMetadata{
Annotations: l.Annotations,
IsSquashedLayer: true,
},
})
}

for _, l := range allLayersLocations {
mergedLocations = append(mergedLocations, file.Location{
LocationData: l.LocationData,
LocationMetadata: file.LocationMetadata{
Annotations: l.Annotations,
IsSquashedLayer: false,
},
})
}

return mergedLocations, nil
}

func (i *ContainerImageSquashAllLayers) AllLocations(ctx context.Context) <-chan file.Location {
return i.squashed.AllLocations(ctx)
}

func (i *ContainerImageSquashAllLayers) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
return i.squashed.FileMetadataByLocation(location)
}
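
The three lookup methods above repeat the same merge-and-tag pattern. As a design note, a shared helper along these lines could host that logic; this is a sketch under the same package and the new IsSquashedLayer field, not part of the diff.

package fileresolver

import "github.com/anchore/syft/syft/file"

// mergeSquashedAndAllLayers tags squashed-filesystem results with IsSquashedLayer=true
// and all-layers results with IsSquashedLayer=false, preserving any annotations.
// FilesByPath, FilesByGlob, and FilesByMIMEType above could delegate to it.
func mergeSquashedAndAllLayers(squashed, allLayers []file.Location) []file.Location {
	merged := make([]file.Location, 0, len(squashed)+len(allLayers))
	for _, l := range squashed {
		merged = append(merged, file.Location{
			LocationData: l.LocationData,
			LocationMetadata: file.LocationMetadata{
				Annotations:     l.Annotations,
				IsSquashedLayer: true, // visible in the squashed filesystem
			},
		})
	}
	for _, l := range allLayers {
		merged = append(merged, file.Location{
			LocationData: l.LocationData,
			LocationMetadata: file.LocationMetadata{
				Annotations:     l.Annotations,
				IsSquashedLayer: false, // only present in a lower layer
			},
		})
	}
	return merged
}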
17 changes: 11 additions & 6 deletions syft/pkg/collection.go
@@ -12,11 +12,12 @@ import (

// Collection represents a collection of Packages.
type Collection struct {
byID map[artifact.ID]Package
idsByName map[string]orderedIDSet
idsByType map[Type]orderedIDSet
idsByPath map[string]orderedIDSet // note: this is real path or virtual path
lock sync.RWMutex
byID map[artifact.ID]Package
idsByName map[string]orderedIDSet
idsByType map[Type]orderedIDSet
idsByPath map[string]orderedIDSet // note: this is real path or virtual path
isSquashAllLayer bool
lock sync.RWMutex
}

// NewCollection returns a new empty Collection
@@ -285,12 +286,16 @@ func (c *Collection) Sorted(types ...Type) (pkgs []Package) {
for p := range c.Enumerate(types...) {
pkgs = append(pkgs, p)
}

Sort(pkgs)

return pkgs
}

// IsSquashedAllLayers returns whether the package collection was built with the squash-with-all-layers resolver
func (c *Collection) IsSquashedAllLayers() bool {
return c.isSquashAllLayer
}

type orderedIDSet struct {
slice []artifact.ID
}
5 changes: 5 additions & 0 deletions syft/source/scope.go
@@ -12,12 +12,15 @@ const (
SquashedScope Scope = "squashed"
// AllLayersScope indicates to catalog content on all layers, regardless if it is visible from the container at runtime.
AllLayersScope Scope = "all-layers"
// SquashWithAllLayersScope indicates to catalog content on all layers, but only include content visible from the squashed filesystem representation.
SquashWithAllLayersScope Scope = "squash-with-all-layers"
)

// AllScopes is a slice containing all possible scope options
var AllScopes = []Scope{
SquashedScope,
AllLayersScope,
SquashWithAllLayersScope,
}

// ParseScope returns a scope as indicated from the given string.
@@ -27,6 +30,8 @@ func ParseScope(userStr string) Scope {
return SquashedScope
case "alllayers", AllLayersScope.String():
return AllLayersScope
case "squash-with-all-layers", strings.ToLower(SquashWithAllLayersScope.String()):
return SquashWithAllLayersScope
}
return UnknownScope
}
2 changes: 2 additions & 0 deletions syft/source/stereoscopesource/image_source.go
@@ -103,6 +103,8 @@ func (s stereoscopeImageSource) FileResolver(scope source.Scope) (file.Resolver,
res, err = fileresolver.NewFromContainerImageSquash(s.image)
case source.AllLayersScope:
res, err = fileresolver.NewFromContainerImageAllLayers(s.image)
case source.SquashWithAllLayersScope:
res, err = fileresolver.NewFromContainerImageSquashAllLayers(s.image)
default:
return nil, fmt.Errorf("bad image scope provided: %+v", scope)
}
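
For context on how the new scope would be exercised end to end, a minimal caller sketch follows. It assumes syft's existing Go API (GetSource, DefaultGetSourceConfig, DefaultCreateSBOMConfig, CreateSBOM) and the Search.Scope field referenced in create_sbom_config.go above; the image reference is arbitrary and nothing here is part of the diff.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/anchore/syft/syft"
	"github.com/anchore/syft/syft/source"
)

func main() {
	ctx := context.Background()

	// resolve an image source (any pullable reference works; alpine is just an example)
	src, err := syft.GetSource(ctx, "alpine:3.19", syft.DefaultGetSourceConfig())
	if err != nil {
		log.Fatal(err)
	}

	// select the new scope added by this PR via the existing search configuration
	cfg := syft.DefaultCreateSBOMConfig()
	cfg.Search.Scope = source.SquashWithAllLayersScope

	s, err := syft.CreateSBOM(ctx, src, cfg)
	if err != nil {
		log.Fatal(err)
	}

	// packages with no squashed-layer locations have been pruned by the scope task
	fmt.Printf("cataloged %d packages\n", s.Artifacts.Packages.PackageCount())
}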