diff --git a/api/scanner/scan.go b/api/scanner/scan.go index cfdc95b..ffe63b2 100644 --- a/api/scanner/scan.go +++ b/api/scanner/scan.go @@ -145,14 +145,15 @@ func (s *ScanSpec) ScanLicenseText(licenseLibrary *licenses.LicenseLibrary, resu // find the licenses in the normalized text and return a list of SPDX IDs // in case of an error, return as much as we have along with an error - results, err := identifier.Identify(identifier.Options{}, licenseLibrary, normalizedData) + identifierResults := identifier.IdentifierResults{} + err := identifier.Identify(&identifierResults, identifier.Options{}, licenseLibrary, normalizedData) if err != nil { r.Error = err return r } // if the results are empty, add unknown as the SPDX ID - if len(results.Matches) == 0 { + if len(identifierResults.Matches) == 0 { // Add NOASSERTION to the LicenseChoice of the SPDX Name for this scan r.CycloneDXLicenses = append(r.CycloneDXLicenses, cyclonedx.LicenseChoice{ License: &cyclonedx.License{ @@ -161,7 +162,7 @@ func (s *ScanSpec) ScanLicenseText(licenseLibrary *licenses.LicenseLibrary, resu }) } else { // iterate over the list of matches and maintain the unique list of SPDX IDs in the result - for id := range results.Matches { + for id := range identifierResults.Matches { // Add an SPDX ID from the match // update the LicenseChoice to include each new match diff --git a/cmd/root.go b/cmd/root.go index 525a621..72b80c5 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -7,6 +7,7 @@ import ( "fmt" "os" "sort" + "strings" "time" "github.com/CycloneDX/license-scanner/configurer" @@ -161,6 +162,32 @@ func listLicenses(cfg *viper.Viper) error { return nil } +func getCommandLineOptions(cfg *viper.Viper) (options identifier.Options) { + options = identifier.Options{ + ForceResult: true, + // Default to all pattern matching functions + Patterns: configurer.SUPPORTED_MATCH_PATTERNS, + Enhancements: identifier.Enhancements{ + AddNotes: "", + AddTextBlocks: true, + FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), + FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), + FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), + }, + } + + // Parse out patterns into easy-to-test map + tmpPatterns := cfg.GetString(configurer.PatternsFlag) + if tmpPatterns != "" { + options.Patterns = strings.Split(tmpPatterns, ",") + } + options.PatternMap = make(map[string]bool) + for _, pattern := range options.Patterns { + options.PatternMap[pattern] = true + } + return +} + func findLicensesInDirectory(cfg *viper.Viper) error { d := cfg.GetString(configurer.DirFlag) @@ -171,17 +198,8 @@ func findLicensesInDirectory(cfg *viper.Viper) error { if err := licenseLibrary.AddAll(); err != nil { return err } - - options := identifier.Options{ - ForceResult: true, - Enhancements: identifier.Enhancements{ - AddNotes: "", - AddTextBlocks: true, - FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), - FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), - FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), - }, - } + // retrieve command line options from flags + options := getCommandLineOptions(cfg) results, err := identifier.IdentifyLicensesInDirectory(d, options, licenseLibrary) if err != nil { @@ -240,16 +258,8 @@ func findLicensesInFile(cfg *viper.Viper, f string) error { return err } - options := identifier.Options{ - ForceResult: true, - Enhancements: identifier.Enhancements{ - AddNotes: "", - AddTextBlocks: true, - FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), - FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), - FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), - }, - } + // retrieve command line options from flags + options := getCommandLineOptions(cfg) results, err := identifier.IdentifyLicensesInFile(f, options, licenseLibrary) if err != nil { diff --git a/configurer/configurer.go b/configurer/configurer.go index f118ad8..288d9e9 100644 --- a/configurer/configurer.go +++ b/configurer/configurer.go @@ -8,9 +8,9 @@ import ( "path" "path/filepath" "runtime" + "strings" "github.com/spf13/pflag" - "github.com/spf13/viper" ) @@ -35,8 +35,19 @@ const ( SpdxPathFlag = "spdxPath" CustomFlag = "custom" CustomPathFlag = "customPath" + PatternsFlag = "patterns" +) + +const ( + MATCH_PATTERN_SPDX_ID = "spdx-id" + MATCH_PATTERN_ALIAS = "alias" + MATCH_PATTERN_URL = "url" + MATCH_PATTERN_PRIMARY = "primary" + MATCH_PATTERN_ASSOCIATED = "associated" ) +var SUPPORTED_MATCH_PATTERNS = []string{MATCH_PATTERN_SPDX_ID, MATCH_PATTERN_ALIAS, MATCH_PATTERN_URL, MATCH_PATTERN_PRIMARY, MATCH_PATTERN_ASSOCIATED} + var ( _, thisFile, _, _ = runtime.Caller(0) // Dirs/files are relative to this file thisDir = filepath.Dir(thisFile) @@ -147,4 +158,8 @@ func AddDefaultFlags(flagSet *pflag.FlagSet) { flagSet.String(SpdxPathFlag, "", "Path to external SPDX templates to use") flagSet.String(CustomFlag, DefaultResource, "Custom templates to use") flagSet.String(CustomPathFlag, "", "Path to external custom templates to use") + + help_msg_pattern := fmt.Sprintf("Comma-separated list of license pattern-matching functions to execute. One or more of: [%v]; defaults to all patterns.", + strings.Join(SUPPORTED_MATCH_PATTERNS, ", ")) + flagSet.StringP(PatternsFlag, "", "", help_msg_pattern) } diff --git a/go.mod b/go.mod index 26522fd..9a3df19 100644 --- a/go.mod +++ b/go.mod @@ -4,25 +4,25 @@ go 1.18 require ( github.com/CycloneDX/cyclonedx-go v0.7.1 - github.com/CycloneDX/sbom-utility v0.9.3 - github.com/google/go-cmp v0.5.8 - github.com/spf13/cobra v1.6.1 + github.com/CycloneDX/sbom-utility v0.16.0 + github.com/google/go-cmp v0.5.9 + github.com/spf13/cobra v1.7.0 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.12.0 - golang.org/x/exp v0.0.0-20220428152302-39d4317da171 + golang.org/x/exp v0.0.0-20231006140011-7918f672742d golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f ) require ( github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect - github.com/fatih/color v1.14.1 // indirect + github.com/fatih/color v1.15.0 // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/hokaccha/go-prettyjson v0.0.0-20211117102719-0474bc63780f // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/magiconair/properties v1.8.6 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.17 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.0.1 // indirect @@ -31,8 +31,8 @@ require ( github.com/spf13/cast v1.5.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/subosito/gotenv v1.3.0 // indirect - golang.org/x/sys v0.4.0 // indirect - golang.org/x/text v0.3.8 // indirect + golang.org/x/sys v0.13.0 // indirect + golang.org/x/text v0.9.0 // indirect gopkg.in/ini.v1 v1.66.4 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 759c1ac..b36e3d9 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/CycloneDX/cyclonedx-go v0.7.1 h1:5w1SxjGm9MTMNTuRbEPyw21ObdbaagTWF/KfF0qHTRE= github.com/CycloneDX/cyclonedx-go v0.7.1/go.mod h1:N/nrdWQI2SIjaACyyDs/u7+ddCkyl/zkNs8xFsHF2Ps= -github.com/CycloneDX/sbom-utility v0.9.3 h1:kbseWT30dvnnyR1pMg1uqXBmIVXMcf00EMbXpH26pvM= -github.com/CycloneDX/sbom-utility v0.9.3/go.mod h1:n9hQR2A0Qa7EnC25BJEhY5sDXqUPwMWyAGcypB/H3ik= +github.com/CycloneDX/sbom-utility v0.16.0 h1:EpHNoLmw3vfVQWFFflFHmwo7mCWO833qRm+AlbG8wXY= +github.com/CycloneDX/sbom-utility v0.16.0/go.mod h1:+EfZPoy8g6iGhVpo5cH+y2la5pQ3qKImGjHMy2xA+tM= github.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= @@ -62,8 +62,8 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.14.1 h1:qfhVLaG5s+nCROl1zJsZRxFeYrHLqWroPOQ8BWiNb4w= -github.com/fatih/color v1.14.1/go.mod h1:2oHN61fhTpgcxD3TSWCgKDiH1+x4OiDVVGH8WlgGZGg= +github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= +github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI= github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= @@ -106,8 +106,8 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= @@ -134,7 +134,6 @@ github.com/hokaccha/go-prettyjson v0.0.0-20211117102719-0474bc63780f h1:7LYC+Yfk github.com/hokaccha/go-prettyjson v0.0.0-20211117102719-0474bc63780f/go.mod h1:pFlLw2CfqZiIBOx6BuCeRLCrfxBJipTY0nIOF/VbGcI= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= @@ -151,8 +150,8 @@ github.com/magiconair/properties v1.8.6/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPK github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= -github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= @@ -172,8 +171,8 @@ github.com/spf13/afero v1.8.2 h1:xehSyVa0YnHWsJ49JFljMpg1HX19V6NDZ1fkm1Xznbo= github.com/spf13/afero v1.8.2/go.mod h1:CtAatgMJh6bJEIs48Ay/FOnkljP3WeGUG0MC1RfAqwo= github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= -github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= -github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -216,8 +215,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20220428152302-39d4317da171 h1:TfdoLivD44QwvssI9Sv1xwa5DcL5XQr4au4sZ2F2NV4= -golang.org/x/exp v0.0.0-20220428152302-39d4317da171/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -329,8 +328,9 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18= -golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -338,8 +338,8 @@ golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= -golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -483,8 +483,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/ini.v1 v1.66.4 h1:SsAcf+mM7mRZo2nJNGt8mZCjG8ZRaNGMURJw7BsIST4= gopkg.in/ini.v1 v1.66.4/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= diff --git a/identifier/identifier.go b/identifier/identifier.go index b640db5..0d7d301 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -3,9 +3,9 @@ package identifier import ( + "bufio" "fmt" "io/fs" - "io/ioutil" "os" "path/filepath" "regexp" @@ -17,6 +17,7 @@ import ( "golang.org/x/exp/slices" "golang.org/x/sync/errgroup" + "github.com/CycloneDX/license-scanner/configurer" "github.com/CycloneDX/license-scanner/licenses" "github.com/CycloneDX/license-scanner/normalizer" ) @@ -29,9 +30,26 @@ var ( type Options struct { ForceResult bool OmitBlocks bool + Patterns []string + PatternMap map[string]bool Enhancements Enhancements } +// Parse out patterns into easy-to-test map +func (options *Options) ParsePatternsFromSlice(patterns []string) { + options.PatternMap = make(map[string]bool) + for _, pattern := range patterns { + options.PatternMap[pattern] = true + } +} + +func (options *Options) ParsePatternsFromString(patterns string) { + if patterns != "" { + options.Patterns = strings.Split(patterns, ",") + } + options.ParsePatternsFromSlice(options.Patterns) +} + type licenseMatch struct { LicenseId string Match Match @@ -66,62 +84,135 @@ type Block struct { Matches []string } -func Identify(options Options, licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (IdentifierResults, error) { +func Identify(identifierResults *IdentifierResults, options Options, licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (err error) { // find the licenses in the normalized text and return a list of SPDX IDs // in case of an error, return as much as we have along with an error - licenseResults, err := findAllLicensesInNormalizedData(licenseLibrary, normalizedData) - if err != nil { - return IdentifierResults{}, err + if err = findAllLicensesInNormalizedData(identifierResults, licenseLibrary, normalizedData); err != nil { + return } - if err := FromOptions(&licenseResults, options.Enhancements, licenseLibrary); err != nil { - return IdentifierResults{}, err + if err = FromOptions(identifierResults, options.Enhancements, licenseLibrary); err != nil { + return } - if err := applyMutatorLicenses(licenseLibrary.LicenseMap, &licenseResults); err != nil { - return IdentifierResults{}, err + if err = applyMutatorLicenses(licenseLibrary.LicenseMap, identifierResults); err != nil { + return } + // TODO: document why we are initializing here and why this element is never used if options.OmitBlocks { - licenseResults.Blocks = []Block{} + identifierResults.Blocks = []Block{} } - return licenseResults, err + return } -func IdentifyLicensesInString(input string, options Options, licenseLibrary *licenses.LicenseLibrary) (IdentifierResults, error) { +func IdentifyLicensesInString(identifierResults *IdentifierResults, input string, options Options, licenseLibrary *licenses.LicenseLibrary) (err error) { // instantiate normalizedData with the input license text normalizedData := normalizer.NormalizationData{ OriginalText: input, } // normalize the input license text - if err := normalizedData.NormalizeText(); err != nil { - return IdentifierResults{}, err + if err = normalizedData.NormalizeText(); err != nil { + return } - return Identify(options, licenseLibrary, normalizedData) + return Identify(identifierResults, options, licenseLibrary, normalizedData) } -func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *licenses.LicenseLibrary) (IdentifierResults, error) { - fi, err := os.Stat(filePath) - if err != nil { - return IdentifierResults{}, err +func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *licenses.LicenseLibrary) (identifierResults IdentifierResults, err error) { + // Carry filepath used for matches in result set + identifierResults = IdentifierResults{} + identifierResults.File = filePath + identifierResults.Matches = make(map[string][]Match) + + // Verify filepath exists + fi, errStat := os.Stat(filePath) + if errStat != nil { + return identifierResults, errStat } + // Only scan files of reasonable sizes + // TODO: make the max. size configurable if fi.Size() > 1000000 { - Logger.Errorf("file too large (%v > 1000000)", fi.Size()) // log error, but return nil - return IdentifierResults{}, nil + err = Logger.Errorf("file too large (%v > 1000000)", fi.Size()) // log error, but return nil + return + } + + // Pattern match: "spdx-id" + if options.PatternMap[configurer.MATCH_PATTERN_SPDX_ID] { + Logger.Infof("Matching pattern: `%s`\n", configurer.MATCH_PATTERN_SPDX_ID) + // Scan for match in first 10 lines of file + // TODO: make first X lines configurable + licenseMatches, errSpdx := findSPDXIdentifierInFile(filePath, 10) + if errSpdx != nil { + err = errSpdx + return + } + if len(licenseMatches) > 0 { + spdxId := licenseMatches[0].LicenseId + sliceMatches := []Match{licenseMatches[0].Match} + identifierResults.Matches[spdxId] = sliceMatches + } } - b, err := ioutil.ReadFile(filePath) + // Pattern match: includes "alias", "url", "primary", "associated" + // We will need to read the entire file into memory + if options.PatternMap[configurer.MATCH_PATTERN_PRIMARY] || + options.PatternMap[configurer.MATCH_PATTERN_URL] || + options.PatternMap[configurer.MATCH_PATTERN_ALIAS] || + options.PatternMap[configurer.MATCH_PATTERN_ASSOCIATED] { + var bytes []byte + bytes, err = os.ReadFile(filePath) + if err != nil { + return IdentifierResults{}, err + } + input := string(bytes) + err = IdentifyLicensesInString(&identifierResults, input, options, licenseLibrary) + } + + return +} + +const SPDX_ID_KEY = "SPDX-License-Identifier:" + +var LEN_SPDX_ID_KEY = len(SPDX_ID_KEY) + +func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []licenseMatch, err error) { + var file *os.File + // Note: parent function has already verified the file exists + // TODO: this function should perhaps accept a file handle and allow the parent to open and provide it + file, err = os.Open(filePath) if err != nil { - return IdentifierResults{}, err + Logger.Errorf("cannot open file: %s", filePath) + return } - input := string(b) + defer file.Close() + + fileReader := bufio.NewReader(file) + fileScanner := bufio.NewScanner(fileReader) - result, err := IdentifyLicensesInString(input, options, licenseLibrary) - result.File = filePath - return result, err + fileScanner.Split(bufio.ScanLines) + var foundLine string + for i := 0; i < maxLines; i++ { + fileScanner.Scan() + if strings.Contains(fileScanner.Text(), SPDX_ID_KEY) { + foundLine = fileScanner.Text() + break + } + } + if foundLine != "" { + var match licenseMatch + idx := strings.Index(foundLine, SPDX_ID_KEY) + // find start index of where the actual SPDX ID is by + // adding in the length of the SPDX License Identifier key + // Then trim any whitespace to extract the actual SPDX ID value + idx += LEN_SPDX_ID_KEY + spdxIdPlus := foundLine[idx:] + match.LicenseId = strings.TrimSpace(spdxIdPlus) + licenseMatches = append(licenseMatches, match) + } + return } func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary *licenses.LicenseLibrary) (ret []IdentifierResults, err error) { @@ -184,23 +275,24 @@ func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary return ret, err } -func findAllLicensesInNormalizedData(licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (IdentifierResults, error) { +func findAllLicensesInNormalizedData(identifierResults *IdentifierResults, licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (err error) { // initialize the result with original license text, normalized license text, and hash (md5, sha256, and sha512) - ret := IdentifierResults{ - OriginalText: normalizedData.OriginalText, - NormalizedText: normalizedData.NormalizedText, - Hash: normalizedData.Hash, - } + identifierResults.OriginalText = normalizedData.OriginalText + identifierResults.NormalizedText = normalizedData.NormalizedText + identifierResults.Hash = normalizedData.Hash // LicenseID-to-matches map to return - ret.Matches = make(map[string][]Match) + if identifierResults.Matches == nil { + identifierResults.Matches = make(map[string][]Match) + } + // List with LicenseID and indexes for generating text blocks var licensesMatched []licenseMatch for id, lic := range licenseLibrary.LicenseMap { - matches, err := findLicenseInNormalizedData(lic, normalizedData, licenseLibrary) - if err != nil { - return ret, err + matches, errFind := findLicenseInNormalizedData(lic, normalizedData, licenseLibrary) + if errFind != nil { + return errFind } // Sort the matches slice by start and end index. @@ -217,18 +309,17 @@ func findAllLicensesInNormalizedData(licenseLibrary *licenses.LicenseLibrary, no continue // remove duplicates } licensesMatched = append(licensesMatched, licenseMatch{LicenseId: id, Match: matches[i]}) - ret.Matches[id] = append(ret.Matches[id], matches[i]) + identifierResults.Matches[id] = append(identifierResults.Matches[id], matches[i]) } } // Generate Blocks. - blocks, err := generateTextBlocks(normalizedData.OriginalText, licensesMatched) + identifierResults.Blocks, err = generateTextBlocks(normalizedData.OriginalText, licensesMatched) if err != nil { - return ret, err + return err } - ret.Blocks = blocks - return ret, nil + return } func findLicenseInNormalizedData(lic licenses.License, normalizedData normalizer.NormalizationData, ll *licenses.LicenseLibrary) (licenseMatches []Match, err error) { diff --git a/identifier/identifier_spdx_test.go b/identifier/identifier_spdx_test.go index 424ebeb..1455edb 100644 --- a/identifier/identifier_spdx_test.go +++ b/identifier/identifier_spdx_test.go @@ -12,6 +12,7 @@ import ( "strings" "testing" + "github.com/CycloneDX/license-scanner/configurer" "github.com/CycloneDX/license-scanner/licenses" ) @@ -41,6 +42,8 @@ func Test_identifyLicensesInSPDXTestDataDirectory(t *testing.T) { if err := licenseLibrary.AddAllSPDX(); err != nil { t.Fatalf("licenseLibrary.AddAllSPDX() error = %v", err) } + // Initialize the match pattern options to include all functions + options.ParsePatternsFromSlice(configurer.SUPPORTED_MATCH_PATTERNS) results, err := IdentifyLicensesInDirectory(testDataDir, options, licenseLibrary) if err != nil { diff --git a/identifier/identifier_test.go b/identifier/identifier_test.go index 6cdf004..38f61fb 100644 --- a/identifier/identifier_test.go +++ b/identifier/identifier_test.go @@ -511,16 +511,17 @@ func Test_identifyLicensesInString(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - got, err := IdentifyLicensesInString(tt.args.input, options, licenseLibrary) + identifierResults := IdentifierResults{} + err := IdentifyLicensesInString(&identifierResults, tt.args.input, options, licenseLibrary) if (err != nil) != tt.wantErr { t.Errorf("identifyLicensesInString() error = %v, wantErr %v", err, tt.wantErr) - } else if d := cmp.Diff(tt.want.Matches, got.Matches, cmp.AllowUnexported(Match{})); d != "" { + } else if d := cmp.Diff(tt.want.Matches, identifierResults.Matches, cmp.AllowUnexported(Match{})); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) - } else if d := cmp.Diff(tt.want.CopyRightStatements, got.CopyRightStatements); d != "" { + } else if d := cmp.Diff(tt.want.CopyRightStatements, identifierResults.CopyRightStatements); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) - } else if d := cmp.Diff(tt.want.Blocks, got.Blocks); d != "" { + } else if d := cmp.Diff(tt.want.Blocks, identifierResults.Blocks); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) - } else if d := cmp.Diff(tt.want.Hash, got.Hash); d != "" { + } else if d := cmp.Diff(tt.want.Hash, identifierResults.Hash); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) } }) @@ -604,12 +605,13 @@ func Test_identifyLicensesInStringPreChecks(t *testing.T) { if err := ll.AddAll(); err != nil { t.Fatalf("AddAll() error = %v", err) } - got, err := IdentifyLicensesInString(tt.input, options, ll) + identifierResults := IdentifierResults{} + err = IdentifyLicensesInString(&identifierResults, tt.input, options, ll) if err != nil { t.Errorf("identifyLicensesInString() error = %v", err) - } else if d := cmp.Diff(tt.want.Matches, got.Matches, cmp.AllowUnexported(Match{})); d != "" { + } else if d := cmp.Diff(tt.want.Matches, identifierResults.Matches, cmp.AllowUnexported(Match{})); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) - } else if d := cmp.Diff(tt.want.Blocks, got.Blocks); d != "" { + } else if d := cmp.Diff(tt.want.Blocks, identifierResults.Blocks); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) } }) diff --git a/normalizer/normalizer.go b/normalizer/normalizer.go index 38839eb..1d769d2 100644 --- a/normalizer/normalizer.go +++ b/normalizer/normalizer.go @@ -145,8 +145,8 @@ func NewNormalizationData(originalText string, isTemplate bool) *NormalizationDa func (n *NormalizationData) NormalizeText() error { // verify that the original text is a string with a length of at least one. if len(n.OriginalText) < 1 { - Logger.Error("Invalid text") - return fmt.Errorf("failed to normalize data: invalid input text with length %d", len(n.OriginalText)) + err := Logger.Errorf("failed to normalize data: invalid input text with length %d", len(n.OriginalText)) + return err } // Check if the text contains control characters indicative of binary or non-text files.