-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathaws_key_scanner.go
171 lines (118 loc) · 4.52 KB
/
aws_key_scanner.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
// Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements;
// and to You under the Apache License, Version 2.0. See LICENSE in project root for full license + copyright.
package keynuker
import (
"regexp"
"log"
"github.com/derekparker/trie"
)
// ScannerState records whether the trie-based scanner is currently partway
// through a candidate token or between tokens.
type ScannerState int

const (
	// ScannerStateInToken means the current scanning window is a prefix of
	// at least one key being searched for.
	ScannerStateInToken ScannerState = iota

	// ScannerStateOutsideToken means the current scanning window is not
	// known to be a prefix of any key.
	ScannerStateOutsideToken
)
// Scan reports which of accessKeysToScan occur in content.
//
// It is the package's default scanning entry point and currently delegates
// to ScanViaRegexLoop, returning that function's results unchanged.
func Scan(accessKeysToScan []FetchedAwsAccessKey, content []byte) (leaks []FetchedAwsAccessKey, err error) {
	leaks, err = ScanViaRegexLoop(accessKeysToScan, content)
	return leaks, err
}
// ScanViaRegexLoop reports which of accessKeysToScan have their AccessKeyId
// present anywhere in content.
//
// Every key is searched for as a literal string: it is escaped with
// regexp.QuoteMeta before being compiled, so regex metacharacters inside a
// key (".", "+", "(", ...) only ever match themselves and can neither cause
// false positives nor make compilation fail. Entries whose AccessKeyId is nil
// are skipped instead of being dereferenced.
//
// This is grossly inefficient (one regex is compiled and run over the whole
// content per key) but it passes all of the tests.
//
// The returned slice holds the matching entries in input order; it is nil
// when nothing matched.
func ScanViaRegexLoop(accessKeysToScan []FetchedAwsAccessKey, content []byte) (leaks []FetchedAwsAccessKey, err error) {
	for _, keyMetadata := range accessKeysToScan {
		if keyMetadata.AccessKeyId == nil {
			// Nothing to search for.
			continue
		}

		// Quote the key so it is matched literally, not interpreted as a pattern.
		r, compileErr := regexp.Compile(regexp.QuoteMeta(*keyMetadata.AccessKeyId))
		if compileErr != nil {
			return nil, compileErr
		}

		if r.Match(content) {
			leaks = append(leaks, keyMetadata)
		}
	}
	return leaks, nil
}
// ScanViaTrie scans content with a sliding window of runes and uses a trie
// prefix lookup to decide whether the window could still grow into one of the
// access key ids in accessKeysToScan.
//
// The window grows by one rune while it is a prefix of some key, and every
// time it is exactly equal to a key that key is recorded. When the window
// stops matching it is either slid right (if it had been matching) or shrunk
// back to a single rune before sliding. Once the window touches the end of
// content it cannot grow any further, so its start is advanced instead;
// without that step the same window would be re-examined forever whenever
// content ends with a key or with a prefix of a key.
//
// Entries whose AccessKeyId is nil are skipped. Each leaked key is reported
// once; the order of the returned slice is unspecified because results are
// collected in a map. The returned slice is non-nil even when empty, and the
// returned error is always nil.
//
// This was measured by the original author as approx 2x faster than
// ScanViaRegexLoop, but it still feels slow.
// TODO: try using lexmachine and see if it's a lot faster. Do some
// post-processing to deal with nested tokens (where one token contains
// another, which is one of the unit tests).
func ScanViaTrie(accessKeysToScan []FetchedAwsAccessKey, content []byte) (leaks []FetchedAwsAccessKey, err error) {

	// Flip to true to log every window the scanner examines.
	const debug = false

	// Index every key in the trie (for prefix lookups) and in a map (for
	// exact-match lookups that return the key's metadata).
	keyTrie := trie.New()
	exactMatch := make(map[string]FetchedAwsAccessKey, len(accessKeysToScan))
	for _, keyMetadata := range accessKeysToScan {
		if keyMetadata.AccessKeyId == nil {
			continue
		}
		key := *keyMetadata.AccessKeyId
		keyTrie.Add(key, keyMetadata)
		exactMatch[key] = keyMetadata
	}

	uniqueLeaks := map[string]FetchedAwsAccessKey{}
	runes := []rune(string(content))
	currentScannerState := ScannerStateOutsideToken

	// The scanning window is runes[scanStartPointer:scanEndPointer].
	scanStartPointer := 0
	scanEndPointer := 1

	for scanStartPointer < len(runes) {

		if scanEndPointer > len(runes) {
			scanEndPointer = len(runes)
		}

		window := string(runes[scanStartPointer:scanEndPointer])

		if debug {
			currentStateName := "ScannerStateOutsideToken"
			if currentScannerState == ScannerStateInToken {
				currentStateName = "ScannerStateInToken"
			}
			log.Printf("runes[%d:%d] - state: %s", scanStartPointer, scanEndPointer, currentStateName)
			log.Printf("currentRunes: |%v|", window)
		}

		if keyTrie.HasKeysWithPrefix(window) {

			if accessKeyMeta, found := exactMatch[window]; found {
				if debug {
					log.Printf("trie exact match: %v", *accessKeyMeta.AccessKeyId)
				}
				uniqueLeaks[window] = accessKeyMeta
			}

			if scanEndPointer < len(runes) {
				if debug {
					log.Printf("trie.HasKeysWithPrefix: |%v|. Switching state -> ScannerStateInToken", window)
				}
				currentScannerState = ScannerStateInToken
				// Keep consuming runes by expanding the scanning window to be one rune larger
				scanEndPointer++
			} else {
				// The window already reaches the end of the input, so it cannot
				// grow. Advance the start instead so the remaining, shorter
				// suffixes are still checked and the loop is guaranteed to
				// make progress.
				currentScannerState = ScannerStateOutsideToken
				scanStartPointer++
			}
			continue
		}

		// No key starts with the current window.
		switch currentScannerState {

		case ScannerStateInToken:
			// There are no prefix matches, and we were in a possible token, so we're out of the token now
			currentScannerState = ScannerStateOutsideToken

			// Suppose the goal is to find these two keys [ROX, OA5]
			// And the input text is: "ROA5XYZ"
			// It will be in a token match state for the "RO" since the prefix will match "ROX",
			// but when it gets to the "A" in "ROA5", the "ROA" prefix will not match anything.
			// At this point, slide the start of the window one rune to the right so it's now aligned to "OA5"
			scanStartPointer++

		case ScannerStateOutsideToken:
			// There are no prefix matches, and we weren't in a token
			if scanEndPointer > scanStartPointer+1 {
				// This can happen if we just recently partially or fully matched a token, and have a wide scanning window,
				// but then on reading the next rune it doesn't match any tokens, but still has this wide scanning window.
				// Shrink the window back down until it's reading a single character. It does do some (probably)
				// unnecessary scanning
				scanEndPointer--
			} else {
				// Normal case
				// Slide the scanning window one rune to the right
				scanStartPointer++
				scanEndPointer++
			}
		}
	}

	leaks = make([]FetchedAwsAccessKey, 0, len(uniqueLeaks))
	for _, accessKeyMeta := range uniqueLeaks {
		leaks = append(leaks, accessKeyMeta)
	}

	return leaks, nil
}