forked from ooni/probe-cli
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(minipipeline): introduce "classic" observations filtering (ooni#…
…1402) Classic filtering is a `WebObservationsContainer` filtering technique that takes a `WebObservationsContainer` as input and only keeps DNS lookups using `getaddrinfo` and endpoints whose IP address has been discovered using `getaddrinfo`. By applying this technique, we reduce the richer dataset produced by Web Connectivity LTE to a smaller dataset comparable to what Web Connectivity v0.4 would return. In turn, by focusing the analysis on the reduced dataset, we hope to emulate the results produced by v0.4 for backward compatible test keys. I named this feature "classic" because it's what we used to do and I don't want to call it legacy. Part of ooni/probe#2634.
- Loading branch information
1 parent
2fc7125
commit 23a6844
Showing
74 changed files
with
7,023 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"DNSExperimentFailure": null, | ||
"DNSTransactionsWithBogons": {}, | ||
"DNSTransactionsWithUnexpectedFailures": null, | ||
"DNSPossiblyInvalidAddrs": {}, | ||
"DNSPossiblyInvalidAddrsClassic": {}, | ||
"DNSPossiblyNonexistingDomains": null, | ||
"HTTPDiffBodyProportionFactor": 1, | ||
"HTTPDiffStatusCodeMatch": true, | ||
"HTTPDiffTitleDifferentLongWords": {}, | ||
"HTTPDiffUncommonHeadersIntersection": { | ||
"x-drupal-cache": true, | ||
"x-generator": true | ||
}, | ||
"HTTPFinalResponsesWithControl": { | ||
"4": true | ||
}, | ||
"HTTPFinalResponsesWithTLS": { | ||
"4": true | ||
}, | ||
"TCPTransactionsWithUnexpectedTCPConnectFailures": {}, | ||
"TCPTransactionsWithUnexpectedTLSHandshakeFailures": {}, | ||
"TCPTransactionsWithUnexpectedHTTPFailures": {}, | ||
"TCPTransactionsWithUnexplainedUnexpectedFailures": {} | ||
} |
111 changes: 111 additions & 0 deletions
111
internal/cmd/minipipeline/testdata/observations_classic.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
{ | ||
"DNSLookupFailures": [], | ||
"DNSLookupSuccesses": [ | ||
{ | ||
"DNSTransactionID": 1, | ||
"DNSDomain": "nexa.polito.it", | ||
"DNSLookupFailure": "", | ||
"DNSQueryType": "ANY", | ||
"DNSEngine": "getaddrinfo", | ||
"IPAddress": "130.192.16.171", | ||
"IPAddressASN": 137, | ||
"IPAddressBogon": false, | ||
"EndpointTransactionID": null, | ||
"EndpointProto": null, | ||
"EndpointPort": null, | ||
"EndpointAddress": null, | ||
"TCPConnectFailure": null, | ||
"TLSHandshakeFailure": null, | ||
"TLSServerName": null, | ||
"HTTPRequestURL": null, | ||
"HTTPFailure": null, | ||
"HTTPResponseStatusCode": null, | ||
"HTTPResponseBodyLength": null, | ||
"HTTPResponseBodyIsTruncated": null, | ||
"HTTPResponseHeadersKeys": null, | ||
"HTTPResponseLocation": null, | ||
"HTTPResponseTitle": null, | ||
"HTTPResponseIsFinal": null, | ||
"ControlDNSDomain": null, | ||
"ControlDNSLookupFailure": null, | ||
"ControlTCPConnectFailure": null, | ||
"MatchWithControlIPAddress": null, | ||
"MatchWithControlIPAddressASN": null, | ||
"ControlTLSHandshakeFailure": null, | ||
"ControlHTTPFailure": null, | ||
"ControlHTTPResponseStatusCode": null, | ||
"ControlHTTPResponseBodyLength": null, | ||
"ControlHTTPResponseHeadersKeys": null, | ||
"ControlHTTPResponseTitle": null | ||
} | ||
], | ||
"KnownTCPEndpoints": { | ||
"4": { | ||
"DNSTransactionID": 3, | ||
"DNSDomain": "nexa.polito.it", | ||
"DNSLookupFailure": "", | ||
"DNSQueryType": null, | ||
"DNSEngine": null, | ||
"IPAddress": "130.192.16.171", | ||
"IPAddressASN": 137, | ||
"IPAddressBogon": false, | ||
"EndpointTransactionID": 4, | ||
"EndpointProto": "tcp", | ||
"EndpointPort": "443", | ||
"EndpointAddress": "130.192.16.171:443", | ||
"TCPConnectFailure": "", | ||
"TLSHandshakeFailure": "", | ||
"TLSServerName": "nexa.polito.it", | ||
"HTTPRequestURL": "https://nexa.polito.it/", | ||
"HTTPFailure": "", | ||
"HTTPResponseStatusCode": 200, | ||
"HTTPResponseBodyLength": 36564, | ||
"HTTPResponseBodyIsTruncated": false, | ||
"HTTPResponseHeadersKeys": { | ||
"Cache-Control": true, | ||
"Content-Language": true, | ||
"Content-Type": true, | ||
"Date": true, | ||
"Etag": true, | ||
"Expires": true, | ||
"Last-Modified": true, | ||
"Link": true, | ||
"Server": true, | ||
"Vary": true, | ||
"X-Content-Type-Options": true, | ||
"X-Drupal-Cache": true, | ||
"X-Frame-Options": true, | ||
"X-Generator": true | ||
}, | ||
"HTTPResponseLocation": null, | ||
"HTTPResponseTitle": "Nexa Center for Internet \u0026 Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino", | ||
"HTTPResponseIsFinal": true, | ||
"ControlDNSDomain": null, | ||
"ControlDNSLookupFailure": null, | ||
"ControlTCPConnectFailure": "", | ||
"MatchWithControlIPAddress": true, | ||
"MatchWithControlIPAddressASN": true, | ||
"ControlTLSHandshakeFailure": "", | ||
"ControlHTTPFailure": "", | ||
"ControlHTTPResponseStatusCode": 200, | ||
"ControlHTTPResponseBodyLength": 36564, | ||
"ControlHTTPResponseHeadersKeys": { | ||
"Cache-Control": true, | ||
"Content-Language": true, | ||
"Content-Type": true, | ||
"Date": true, | ||
"Etag": true, | ||
"Expires": true, | ||
"Last-Modified": true, | ||
"Link": true, | ||
"Server": true, | ||
"Vary": true, | ||
"X-Content-Type-Options": true, | ||
"X-Drupal-Cache": true, | ||
"X-Frame-Options": true, | ||
"X-Generator": true | ||
}, | ||
"ControlHTTPResponseTitle": "Nexa Center for Internet \u0026 Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package minipipeline | ||
|
||
// ClassicFilter takes in input a [*WebObservationsContainer] and returns in output | ||
// another [*WebObservationsContainer] where we only keep: | ||
// | ||
// 1. DNS lookups using getaddrinfo; | ||
// | ||
// 2. IP addresses discovered using getaddrinfo; | ||
// | ||
// 3. endpoints using such IP addresses. | ||
// | ||
// We use this filter to produce a backward compatible Web Connectivity analysis | ||
// when the input [*WebObservationsContainer] was built using LTE. | ||
// | ||
// The result should approximate what v0.4 would have measured. | ||
func ClassicFilter(input *WebObservationsContainer) (output *WebObservationsContainer) { | ||
output = &WebObservationsContainer{ | ||
DNSLookupFailures: []*WebObservation{}, | ||
DNSLookupSuccesses: []*WebObservation{}, | ||
KnownTCPEndpoints: map[int64]*WebObservation{}, | ||
knownIPAddresses: map[string]*WebObservation{}, | ||
} | ||
|
||
// DNSLookupFailures | ||
for _, entry := range input.DNSLookupFailures { | ||
if !utilsEngineIsGetaddrinfo(entry.DNSEngine) { | ||
continue | ||
} | ||
output.DNSLookupFailures = append(output.DNSLookupFailures, entry) | ||
} | ||
|
||
// DNSLookupSuccesses & knownIPAddresses | ||
for _, entry := range input.DNSLookupSuccesses { | ||
if !utilsEngineIsGetaddrinfo(entry.DNSEngine) { | ||
continue | ||
} | ||
ipAddr := entry.IPAddress.Unwrap() // it MUST be there | ||
output.DNSLookupSuccesses = append(output.DNSLookupSuccesses, entry) | ||
output.knownIPAddresses[ipAddr] = entry | ||
} | ||
|
||
// KnownTCPEndpoints | ||
for _, entry := range input.KnownTCPEndpoints { | ||
ipAddr := entry.IPAddress.Unwrap() // it MUST be there | ||
txid := entry.EndpointTransactionID.Unwrap() | ||
if output.knownIPAddresses[ipAddr] == nil { | ||
continue | ||
} | ||
output.KnownTCPEndpoints[txid] = entry | ||
} | ||
|
||
return | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
...ine/testdata/webconnectivity/generated/badSSLWithExpiredCertificate/analysis_classic.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"DNSExperimentFailure": null, | ||
"DNSTransactionsWithBogons": {}, | ||
"DNSTransactionsWithUnexpectedFailures": null, | ||
"DNSPossiblyInvalidAddrs": {}, | ||
"DNSPossiblyInvalidAddrsClassic": {}, | ||
"DNSPossiblyNonexistingDomains": null, | ||
"HTTPDiffBodyProportionFactor": null, | ||
"HTTPDiffStatusCodeMatch": null, | ||
"HTTPDiffTitleDifferentLongWords": null, | ||
"HTTPDiffUncommonHeadersIntersection": null, | ||
"HTTPFinalResponsesWithControl": null, | ||
"HTTPFinalResponsesWithTLS": null, | ||
"TCPTransactionsWithUnexpectedTCPConnectFailures": {}, | ||
"TCPTransactionsWithUnexpectedTLSHandshakeFailures": {}, | ||
"TCPTransactionsWithUnexpectedHTTPFailures": null, | ||
"TCPTransactionsWithUnexplainedUnexpectedFailures": null | ||
} |
Oops, something went wrong.