From 0d55aa76cff70d3f0848bed4333edb0b79b89592 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 1 Dec 2023 00:38:11 +0100 Subject: [PATCH] feat(webconnectivitylte): introduce classic analysis (#1420) This diff modifies Web Connectivity LTE to support different data analysis engines, refactors existing code as the "orig" engine, sets "orig" as the default, and introduces the "classic" analysis engine. Such a new engine aims to produce the same results of Web Connectivity v0.4 (as far as our test suite can test). Part of https://github.com/ooni/probe/issues/2634 --- .../webconnectivitylte/analysisclassic.go | 339 ++++++++++++++++++ .../webconnectivitylte/analysiscore.go | 15 + .../webconnectivitylte/analysisdns.go | 5 +- .../experiment/webconnectivitylte/testkeys.go | 5 +- internal/minipipeline/analysis.go | 2 +- 5 files changed, 361 insertions(+), 5 deletions(-) create mode 100644 internal/experiment/webconnectivitylte/analysisclassic.go diff --git a/internal/experiment/webconnectivitylte/analysisclassic.go b/internal/experiment/webconnectivitylte/analysisclassic.go new file mode 100644 index 000000000..937054e15 --- /dev/null +++ b/internal/experiment/webconnectivitylte/analysisclassic.go @@ -0,0 +1,339 @@ +package webconnectivitylte + +import ( + "github.com/ooni/probe-cli/v3/internal/minipipeline" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/optional" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +// AnalysisEngineClassic is an alternative analysis engine that aims to produce +// results that are backward compatible with Web Connectivity v0.4. +func AnalysisEngineClassic(tk *TestKeys, logger model.Logger) { + tk.analysisClassic(logger) +} + +func (tk *TestKeys) analysisClassic(logger model.Logger) { + // Since we run after all tasks have completed (or so we assume) we're + // not going to use any form of locking here. + + // 1. 
produce web observations + container := minipipeline.NewWebObservationsContainer() + container.IngestDNSLookupEvents(tk.Queries...) + container.IngestTCPConnectEvents(tk.TCPConnect...) + container.IngestTLSHandshakeEvents(tk.TLSHandshakes...) + container.IngestHTTPRoundTripEvents(tk.Requests...) + + // be defensive in case the control request or response are not defined + if tk.ControlRequest != nil && tk.Control != nil { + // Implementation note: the only error that can happen here is when the input + // doesn't parse as a URL, which should have caused measurer.go to fail + runtimex.Try0(container.IngestControlMessages(tk.ControlRequest, tk.Control)) + } + + // 2. filter observations to only include results collected by the + // system resolver, which approximates v0.4's results + classic := minipipeline.ClassicFilter(container) + + // 3. produce a web observations analysis based on the web observations + woa := minipipeline.AnalyzeWebObservations(classic) + + // 4. determine the DNS consistency + tk.DNSConsistency = analysisClassicDNSConsistency(woa) + + // 5. compute the HTTPDiff values + tk.setHTTPDiffValues(woa) + + // 6. 
compute blocking & accessible + analysisClassicComputeBlockingAccessible(woa, tk) +} + +func analysisClassicDNSConsistency(woa *minipipeline.WebAnalysis) optional.Value[string] { + switch { + case woa.DNSLookupUnexpectedFailure.Len() <= 0 && // no unexpected failures; and + woa.DNSLookupSuccessWithInvalidAddressesClassic.Len() <= 0 && // no invalid addresses; and + (woa.DNSLookupSuccessWithValidAddressClassic.Len() > 0 || // good addrs; or + woa.DNSLookupExpectedFailure.Len() > 0): // expected failures + return optional.Some("consistent") + + case woa.DNSLookupSuccessWithInvalidAddressesClassic.Len() > 0 || // unexpected addrs; or + woa.DNSLookupUnexpectedFailure.Len() > 0: // unexpected failures + return optional.Some("inconsistent") + + default: + return optional.None[string]() // none of the above + } +} + +func (tk *TestKeys) setHTTPDiffValues(woa *minipipeline.WebAnalysis) { + const bodyProportionFactor = 0.7 + if !woa.HTTPFinalResponseDiffBodyProportionFactor.IsNone() { + value := woa.HTTPFinalResponseDiffBodyProportionFactor.Unwrap() > bodyProportionFactor + tk.BodyLengthMatch = &value + } + + if !woa.HTTPFinalResponseDiffUncommonHeadersIntersection.IsNone() { + value := len(woa.HTTPFinalResponseDiffUncommonHeadersIntersection.Unwrap()) > 0 + tk.HeadersMatch = &value + } + + if !woa.HTTPFinalResponseDiffStatusCodeMatch.IsNone() { + value := woa.HTTPFinalResponseDiffStatusCodeMatch.Unwrap() + tk.StatusCodeMatch = &value + } + + if !woa.HTTPFinalResponseDiffTitleDifferentLongWords.IsNone() { + value := len(woa.HTTPFinalResponseDiffTitleDifferentLongWords.Unwrap()) <= 0 + tk.TitleMatch = &value + } +} + +type analysisClassicTestKeysProxy interface { + // setBlockingString sets blocking to a string. + setBlockingString(value string) + + // setBlockingNil sets blocking to nil. + setBlockingNil() + + // setBlockingFalse sets Blocking to false. + setBlockingFalse() + + // httpDiff returns true if there's an http-diff. 
+ httpDiff() bool +} + +var _ analysisClassicTestKeysProxy = &TestKeys{} + +// httpDiff implements analysisClassicTestKeysProxy. +func (tk *TestKeys) httpDiff() bool { + if tk.StatusCodeMatch != nil && *tk.StatusCodeMatch { + if tk.BodyLengthMatch != nil && *tk.BodyLengthMatch { + return false + } + if tk.HeadersMatch != nil && *tk.HeadersMatch { + return false + } + if tk.TitleMatch != nil && *tk.TitleMatch { + return false + } + // fallthrough + } + return true +} + +// setBlockingFalse implements analysisClassicTestKeysProxy. +func (tk *TestKeys) setBlockingFalse() { + tk.Blocking = false + tk.Accessible = true +} + +// setBlockingNil implements analysisClassicTestKeysProxy. +func (tk *TestKeys) setBlockingNil() { + if !tk.DNSConsistency.IsNone() && tk.DNSConsistency.Unwrap() == "inconsistent" { + tk.Blocking = "dns" + tk.Accessible = false + } else { + tk.Blocking = nil + tk.Accessible = nil + } +} + +// setBlockingString implements analysisClassicTestKeysProxy. +func (tk *TestKeys) setBlockingString(value string) { + if !tk.DNSConsistency.IsNone() && tk.DNSConsistency.Unwrap() == "inconsistent" { + tk.Blocking = "dns" + } else { + tk.Blocking = value + } + tk.Accessible = false +} + +func analysisClassicComputeBlockingAccessible(woa *minipipeline.WebAnalysis, tk analysisClassicTestKeysProxy) { + // minipipeline.NewLinearWebAnalysis produces a woa.Linear sorted + // + // 1. by descending TagDepth; + // + // 2. with TagDepth being equal, by descending [WebObservationType]; + // + // 3. with [WebObservationType] being equal, by ascending failure string; + // + // This means that you divide the list in groups like this: + // + // +------------+------------+------------+------------+ + // | TagDepth=3 | TagDepth=2 | TagDepth=1 | TagDepth=0 | + // +------------+------------+------------+------------+ + // + // Where TagDepth=3 is the last redirect and TagDepth=0 is the initial request. 
+ // + // Each group is further divided as follows: + // + // +------+-----+-----+-----+ + // | HTTP | TLS | TCP | DNS | + // +------+-----+-----+-----+ + // + // Where each group may be empty. The first non-empty group is about the + // operation that failed for the current TagDepth. + // + // Within each group, successes sort before failures because the empty + // string has priority over non-empty strings. + // + // So, when walking the list from index 0 to index N, you encounter the + // latest redirects first, you observe the more complex operations first, + // and you see successes before failures. + for _, entry := range woa.Linear { + + // 1. As a special case, handle a "final" response first. We define "final" a + // successful response whose status code is like 2xx, 4xx, or 5xx. + if !entry.HTTPResponseIsFinal.IsNone() && entry.HTTPResponseIsFinal.Unwrap() { + + // 1.1. Handle the case of successful response over TLS. + if !entry.TLSHandshakeFailure.IsNone() && entry.TLSHandshakeFailure.Unwrap() == "" { + tk.setBlockingFalse() + return + } + + // 1.2. Handle the case of missing HTTP control. + if entry.ControlHTTPFailure.IsNone() { + tk.setBlockingNil() + return + } + + // 1.3. Figure out whether the measurement and the control are close enough. + if !tk.httpDiff() { + tk.setBlockingFalse() + return + } + + // 1.4. There's something different in the two responses. + tk.setBlockingString("http-diff") + return + } + + // 2. Let's now focus on failed HTTP round trips. + if entry.Type == minipipeline.WebObservationTypeHTTPRoundTrip && + !entry.Failure.IsNone() && entry.Failure.Unwrap() != "" { + + // 2.1. Handle the case of a missing HTTP control. Maybe + // the control server is unreachable or blocked. + if entry.ControlHTTPFailure.IsNone() { + tk.setBlockingNil() + return + } + + // 2.2. Handle the case where both the probe and the control failed. 
+ if entry.ControlHTTPFailure.Unwrap() != "" { + // TODO(bassosimone): returning this result is wrong and we + // should also set Accessible to false. However, v0.4 + // does this and we should play along for the A/B testing. + tk.setBlockingFalse() + return + } + + // 2.3. Handle the case where just the probe failed. + tk.setBlockingString("http-failure") + return + } + + // 3. Handle the case of TLS failure. + if entry.Type == minipipeline.WebObservationTypeTLSHandshake && + !entry.Failure.IsNone() && entry.Failure.Unwrap() != "" { + + // 3.1. Handle the case of missing TLS control information. The control + // only provides information for the first request. Once we start following + // redirects we do not have TLS/TCP/DNS control. + if entry.ControlTLSHandshakeFailure.IsNone() { + + // 3.1.1 Handle the case of missing an expectation about what + // accessing the website should lead to, which is set forth by + // the control accessing the website and telling us. + if entry.ControlHTTPFailure.IsNone() { + tk.setBlockingNil() + return + } + + // 3.1.2. Otherwise, if the control worked, that's blocking. + tk.setBlockingString("http-failure") + return + } + + // 3.2. Handle the case where both probe and control failed. + if entry.ControlTLSHandshakeFailure.Unwrap() != "" { + // TODO(bassosimone): returning this result is wrong and we + // should set Accessible and Blocking to false. However, v0.4 + // does this and we should play along for the A/B testing. + tk.setBlockingNil() + return + } + + // 3.3. Handle the case where just the probe failed. + tk.setBlockingString("http-failure") + return + } + + // 4. Handle the case of TCP failure. + if entry.Type == minipipeline.WebObservationTypeTCPConnect && + !entry.Failure.IsNone() && entry.Failure.Unwrap() != "" { + + // 4.1. Handle the case of missing TCP control info. 
+ if entry.ControlTCPConnectFailure.IsNone() { + + // 4.1.1 Handle the case of missing an expectation about what + // accessing the website should lead to. + if entry.ControlHTTPFailure.IsNone() { + tk.setBlockingNil() + return + } + + // 4.1.2. Otherwise, if the control worked, that's blocking. + tk.setBlockingString("http-failure") + return + } + + // 4.2. Handle the case where both probe and control failed. + if entry.ControlTCPConnectFailure.Unwrap() != "" { + // TODO(bassosimone): returning this result is wrong and we + // should set Accessible and Blocking to false. However, v0.4 + // does this and we should play along for the A/B testing. + tk.setBlockingFalse() + return + } + + // 4.3. Handle the case where just the probe failed. + tk.setBlockingString("tcp_ip") + return + } + + // 5. Handle the case of DNS failure + if entry.Type == minipipeline.WebObservationTypeDNSLookup && + !entry.Failure.IsNone() && entry.Failure.Unwrap() != "" { + + // 5.1. Handle the case of missing DNS control info. + if entry.ControlDNSLookupFailure.IsNone() { + + // 5.1.1 Handle the case of missing an expectation about what + // accessing the website should lead to. + if entry.ControlHTTPFailure.IsNone() { + tk.setBlockingFalse() + return + } + + // 5.1.2. Otherwise, if the control worked, that's blocking. + tk.setBlockingString("dns") + return + } + + // 5.2. Handle the case where both probe and control failed. + if entry.ControlDNSLookupFailure.Unwrap() != "" { + // TODO(bassosimone): returning this result is wrong and we + // should set Accessible and Blocking to false. However, v0.4 + // does this and we should play along for the A/B testing. + tk.setBlockingFalse() + return + } + + // 5.3. Handle the case where just the probe failed. 
+ tk.setBlockingString("dns") + return + } + } +} diff --git a/internal/experiment/webconnectivitylte/analysiscore.go b/internal/experiment/webconnectivitylte/analysiscore.go index 3c745ccd0..cc399ddb8 100644 --- a/internal/experiment/webconnectivitylte/analysiscore.go +++ b/internal/experiment/webconnectivitylte/analysiscore.go @@ -36,6 +36,10 @@ const ( analysisFlagSuccess ) +// AnalysisEngineFn is the function that runs the analysis engine for +// processing and scoring measurements collected by LTE. +var AnalysisEngineFn func(tk *TestKeys, logger model.Logger) = AnalysisEngineOrig + // analysisToplevel is the toplevel function that analyses the results // of the experiment once all network tasks have completed. // @@ -95,6 +99,17 @@ const ( // As an improvement over Web Connectivity v0.4, we also attempt to identify // special subcases of a null, null result to provide the user with more information. func (tk *TestKeys) analysisToplevel(logger model.Logger) { + AnalysisEngineFn(tk, logger) +} + +// AnalysisEngineOrig is the original analysis engine we wrote for LTE. This engine +// aims to detect and report about all the possible ways in which the measured website +// is blocked. As of 2023-11-30, we still consider this engine experimental. +func AnalysisEngineOrig(tk *TestKeys, logger model.Logger) { + tk.analysisOrig(logger) +} + +func (tk *TestKeys) analysisOrig(logger model.Logger) { // Since we run after all tasks have completed (or so we assume) we're // not going to use any form of locking here. 
diff --git a/internal/experiment/webconnectivitylte/analysisdns.go b/internal/experiment/webconnectivitylte/analysisdns.go index d68c6e0f1..06f456744 100644 --- a/internal/experiment/webconnectivitylte/analysisdns.go +++ b/internal/experiment/webconnectivitylte/analysisdns.go @@ -10,6 +10,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/optional" ) const ( @@ -62,11 +63,11 @@ func (tk *TestKeys) analysisDNSToplevel(logger model.Logger, lookupper model.Geo tk.analysisDNSUnexpectedAddrs(logger, lookupper) if tk.DNSFlags != 0 { logger.Warn("DNSConsistency: inconsistent") - tk.DNSConsistency = "inconsistent" + tk.DNSConsistency = optional.Some("inconsistent") tk.BlockingFlags |= analysisFlagDNSBlocking } else { logger.Info("DNSConsistency: consistent") - tk.DNSConsistency = "consistent" + tk.DNSConsistency = optional.Some("consistent") } } diff --git a/internal/experiment/webconnectivitylte/testkeys.go b/internal/experiment/webconnectivitylte/testkeys.go index b58065911..03e14f574 100644 --- a/internal/experiment/webconnectivitylte/testkeys.go +++ b/internal/experiment/webconnectivitylte/testkeys.go @@ -14,6 +14,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/experiment/webconnectivity" "github.com/ooni/probe-cli/v3/internal/legacy/tracex" "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/optional" ) // TestKeys contains the results produced by web_connectivity. @@ -81,7 +82,7 @@ type TestKeys struct { // DNSConsistency indicates whether there is consistency between // the TH's DNS results and the probe's DNS results. - DNSConsistency string `json:"dns_consistency"` + DNSConsistency optional.Value[string] `json:"dns_consistency"` // HTTPExperimentFailure indicates whether there was a failure in // the final HTTP request that we recorded. 
@@ -354,7 +355,7 @@ func NewTestKeys() *TestKeys { ControlFailure: nil, DNSFlags: 0, DNSExperimentFailure: nil, - DNSConsistency: "", + DNSConsistency: optional.None[string](), HTTPExperimentFailure: nil, BlockingFlags: 0, NullNullFlags: 0, diff --git a/internal/minipipeline/analysis.go b/internal/minipipeline/analysis.go index 3c29b39ef..f538fcd07 100644 --- a/internal/minipipeline/analysis.go +++ b/internal/minipipeline/analysis.go @@ -36,7 +36,7 @@ import ( // operation that failed for the current TagDepth. // // Within each group, successes sort before failures because the empty -// string has priority over nøn-empty strings. +// string has priority over non-empty strings. // // So, when walking the list from index 0 to index N, you encounter the // latest redirects first, you observe the more complex operations first,