diff --git a/internal/experiment/webconnectivitylte/analysiscorev2.go b/internal/experiment/webconnectivitylte/analysiscorev2.go index f2acb629b..5fac262fa 100644 --- a/internal/experiment/webconnectivitylte/analysiscorev2.go +++ b/internal/experiment/webconnectivitylte/analysiscorev2.go @@ -45,6 +45,9 @@ func (tk *TestKeys) analysisToplevelV2(logger model.Logger) { analysis.ComputeHTTPDiffStatusCodeMatch(container) analysis.ComputeHTTPDiffUncommonHeadersIntersection(container) analysis.ComputeHTTPDiffTitleDifferentLongWords(container) + analysis.ComputeHTTPFinalResponses(container) + analysis.ComputeTCPTransactionsWithUnexplainedUnexpectedFailures(container) + analysis.ComputeHTTPFinalResponsesWithTLS(container) // dump the analysis results for debugging purposes fmt.Printf("%s\n", must.MarshalJSON(analysis)) @@ -140,6 +143,39 @@ func (tk *TestKeys) analysisHTTPToplevelV2(analysis *minipipeline.WebAnalysis, l tk.BlockingFlags |= analysisFlagHTTPBlocking } + // Detect cases where an error occurred during a redirect. For this to happen, we + // need to observe (1) no "final" responses and (2) unexpected, unexplained failures + numFinals := len(analysis.HTTPFinalResponses.UnwrapOr(nil)) + numUnexpectedUnexplained := len(analysis.TCPTransactionsWithUnexplainedUnexpectedFailures.UnwrapOr(nil)) + if numFinals <= 0 && numUnexpectedUnexplained > 0 { + tk.BlockingFlags |= analysisFlagHTTPBlocking + } + + // Special case for HTTPS + if len(analysis.HTTPFinalResponsesWithTLS.UnwrapOr(nil)) > 0 { + tk.BlockingFlags |= analysisFlagSuccess + } + + // attempt to fill the comparisons about the body + // + // XXX this code should probably always run + if !analysis.HTTPDiffStatusCodeMatch.IsNone() { + value := analysis.HTTPDiffStatusCodeMatch.Unwrap() + tk.StatusCodeMatch = &value + } + if !analysis.HTTPDiffBodyProportionFactor.IsNone() { + value := analysis.HTTPDiffBodyProportionFactor.UnwrapOr(0) > 0.7 + tk.BodyLengthMatch = &value + } + if !analysis.HTTPDiffUncommonHeadersIntersection.IsNone() { + value := len(analysis.HTTPDiffUncommonHeadersIntersection.Unwrap()) > 0 + tk.HeadersMatch = &value + } + if !analysis.HTTPDiffTitleDifferentLongWords.IsNone() { + value := len(analysis.HTTPDiffTitleDifferentLongWords.Unwrap()) <= 0 + tk.TitleMatch = &value + } + // same code structure as before if !analysis.HTTPDiffStatusCodeMatch.IsNone() { if analysis.HTTPDiffStatusCodeMatch.Unwrap() { @@ -160,6 +196,6 @@ func (tk *TestKeys) analysisHTTPToplevelV2(analysis *minipipeline.WebAnalysis, l return } } + tk.BlockingFlags |= analysisFlagHTTPDiff } - tk.BlockingFlags |= analysisFlagHTTPDiff } diff --git a/internal/experiment/webconnectivityqa/dnsblocking.go b/internal/experiment/webconnectivityqa/dnsblocking.go index dc27caa9b..fd971d6a9 100644 --- a/internal/experiment/webconnectivityqa/dnsblocking.go +++ b/internal/experiment/webconnectivityqa/dnsblocking.go @@ -47,12 +47,15 @@ func dnsBlockingNXDOMAIN() *TestCase { Flags: 0, Input: "https://www.example.com/", Configure: func(env *netemx.QAEnv) { - // remove the record so that the DNS query returns NXDOMAIN, which is then - // converted into android_dns_cache_no_data by the emulation layer + // remove the record so that the DNS query returns NXDOMAIN env.ISPResolverConfig().RemoveRecord("www.example.com") }, ExpectErr: false, ExpectTestKeys: &testKeys{ + BodyLengthMatch: true, + StatusCodeMatch: true, + HeadersMatch: true, + TitleMatch: true, DNSExperimentFailure: "dns_nxdomain_error", DNSConsistency: "inconsistent", XStatus: 2080, // StatusExperimentDNS | StatusAnomalyDNS diff --git a/internal/minipipeline/analysis.go b/internal/minipipeline/analysis.go index 1132445b4..be0404e09 100644 --- a/internal/minipipeline/analysis.go +++ b/internal/minipipeline/analysis.go @@ -54,6 +54,15 @@ type WebAnalysis struct { // The generation algorithm assumes there's a single "final" response. HTTPDiffUncommonHeadersIntersection optional.Value[map[string]bool] + // HTTPFinalResponses contains the transaction IDs of "final" responses (i.e., responses + // that are like 2xx, 4xx, or 5xx). Typically, we expect to have a single response that + // if final when we're analyzing Web Connectivity LTE. + HTTPFinalResponses optional.Value[map[int64]bool] + + // HTTPFinalResponsesWithTLS is like HTTPFinalResponses but only includes the + // cases where we're using TLS to fetch the final response. + HTTPFinalResponsesWithTLS optional.Value[map[int64]bool] + // TCPTransactionsWithUnexpectedTCPConnectFailures contains the TCP transaction IDs that // contain TCP connect failures while the control measurement succeeded. TCPTransactionsWithUnexpectedTCPConnectFailures optional.Value[map[int64]bool] @@ -65,6 +74,11 @@ type WebAnalysis struct { // TCPSTransactionsWithUnexpectedHTTPFailures contains the TCP transaction IDs that // contain HTTP failures while the control measurement succeeded. TCPTransactionsWithUnexpectedHTTPFailures optional.Value[map[int64]bool] + + // TCPTransactionsWithUnexplainedUnexpectedFailures contains the TCP transaction IDs for + // which we cannot explain failures with the control information, but for which we expect + // to see a success because the control succeeded. + TCPTransactionsWithUnexplainedUnexpectedFailures optional.Value[map[int64]bool] } func analysisDNSLookupFailureIsDNSNoAnswerForAAAA(obs *WebObservation) bool { @@ -161,7 +175,7 @@ func (wa *WebAnalysis) ComputeDNSTransactionsWithUnexpectedFailures(c *WebObserv }) } - wa.DNSTransactionsWithBogons = optional.Some(state) + wa.DNSTransactionsWithUnexpectedFailures = optional.Some(state) } // ComputeDNSPossiblyInvalidAddrs computes the DNSPossiblyInvalidAddrs field. @@ -178,11 +192,6 @@ func (wa *WebAnalysis) ComputeDNSPossiblyInvalidAddrs(c *WebObservationsContaine for _, obs := range c.KnownTCPEndpoints { addr := obs.IPAddress.Unwrap() - // if the address was also resolved by the control, we're good - if obs.MatchWithControlIPAddress.UnwrapOr(false) { - continue - } - // if we have a succesful TLS handshake for this addr, we're good if obs.TLSHandshakeFailure.UnwrapOr("unknown_failure") == "" { // just in case another transaction succeded, clear the address from the state @@ -190,8 +199,13 @@ func (wa *WebAnalysis) ComputeDNSPossiblyInvalidAddrs(c *WebObservationsContaine continue } + // if the address was also resolved by the control, we're good + if obs.MatchWithControlIPAddress.UnwrapOr(true) { + continue + } + // if there's an ASN match with the control, we're good - if obs.MatchWithControlIPAddressASN.UnwrapOr(false) { + if obs.MatchWithControlIPAddressASN.UnwrapOr(true) { continue } @@ -271,6 +285,11 @@ func (wa *WebAnalysis) ComputeTCPTransactionsWithUnexpectedHTTPFailures(c *WebOb // ComputeHTTPDiffBodyProportionFactor computes the HTTPDiffBodyProportionFactor field. func (wa *WebAnalysis) ComputeHTTPDiffBodyProportionFactor(c *WebObservationsContainer) { for _, obs := range c.KnownTCPEndpoints { + // we should only perform the comparison for a final response + if !obs.HTTPResponseIsFinal.UnwrapOr(false) { + continue + } + // we need a valid body length and the body must not be truncated measurement := obs.HTTPResponseBodyLength.UnwrapOr(0) if measurement <= 0 || obs.HTTPResponseBodyIsTruncated.UnwrapOr(true) { @@ -304,6 +323,11 @@ func (wa *WebAnalysis) ComputeHTTPDiffBodyProportionFactor(c *WebObservationsCon // ComputeHTTPDiffStatusCodeMatch computes the HTTPDiffStatusCodeMatch field. func (wa *WebAnalysis) ComputeHTTPDiffStatusCodeMatch(c *WebObservationsContainer) { for _, obs := range c.KnownTCPEndpoints { + // we should only perform the comparison for a final response + if !obs.HTTPResponseIsFinal.UnwrapOr(false) { + continue + } + // we need a positive status code for both measurement := obs.HTTPResponseStatusCode.UnwrapOr(0) if measurement <= 0 { @@ -380,7 +404,11 @@ func (wa *WebAnalysis) ComputeHTTPDiffUncommonHeadersIntersection(c *WebObservat state := make(map[string]bool) for _, obs := range c.KnownTCPEndpoints { - // we should only have control headers for the "final" response + // we should only perform the comparison for a final response + if !obs.HTTPResponseIsFinal.UnwrapOr(false) { + continue + } + measurement := obs.HTTPResponseHeadersKeys.UnwrapOr(nil) if len(measurement) <= 0 { continue @@ -420,10 +448,9 @@ func (wa *WebAnalysis) ComputeHTTPDiffUncommonHeadersIntersection(c *WebObservat // Implementation note: we only process the first observation that matches. // // This is fine(TM) as long as we have a single "final" request. + wa.HTTPDiffUncommonHeadersIntersection = optional.Some(state) break } - - wa.HTTPDiffUncommonHeadersIntersection = optional.Some(state) } // ComputeHTTPDiffTitleDifferentLongWords computes the HTTPDiffTitleDifferentLongWords field. @@ -431,7 +458,11 @@ func (wa *WebAnalysis) ComputeHTTPDiffTitleDifferentLongWords(c *WebObservations state := make(map[string]bool) for _, obs := range c.KnownTCPEndpoints { - // we should only have control headers for the "final" response + // we should only perform the comparison for a final response + if !obs.HTTPResponseIsFinal.UnwrapOr(false) { + continue + } + measurement := obs.HTTPResponseTitle.UnwrapOr("") if measurement == "" { continue @@ -478,8 +509,98 @@ func (wa *WebAnalysis) ComputeHTTPDiffTitleDifferentLongWords(c *WebObservations // Implementation note: we only process the first observation that matches. // // This is fine(TM) as long as we have a single "final" request. + wa.HTTPDiffTitleDifferentLongWords = optional.Some(state) break } +} + +// ComputeHTTPFinalResponses computes the HTTPFinalResponses field. +func (wa *WebAnalysis) ComputeHTTPFinalResponses(c *WebObservationsContainer) { + state := make(map[int64]bool) + + for _, obs := range c.KnownTCPEndpoints { + txid := obs.EndpointTransactionID.UnwrapOr(0) + if txid <= 0 { + continue + } + if obs.HTTPResponseIsFinal.UnwrapOr(false) { + state[txid] = true + continue + } + } + + wa.HTTPFinalResponses = optional.Some(state) +} + +// ComputeTCPTransactionsWithUnexplainedUnexpectedFailures computes the TCPTransactionsWithUnexplainedUnexpectedFailures field. +func (wa *WebAnalysis) ComputeTCPTransactionsWithUnexplainedUnexpectedFailures(c *WebObservationsContainer) { + state := make(map[int64]bool) + + for _, obs := range c.KnownTCPEndpoints { + // exclude the cases where we have an expectation for TCP connect + // because in those cases we can provide an explanation + if !obs.ControlTCPConnectFailure.IsNone() { + continue + } + + // exclude the cases where we have an expectation for TLS handshake + // because in those cases we can provide an explanation + if !obs.ControlTLSHandshakeFailure.IsNone() { + continue + } + + // exclude the cases where the control failed because we are + // only interested into unexpected failures + if obs.ControlHTTPFailure.UnwrapOr("unknown_error") != "" { + continue + } + + // obtain the transaction ID + txid := obs.EndpointTransactionID.UnwrapOr(0) + if txid <= 0 { + continue + } + + // TODO(bassosimone): we need to remember about broken IPv6 here + + // include the cases where there was a TCP connect failure + if obs.TCPConnectFailure.UnwrapOr("") != "" { + state[txid] = true + continue + } + + // include the cases where there was a TLS handshake failure + if obs.TLSHandshakeFailure.UnwrapOr("") != "" { + state[txid] = true + continue + } + + // include the cases where there was an HTTP failure + if obs.HTTPFailure.UnwrapOr("") != "" { + state[txid] = true + continue + } + } + + wa.TCPTransactionsWithUnexplainedUnexpectedFailures = optional.Some(state) +} + +// ComputeHTTPFinalResponsesWithTLS computes the HTTPFinalResponsesWithTLS field. +func (wa *WebAnalysis) ComputeHTTPFinalResponsesWithTLS(c *WebObservationsContainer) { + state := make(map[int64]bool) + + for _, obs := range c.KnownTCPEndpoints { + txid := obs.EndpointTransactionID.UnwrapOr(0) + if txid <= 0 { + continue + } + isFinal := obs.HTTPResponseIsFinal.UnwrapOr(false) + tlsSuccess := obs.TLSHandshakeFailure.UnwrapOr("unknown_failure") == "" + if isFinal && tlsSuccess { + state[txid] = true + continue + } + } - wa.HTTPDiffTitleDifferentLongWords = optional.Some(state) + wa.HTTPFinalResponsesWithTLS = optional.Some(state) } diff --git a/internal/minipipeline/observation.go b/internal/minipipeline/observation.go index 8b1d8c0cf..ac8af9cda 100644 --- a/internal/minipipeline/observation.go +++ b/internal/minipipeline/observation.go @@ -106,6 +106,9 @@ type WebObservation struct { // HTTPResponseTitle contains the response title. HTTPResponseTitle optional.Value[string] + // HTTPResponseIsFinal is true if the status code is 2xx, 4xx, or 5xx. + HTTPResponseIsFinal optional.Value[bool] + // ControlDNSLookupFailure is the corresponding control DNS lookup failure. ControlDNSLookupFailure optional.Value[string] @@ -312,6 +315,15 @@ func (c *WebObservationsContainer) NoteHTTPRoundTripResults(evs ...*model.Archiv obs.HTTPResponseLocation = optional.Some(string(value)) } } + + obs.HTTPResponseIsFinal = optional.Some((func() bool { + switch ev.Response.Code / 100 { + case 2, 4, 5: + return true + default: + return false + } + }())) } } @@ -328,7 +340,7 @@ func (c *WebObservationsContainer) NoteControlResults(req *model.THRequest, resp c.controlMatchDNSLookupResults(inputDomain, resp) c.controlXrefTCPIPFailures(resp) c.controlXrefTLSFailures(resp) - c.controlXrefFinalHTTPResponse(resp) + c.controlSetHTTPFinalResponseExpectation(resp) return nil } @@ -429,56 +441,28 @@ func (c *WebObservationsContainer) controlXrefTLSFailures(resp *model.THResponse } } -func (c *WebObservationsContainer) controlXrefFinalHTTPResponse(resp *model.THResponse) { - obsx := c.findFinalHTTPResponse() - if obsx.IsNone() { - return - } - obs := obsx.Unwrap() - +func (c *WebObservationsContainer) controlSetHTTPFinalResponseExpectation(resp *model.THResponse) { // Implementation note: the TH response does not have a clear semantics for "missing" values // therefore we are accepting as valid only values within the correct range + for _, obs := range c.KnownTCPEndpoints { + obs.ControlHTTPFailure = optional.Some(utilsStringPointerToString(resp.HTTPRequest.Failure)) + if value := resp.HTTPRequest.StatusCode; value > 0 { + obs.ControlHTTPResponseStatusCode = optional.Some(value) + } + if value := resp.HTTPRequest.BodyLength; value >= 0 { + obs.ControlHTTPResponseBodyLength = optional.Some(value) + } - obs.ControlHTTPFailure = optional.Some(utilsStringPointerToString(resp.HTTPRequest.Failure)) - if value := resp.HTTPRequest.StatusCode; value > 0 { - obs.ControlHTTPResponseStatusCode = optional.Some(value) - } - if value := resp.HTTPRequest.BodyLength; value >= 0 { - obs.ControlHTTPResponseBodyLength = optional.Some(value) - } - - controlHTTPResponseHeadersKeys := make(map[string]bool) - for key := range resp.HTTPRequest.Headers { - controlHTTPResponseHeadersKeys[key] = true - } - if len(controlHTTPResponseHeadersKeys) > 0 { - obs.ControlHTTPResponseHeadersKeys = optional.Some(controlHTTPResponseHeadersKeys) - } - - if v := resp.HTTPRequest.Title; v != "" { - obs.ControlHTTPResponseTitle = optional.Some(v) - } -} - -func (c *WebObservationsContainer) findFinalHTTPResponse() optional.Value[*WebObservation] { - // find all the possible final request candidates - var candidates []*WebObservation - for _, wobs := range c.KnownTCPEndpoints { - switch code := wobs.HTTPResponseStatusCode.UnwrapOr(0); code { - case 0, 301, 302, 307, 308: - // this is a redirect or a nonexisting response in the case of zero - - default: - // found candidate - candidates = append(candidates, wobs) + controlHTTPResponseHeadersKeys := make(map[string]bool) + for key := range resp.HTTPRequest.Headers { + controlHTTPResponseHeadersKeys[key] = true + } + if len(controlHTTPResponseHeadersKeys) > 0 { + obs.ControlHTTPResponseHeadersKeys = optional.Some(controlHTTPResponseHeadersKeys) } - } - // Implementation note: the final request is a request that is not a redirect and - // we expect to see just one of them. This code is written assuming we will have - // more than a final request in the future and to fail in such a case. - if len(candidates) != 1 { - return optional.None[*WebObservation]() + if v := resp.HTTPRequest.Title; v != "" { + obs.ControlHTTPResponseTitle = optional.Some(v) + } } - return optional.Some(candidates[0]) }