Skip to content

Commit

Permalink
feat: add and process depth and fetch_body tags (#1403)
Browse files Browse the repository at this point in the history
This commit modifies Web Connectivity LTE to include the depth and
fetch_body tags. Together, these tags allow us to know the redirect
depth of related transactions and whether a transaction intends to fetch
the body. Then, we modify the minipipeline to process these tags and
include them into observations.

In turn, the availability of these bits of information inside of the
observations will allow us to more correctly parse and split data and
more easily produce an analysis from which it's easier to emulate Web
Connectivity v0.4.

While there, make sure we extract `ControlDNSDomain` and
`ControlDNSLookupFailure` for TCP endpoints. Also extracting these
fields is going to be useful to more correctly emulate v0.4.

Reference issue: ooni/probe#2634
  • Loading branch information
bassosimone authored Nov 29, 2023
1 parent e0c742c commit 78c7e72
Show file tree
Hide file tree
Showing 77 changed files with 1,264 additions and 271 deletions.
16 changes: 14 additions & 2 deletions internal/cmd/minipipeline/testdata/observations.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": "nexa.polito.it",
"ControlDNSLookupFailure": "",
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -62,6 +64,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": "nexa.polito.it",
"ControlDNSLookupFailure": "",
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -101,6 +105,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -138,6 +144,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -175,6 +183,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -229,8 +239,10 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": "Nexa Center for Internet \u0026 Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino",
"HTTPResponseIsFinal": true,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": "nexa.polito.it",
"ControlDNSLookupFailure": "",
"ControlTCPConnectFailure": "",
"MatchWithControlIPAddress": true,
"MatchWithControlIPAddressASN": true,
Expand Down
8 changes: 6 additions & 2 deletions internal/cmd/minipipeline/testdata/observations_classic.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -80,8 +82,10 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": "Nexa Center for Internet \u0026 Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino",
"HTTPResponseIsFinal": true,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": "nexa.polito.it",
"ControlDNSLookupFailure": "",
"ControlTCPConnectFailure": "",
"MatchWithControlIPAddress": true,
"MatchWithControlIPAddressASN": true,
Expand Down
13 changes: 10 additions & 3 deletions internal/experiment/webconnectivitylte/cleartextflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package webconnectivitylte

import (
"context"
"fmt"
"io"
"net"
"net/http"
Expand All @@ -34,6 +35,9 @@ type CleartextFlow struct {
// DNSCache is the MANDATORY DNS cache.
DNSCache *DNSCache

// Depth is the OPTIONAL current redirect depth.
Depth int64

// IDGenerator is the MANDATORY atomic int64 to generate task IDs.
IDGenerator *atomic.Int64

Expand Down Expand Up @@ -98,7 +102,8 @@ func (t *CleartextFlow) Run(parentCtx context.Context, index int64) error {
}

// create trace
trace := measurexlite.NewTrace(index, t.ZeroTime)
trace := measurexlite.NewTrace(index, t.ZeroTime, fmt.Sprintf("depth=%d", t.Depth),
fmt.Sprintf("fetch_body=%v", t.PrioSelector != nil))

// TODO(bassosimone): the DSL starts measuring for throttling when we start
// fetching the body while here we start immediately. We should come up with
Expand Down Expand Up @@ -252,7 +257,7 @@ func (t *CleartextFlow) httpTransaction(ctx context.Context, network, address, a
// bit torn about what is the best approach to follow here. Maybe it does not
// even matter to emit transaction_start/end events given that we have transaction ID.
t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent(
trace.Index(), started, "http_transaction_start",
trace.Index(), started, "http_transaction_start", trace.Tags()...,
))
resp, err := txp.RoundTrip(req)
var body []byte
Expand All @@ -266,7 +271,7 @@ func (t *CleartextFlow) httpTransaction(ctx context.Context, network, address, a
}
finished := trace.TimeSince(trace.ZeroTime())
t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent(
trace.Index(), finished, "http_transaction_done",
trace.Index(), finished, "http_transaction_done", trace.Tags()...,
))
ev := measurexlite.NewArchivalHTTPRequestResult(
trace.Index(),
Expand All @@ -281,6 +286,7 @@ func (t *CleartextFlow) httpTransaction(ctx context.Context, network, address, a
body,
err,
finished,
trace.Tags()...,
)
t.TestKeys.PrependRequests(ev)
return resp, body, err
Expand All @@ -302,6 +308,7 @@ func (t *CleartextFlow) maybeFollowRedirects(ctx context.Context, resp *http.Res
t.Logger.Infof("redirect to: %s", location.String())
resolvers := &DNSResolvers{
CookieJar: t.CookieJar,
Depth: t.Depth + 1,
DNSCache: t.DNSCache,
Domain: location.Hostname(),
IDGenerator: t.IDGenerator,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func TestCleartextFlow_Run(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
tr := &CleartextFlow{
Address: tt.fields.Address,
Depth: 0,
DNSCache: tt.fields.DNSCache,
IDGenerator: tt.fields.IDGenerator,
Logger: tt.fields.Logger,
Expand Down
12 changes: 9 additions & 3 deletions internal/experiment/webconnectivitylte/dnsresolvers.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package webconnectivitylte

import (
"context"
"fmt"
"math/rand"
"net"
"net/http"
Expand All @@ -33,6 +34,9 @@ type DNSResolvers struct {
// Domain is the MANDATORY domain to resolve.
Domain string

// Depth is the OPTIONAL current redirect depth.
Depth int64

// IDGenerator is the MANDATORY atomic int64 to generate task IDs.
IDGenerator *atomic.Int64

Expand Down Expand Up @@ -210,7 +214,7 @@ func (t *DNSResolvers) lookupHostSystem(parentCtx context.Context, out chan<- []
index := t.IDGenerator.Add(1)

// create trace
trace := measurexlite.NewTrace(index, t.ZeroTime)
trace := measurexlite.NewTrace(index, t.ZeroTime, fmt.Sprintf("depth=%d", t.Depth))

// start the operation logger
ol := logx.NewOperationLogger(
Expand All @@ -237,7 +241,7 @@ func (t *DNSResolvers) lookupHostUDP(parentCtx context.Context, udpAddress strin
index := t.IDGenerator.Add(1)

// create trace
trace := measurexlite.NewTrace(index, t.ZeroTime)
trace := measurexlite.NewTrace(index, t.ZeroTime, fmt.Sprintf("depth=%d", t.Depth))

// start the operation logger
ol := logx.NewOperationLogger(
Expand Down Expand Up @@ -375,7 +379,7 @@ func (t *DNSResolvers) lookupHostDNSOverHTTPS(parentCtx context.Context, out cha
index := t.IDGenerator.Add(1)

// create trace
trace := measurexlite.NewTrace(index, t.ZeroTime)
trace := measurexlite.NewTrace(index, t.ZeroTime, fmt.Sprintf("depth=%d", t.Depth))

// start the operation logger
ol := logx.NewOperationLogger(
Expand Down Expand Up @@ -433,6 +437,7 @@ func (t *DNSResolvers) startCleartextFlows(
for _, addr := range addresses {
task := &CleartextFlow{
Address: net.JoinHostPort(addr.Addr, port),
Depth: t.Depth,
DNSCache: t.DNSCache,
IDGenerator: t.IDGenerator,
Logger: t.Logger,
Expand Down Expand Up @@ -475,6 +480,7 @@ func (t *DNSResolvers) startSecureFlows(
for _, addr := range addresses {
task := &SecureFlow{
Address: net.JoinHostPort(addr.Addr, port),
Depth: t.Depth,
DNSCache: t.DNSCache,
IDGenerator: t.IDGenerator,
Logger: t.Logger,
Expand Down
1 change: 1 addition & 0 deletions internal/experiment/webconnectivitylte/measurer.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ func (m *Measurer) Run(ctx context.Context, args *model.ExperimentArgs) error {
// start background tasks
resos := &DNSResolvers{
DNSCache: NewDNSCache(),
Depth: 0,
Domain: URL.Hostname(),
IDGenerator: idGenerator,
Logger: sess.Logger(),
Expand Down
13 changes: 10 additions & 3 deletions internal/experiment/webconnectivitylte/secureflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package webconnectivitylte
import (
"context"
"crypto/tls"
"fmt"
"io"
"net"
"net/http"
Expand All @@ -35,6 +36,9 @@ type SecureFlow struct {
// DNSCache is the MANDATORY DNS cache.
DNSCache *DNSCache

// Depth is the OPTIONAL current redirect depth.
Depth int64

// IDGenerator is the MANDATORY atomic int64 to generate task IDs.
IDGenerator *atomic.Int64

Expand Down Expand Up @@ -105,7 +109,8 @@ func (t *SecureFlow) Run(parentCtx context.Context, index int64) error {
}

// create trace
trace := measurexlite.NewTrace(index, t.ZeroTime)
trace := measurexlite.NewTrace(index, t.ZeroTime, fmt.Sprintf("depth=%d", t.Depth),
fmt.Sprintf("fetch_body=%v", t.PrioSelector != nil))

// TODO(bassosimone): the DSL starts measuring for throttling when we start
// fetching the body while here we start immediately. We should come up with
Expand Down Expand Up @@ -307,7 +312,7 @@ func (t *SecureFlow) httpTransaction(ctx context.Context, network, address, alpn
// bit torn about what is the best approach to follow here. Maybe it does not
// even matter to emit transaction_start/end events given that we have transaction ID.
t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent(
trace.Index(), started, "http_transaction_start",
trace.Index(), started, "http_transaction_start", trace.Tags()...,
))
resp, err := txp.RoundTrip(req)
var body []byte
Expand All @@ -321,7 +326,7 @@ func (t *SecureFlow) httpTransaction(ctx context.Context, network, address, alpn
}
finished := trace.TimeSince(trace.ZeroTime())
t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent(
trace.Index(), finished, "http_transaction_done",
trace.Index(), finished, "http_transaction_done", trace.Tags()...,
))
ev := measurexlite.NewArchivalHTTPRequestResult(
trace.Index(),
Expand All @@ -336,6 +341,7 @@ func (t *SecureFlow) httpTransaction(ctx context.Context, network, address, alpn
body,
err,
finished,
trace.Tags()...,
)
t.TestKeys.PrependRequests(ev)
return resp, body, err
Expand All @@ -357,6 +363,7 @@ func (t *SecureFlow) maybeFollowRedirects(ctx context.Context, resp *http.Respon
t.Logger.Infof("redirect to: %s", location.String())
resolvers := &DNSResolvers{
CookieJar: t.CookieJar,
Depth: t.Depth + 1,
DNSCache: t.DNSCache,
Domain: location.Hostname(),
IDGenerator: t.IDGenerator,
Expand Down
1 change: 1 addition & 0 deletions internal/experiment/webconnectivitylte/secureflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ func TestSecureFlow_Run(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
tr := &SecureFlow{
Address: tt.fields.Address,
Depth: 0,
DNSCache: tt.fields.DNSCache,
IDGenerator: tt.fields.IDGenerator,
Logger: tt.fields.Logger,
Expand Down
31 changes: 31 additions & 0 deletions internal/minipipeline/observation.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,24 @@ type WebObservation struct {
// HTTPResponseIsFinal is true if the status code is 2xx, 4xx, or 5xx.
HTTPResponseIsFinal optional.Value[bool]

// The following fields are extracted from tags (if available):

// TagDepth is the value of the depth=<int64> tag. We use this tag
// in Web Connectivity LTE to know the current redirect depth. We start
// from zero for the first set of requests and increement this value
// every time we follow a redirect. (Because just one transaction
// is allowed to fetch the body and follow redirects, everything should
// work as intended and it's possible to use this tag to group related
// DNS lookups and endpoints operations, which can then be further break
// down using the transaction ID to isolate transactions.)
TagDepth optional.Value[int64]

// TagFetchBody is the value of the fetch_body=<bool> tag. We use this tag
// in Web Connectivity LTE to indicate that the current transaction will
// attempt to fetch the webpage body. (Potentially, more than one transaction
// tries fetching the body and only one will actually do it.)
TagFetchBody optional.Value[bool]

// The following fields are optional.Some when you process the control information
// contained inside a measurement and there's information available:

Expand Down Expand Up @@ -263,6 +281,7 @@ func (c *WebObservationsContainer) ingestDNSLookupFailures(evs ...*model.Archiva
DNSLookupFailure: optional.Some(utilsStringPointerToString(ev.Failure)),
DNSQueryType: optional.Some(ev.QueryType),
DNSEngine: optional.Some(ev.Engine),
TagDepth: utilsExtractTagDepth(ev.Tags),
}

// add record
Expand All @@ -289,6 +308,7 @@ func (c *WebObservationsContainer) ingestDNSLookupSuccesses(evs ...*model.Archiv
IPAddress: optional.Some(ipAddr),
IPAddressASN: utilsGeoipxLookupASN(ipAddr),
IPAddressBogon: optional.Some(netxlite.IsBogon(ipAddr)),
TagDepth: utilsExtractTagDepth(ev.Tags),
}

// add record
Expand Down Expand Up @@ -333,6 +353,8 @@ func (c *WebObservationsContainer) IngestTCPConnectEvents(evs ...*model.Archival
EndpointPort: optional.Some(portString),
EndpointAddress: optional.Some(net.JoinHostPort(ev.IP, portString)),
TCPConnectFailure: optional.Some(utilsStringPointerToString(ev.Status.Failure)),
TagDepth: utilsExtractTagDepth(ev.Tags),
TagFetchBody: utilsExtractTagFetchBody(ev.Tags),
}

// register the observation
Expand Down Expand Up @@ -455,6 +477,15 @@ func (c *WebObservationsContainer) controlMatchDNSLookupResults(inputDomain stri
continue
}

// register the control DNS domain
obs.ControlDNSDomain = optional.Some(domain)

// register whether the control failed and skip in such a case
obs.ControlDNSLookupFailure = optional.Some(utilsStringPointerToString(resp.DNS.Failure))
if resp.DNS.Failure != nil {
continue
}

// compute whether also the TH observed this addr
obs.MatchWithControlIPAddress = optional.Some(thAddrMap[addr])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": "expired.badssl.com",
"ControlDNSLookupFailure": "",
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -64,6 +66,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -101,6 +105,8 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"ControlTCPConnectFailure": null,
Expand Down Expand Up @@ -140,8 +146,10 @@
"HTTPResponseLocation": null,
"HTTPResponseTitle": null,
"HTTPResponseIsFinal": null,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
"TagDepth": null,
"TagFetchBody": null,
"ControlDNSDomain": "expired.badssl.com",
"ControlDNSLookupFailure": "",
"ControlTCPConnectFailure": "",
"MatchWithControlIPAddress": true,
"MatchWithControlIPAddressASN": true,
Expand Down
Loading

0 comments on commit 78c7e72

Please sign in to comment.