Skip to content

Commit

Permalink
fix(minipipeline): DNSLookupFailures must be a slice (ooni#1395)
Browse files Browse the repository at this point in the history
`WebObservationsContainer.DNSLookupFailures` MUST be a slice because of
ooni/probe#2624. We have the same transaction
IDs for both A and AAAA queries. Until we fix this bug, we must use a
list because that allows for duplication.

While there, create `measurexlite.webTitleRegexp` just once.

While there, implement an `-help` flag for
`./internal/cmd/minipipeline`.
  • Loading branch information
bassosimone authored and Murphy-OrangeMud committed Feb 13, 2024
1 parent dd12ea5 commit 7e10550
Show file tree
Hide file tree
Showing 72 changed files with 10,111 additions and 10,022 deletions.
9 changes: 6 additions & 3 deletions internal/cmd/minipipeline/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ var (
// destdirFlag is the -destdir flag
destdirFlag = flag.String("destdir", ".", "destination directory to use")

// helpFlag is the -help flag
helpFlag = flag.Bool("help", false, "Show the help message")

// measurementFlag is the -measurement flag
measurementFlag = flag.String("measurement", "", "measurement file to analyze")

Expand All @@ -30,15 +33,15 @@ var (

func main() {
flag.Parse()
if *measurementFlag == "" {
if *helpFlag || *measurementFlag == "" {
fmt.Fprintf(os.Stderr, "\n")
fmt.Fprintf(os.Stderr, "usage: %s -measurement <file> [-prefix <prefix>]\n", filepath.Base(os.Args[0]))
fmt.Fprintf(os.Stderr, "usage: %s -measurement <file> [-destdir <dir>] [-prefix <prefix>]\n", filepath.Base(os.Args[0]))
fmt.Fprintf(os.Stderr, "\n")
fmt.Fprintf(os.Stderr, "Mini measurement processing pipeline to reprocess recent probe measurements\n")
fmt.Fprintf(os.Stderr, "and align results calculation with ooni/data.\n")
fmt.Fprintf(os.Stderr, "\n")
fmt.Fprintf(os.Stderr, "Analyzes the <file> provided using -measurement <file> and writes the\n")
fmt.Fprintf(os.Stderr, "observations.json and analysis.json files in the -destdir <destdir> directory,\n")
fmt.Fprintf(os.Stderr, "observations.json and analysis.json files in the -destdir <dir> directory,\n")
fmt.Fprintf(os.Stderr, "which must already exist.\n")
fmt.Fprintf(os.Stderr, "\n")
fmt.Fprintf(os.Stderr, "Use -prefix <prefix> to add <prefix> in front of the generated files names.\n")
Expand Down
20 changes: 10 additions & 10 deletions internal/cmd/minipipeline/testdata/observations.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"DNSLookupFailures": {
"2": {
"DNSTransactionID": 2,
"DNSLookupFailures": [
{
"DNSTransactionID": 3,
"DNSDomain": "nexa.polito.it",
"DNSLookupFailure": "dns_no_answer",
"DNSQueryType": "AAAA",
"DNSEngine": "doh",
"DNSEngine": "udp",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
Expand Down Expand Up @@ -38,12 +38,12 @@
"ControlHTTPResponseHeadersKeys": null,
"ControlHTTPResponseTitle": null
},
"3": {
"DNSTransactionID": 3,
{
"DNSTransactionID": 2,
"DNSDomain": "nexa.polito.it",
"DNSLookupFailure": "dns_no_answer",
"DNSQueryType": "AAAA",
"DNSEngine": "udp",
"DNSEngine": "doh",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
Expand Down Expand Up @@ -76,7 +76,7 @@
"ControlHTTPResponseHeadersKeys": null,
"ControlHTTPResponseTitle": null
}
},
],
"DNSLookupSuccesses": [
{
"DNSTransactionID": 3,
Expand Down Expand Up @@ -233,7 +233,7 @@
"X-Generator": true
},
"HTTPResponseLocation": null,
"HTTPResponseTitle": "Nexa Center for Internet & Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino",
"HTTPResponseTitle": "Nexa Center for Internet \u0026 Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino",
"HTTPResponseIsFinal": true,
"ControlDNSDomain": null,
"ControlDNSLookupFailure": null,
Expand All @@ -260,7 +260,7 @@
"X-Frame-Options": true,
"X-Generator": true
},
"ControlHTTPResponseTitle": "Nexa Center for Internet & Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino"
"ControlHTTPResponseTitle": "Nexa Center for Internet \u0026 Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino"
}
}
}
11 changes: 7 additions & 4 deletions internal/measurexlite/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ package measurexlite

import "regexp"

// webTitleRegexp is the regexp to extract the title
//
// MK used {1,128} but we're making it larger here to get longer titles
// e.g. <http://www.isa.gov.il/Pages/default.aspx>'s one
var webTitleRegexp = regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)

// WebGetTitle returns the title or an empty string.
func WebGetTitle(measurementBody string) string {
// MK used {1,128} but we're making it larger here to get longer titles
// e.g. <http://www.isa.gov.il/Pages/default.aspx>'s one
re := regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)
v := re.FindStringSubmatch(measurementBody)
v := webTitleRegexp.FindStringSubmatch(measurementBody)
if len(v) < 2 {
return ""
}
Expand Down
12 changes: 6 additions & 6 deletions internal/minipipeline/analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
func TestWebAnalysisComputeDNSExperimentFailure(t *testing.T) {
t.Run("when there's no DNSDomain", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: map[int64]*WebObservation{
1: {
DNSLookupFailures: []*WebObservation{
{
DNSTransactionID: optional.Some(int64(1)),
DNSDomain: optional.None[string](), // explicitly set
DNSLookupFailure: optional.Some("dns_no_answer"),
Expand All @@ -30,8 +30,8 @@ func TestWebAnalysisComputeDNSExperimentFailure(t *testing.T) {

t.Run("when DNSDomain does not match ControlDNSDomain", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: map[int64]*WebObservation{
1: {
DNSLookupFailures: []*WebObservation{
{
DNSTransactionID: optional.Some(int64(1)),
DNSDomain: optional.Some("dns.google.com"),
DNSLookupFailure: optional.Some("dns_no_answer"),
Expand All @@ -52,8 +52,8 @@ func TestWebAnalysisComputeDNSExperimentFailure(t *testing.T) {

t.Run("when the failure is dns_no_answer for AAAA", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: map[int64]*WebObservation{
1: {
DNSLookupFailures: []*WebObservation{
{
DNSTransactionID: optional.Some(int64(1)),
DNSDomain: optional.Some("dns.google.com"),
DNSLookupFailure: optional.Some("dns_no_answer"),
Expand Down
9 changes: 5 additions & 4 deletions internal/minipipeline/observation.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,9 @@ type WebObservationsContainer struct {
// ID space, i.e., you can't see the same transaction ID in both. Transaction IDs
// are strictly positive unique numbers within the same OONI measurement. Note
// that the A and AAAA events for the same DNS lookup uses the same transaction ID
// until we fix the https://github.com/ooni/probe/issues/2624 issue.
DNSLookupFailures map[int64]*WebObservation
// until we fix the https://github.com/ooni/probe/issues/2624 issue. For this
// reason DNSLookupFailure and DNSLookupSuccesses MUST be slices.
DNSLookupFailures []*WebObservation

// DNSLookupSuccesses contains all the successful transactions.
DNSLookupSuccesses []*WebObservation
Expand All @@ -240,7 +241,7 @@ type WebObservationsContainer struct {
// NewWebObservationsContainer constructs a [*WebObservationsContainer].
func NewWebObservationsContainer() *WebObservationsContainer {
return &WebObservationsContainer{
DNSLookupFailures: map[int64]*WebObservation{},
DNSLookupFailures: []*WebObservation{},
DNSLookupSuccesses: []*WebObservation{},
KnownTCPEndpoints: map[int64]*WebObservation{},
knownIPAddresses: map[string]*WebObservation{},
Expand Down Expand Up @@ -271,7 +272,7 @@ func (c *WebObservationsContainer) ingestDNSLookupFailures(evs ...*model.Archiva
}

// add record
c.DNSLookupFailures[ev.TransactionID] = obs
c.DNSLookupFailures = append(c.DNSLookupFailures, obs)
}
}

Expand Down
8 changes: 4 additions & 4 deletions internal/minipipeline/observation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func TestLoadWebObservations(t *testing.T) {
func TestWebObservationsContainerIngestTLSHandshakeEvents(t *testing.T) {
t.Run("when we don't have any known TCP endpoint", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: map[int64]*WebObservation{},
DNSLookupFailures: []*WebObservation{},
KnownTCPEndpoints: map[int64]*WebObservation{}, // this map must be empty in this test
knownIPAddresses: map[string]*WebObservation{},
}
Expand Down Expand Up @@ -87,7 +87,7 @@ func TestWebObservationsContainerIngestTLSHandshakeEvents(t *testing.T) {
func TestWebObservationsContainerIngestHTTPRoundTripEvents(t *testing.T) {
t.Run("when we don't have any known TCP endpoint", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: map[int64]*WebObservation{},
DNSLookupFailures: []*WebObservation{},
KnownTCPEndpoints: map[int64]*WebObservation{}, // this map must be empty in this test
knownIPAddresses: map[string]*WebObservation{},
}
Expand Down Expand Up @@ -117,7 +117,7 @@ func TestWebObservationsContainerIngestHTTPRoundTripEvents(t *testing.T) {
func TestWebObservationsContainerIngestControlMessages(t *testing.T) {
t.Run("we don't set MatchWithControlIPAddressASN when we don't have probe ASN info", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: map[int64]*WebObservation{},
DNSLookupFailures: []*WebObservation{},
KnownTCPEndpoints: map[int64]*WebObservation{
1: {
DNSDomain: optional.Some("dns.google"),
Expand Down Expand Up @@ -164,7 +164,7 @@ func TestWebObservationsContainerIngestControlMessages(t *testing.T) {

t.Run("we don't save TLS handshake failures when the SNI is different", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: map[int64]*WebObservation{},
DNSLookupFailures: []*WebObservation{},
KnownTCPEndpoints: map[int64]*WebObservation{
1: {
IPAddress: optional.Some("8.8.8.8"),
Expand Down
Loading

0 comments on commit 7e10550

Please sign in to comment.