From 266c5054e3d42a8b7582ab108a25fc67b2bab760 Mon Sep 17 00:00:00 2001 From: hacheigriega Date: Fri, 24 Jan 2025 14:36:48 -0500 Subject: [PATCH] refactor(x/tally): reveal data json parsing based on strings --- go.mod | 1 + go.sum | 2 + x/tally/types/filters.go | 83 +++++------------------------------ x/tally/types/filters_util.go | 19 ++++---- 4 files changed, 25 insertions(+), 80 deletions(-) diff --git a/go.mod b/go.mod index 5c076b6b..f5a61491 100644 --- a/go.mod +++ b/go.mod @@ -172,6 +172,7 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jmhodges/levigo v1.0.0 // indirect + github.com/josharian/atox v0.0.0-20220719193034-5fa70815fecc // indirect github.com/klauspost/compress v1.17.9 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect diff --git a/go.sum b/go.sum index e4ff64d5..7d665660 100644 --- a/go.sum +++ b/go.sum @@ -779,6 +779,8 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfC github.com/jmhodges/levigo v1.0.0 h1:q5EC36kV79HWeTBWsod3mG11EgStG3qArTKcvlksN1U= github.com/jmhodges/levigo v1.0.0/go.mod h1:Q6Qx+uH3RAqyK4rFQroq9RL7mdkABMcfhEI+nNuzMJQ= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/josharian/atox v0.0.0-20220719193034-5fa70815fecc h1:IXLfvWHYgu6dRCQPZ2fEx4KP6J+LP1q7hzvrUTFWrbY= +github.com/josharian/atox v0.0.0-20220719193034-5fa70815fecc/go.mod h1:8aqLq+j+a8UzykKUzXfICLW1mCHAYSENiWhfyeAOfV8= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlTRt3OuAQ= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= diff --git a/x/tally/types/filters.go b/x/tally/types/filters.go index 3ab456ad..db08cf22 100644 --- a/x/tally/types/filters.go +++ b/x/tally/types/filters.go @@ -3,11 +3,11 @@ package types import ( "bytes" "encoding/binary" - "encoding/json" "slices" - "strconv" "golang.org/x/exp/constraints" + + "github.com/josharian/atox" ) var ( @@ -98,7 +98,7 @@ func (f FilterMode) ApplyFilter(reveals []RevealBody, errors []bool) ([]bool, bo type FilterStdDev struct { maxSigma Sigma dataPath string // JSON path to reveal data - filterFunc func(dataList []any, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool) + filterFunc func(dataList []string, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool) replicationFactor uint16 } @@ -126,13 +126,13 @@ func NewFilterStdDev(input []byte, gasCostMultiplier uint64, replicationFactor u switch input[9] { case 0x00: // Int32 - filter.filterFunc = detectOutliersSignedInteger[int32] + filter.filterFunc = detectOutliersInteger[int32] case 0x01: // Int64 - filter.filterFunc = detectOutliersSignedInteger[int64] + filter.filterFunc = detectOutliersInteger[int64] case 0x02: // Uint32 - filter.filterFunc = detectOutliersUnsignedInt[uint32] + filter.filterFunc = detectOutliersInteger[uint32] case 0x03: // Uint64 - filter.filterFunc = detectOutliersUnsignedInt[uint64] + filter.filterFunc = detectOutliersInteger[uint64] default: return filter, ErrInvalidNumberType } @@ -168,82 +168,22 @@ func (f FilterStdDev) ApplyFilter(reveals []RevealBody, errors []bool) ([]bool, return f.filterFunc(dataList, f.maxSigma, errors, f.replicationFactor) } -func detectOutliersSignedInteger[T constraints.Integer](dataList []any, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool) { +func detectOutliersInteger[T constraints.Integer](dataList []string, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool) { nums := make([]T, 0, len(dataList)) corruptQueue := make([]int, 0, len(dataList)) // queue of corrupt indices in dataList for i, data := range dataList { - if data == nil { - errors[i] = true - corruptQueue = append(corruptQueue, i) - continue - } - num, ok := data.(int64) - if !ok { + if data == "" { errors[i] = true corruptQueue = append(corruptQueue, i) continue } - nums = append(nums, T(num)) - } - - // Construct outliers list. - outliers := make([]bool, len(dataList)) - if len(nums) == 0 { - return outliers, false - } - median := findMedian(nums) - var numsInd, nonOutlierCount int - for i := range outliers { - if len(corruptQueue) > 0 && i == corruptQueue[0] { - outliers[i] = true - corruptQueue = corruptQueue[1:] - } else { - if median.IsWithinSigma(nums[numsInd], maxSigma) { - nonOutlierCount++ - } else { - outliers[i] = true - } - numsInd++ - } - } - - // If less than 2/3 of the numbers fall within max sigma range - // from the median, there is no consensus in reveal data. - if nonOutlierCount*3 < int(replicationFactor)*2 { - return outliers, false - } - return outliers, true -} -func detectOutliersUnsignedInt[T constraints.Unsigned](dataList []any, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool) { - nums := make([]T, 0, len(dataList)) - corruptQueue := make([]int, 0, len(dataList)) // queue of corrupt indices in dataList - for i, data := range dataList { - if data == nil { + num, err := atox.N[T](data) + if err != nil { errors[i] = true corruptQueue = append(corruptQueue, i) continue } - var num T - num1, ok := data.(int64) - if !ok { - jsonNum, ok := data.(json.Number) - if !ok { - errors[i] = true - corruptQueue = append(corruptQueue, i) - continue - } - - num2, err := strconv.ParseUint(jsonNum.String(), 10, 64) - if err != nil { - errors[i] = true - corruptQueue = append(corruptQueue, i) - continue - } - num = T(num2) - } else { - num = T(num1) - } nums = append(nums, num) } @@ -252,6 +192,7 @@ func detectOutliersUnsignedInt[T constraints.Unsigned](dataList []any, maxSigma if len(nums) == 0 { return outliers, false } + median := findMedian(nums) var numsInd, nonOutlierCount int for i := range outliers { diff --git a/x/tally/types/filters_util.go b/x/tally/types/filters_util.go index e2c9f529..97b1d825 100644 --- a/x/tally/types/filters_util.go +++ b/x/tally/types/filters_util.go @@ -3,13 +3,13 @@ package types import ( "encoding/base64" + "github.com/ohler55/ojg/gen" "github.com/ohler55/ojg/jp" - "github.com/ohler55/ojg/oj" ) type dataAttributes struct { - freqMap map[any]int // frequency map from data to frequency - maxFreq int // frequency of most frequent data in data list + freqMap map[string]int // frequency map from data to frequency + maxFreq int // frequency of most frequent data in data list } // parseReveals parses a list of RevealBody objects using the given @@ -17,10 +17,11 @@ type dataAttributes struct { // It also updates the given errors list to indicate true for the items // that are corrupted. Note when an i-th reveal is corrupted, the i-th // item in the data list is left as nil. -func parseReveals(reveals []RevealBody, dataPath string, errors []bool) ([]any, dataAttributes) { +func parseReveals(reveals []RevealBody, dataPath string, errors []bool) ([]string, dataAttributes) { + var parser gen.Parser var maxFreq int - freq := make(map[any]int, len(reveals)) - dataList := make([]any, len(reveals)) + freq := make(map[string]int, len(reveals)) + dataList := make([]string, len(reveals)) for i, r := range reveals { if r.ExitCode != 0 { errors[i] = true @@ -32,7 +33,7 @@ func parseReveals(reveals []RevealBody, dataPath string, errors []bool) ([]any, errors[i] = true continue } - obj, err := oj.Parse(revealBytes) + obj, err := parser.Parse(revealBytes) if err != nil { errors[i] = true continue @@ -42,12 +43,12 @@ func parseReveals(reveals []RevealBody, dataPath string, errors []bool) ([]any, errors[i] = true continue } - elems := expr.Get(obj) + elems := expr.GetNodes(obj) if len(elems) < 1 { errors[i] = true continue } - data := elems[0] + data := elems[0].String() freq[data]++ maxFreq = max(freq[data], maxFreq)