Skip to content

Commit

Permalink
refactor(x/tally): reveal data json parsing based on strings
Browse files Browse the repository at this point in the history
  • Loading branch information
hacheigriega committed Jan 28, 2025
1 parent 69cf01f commit 266c505
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 80 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jmhodges/levigo v1.0.0 // indirect
github.com/josharian/atox v0.0.0-20220719193034-5fa70815fecc // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfC
github.com/jmhodges/levigo v1.0.0 h1:q5EC36kV79HWeTBWsod3mG11EgStG3qArTKcvlksN1U=
github.com/jmhodges/levigo v1.0.0/go.mod h1:Q6Qx+uH3RAqyK4rFQroq9RL7mdkABMcfhEI+nNuzMJQ=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
github.com/josharian/atox v0.0.0-20220719193034-5fa70815fecc h1:IXLfvWHYgu6dRCQPZ2fEx4KP6J+LP1q7hzvrUTFWrbY=
github.com/josharian/atox v0.0.0-20220719193034-5fa70815fecc/go.mod h1:8aqLq+j+a8UzykKUzXfICLW1mCHAYSENiWhfyeAOfV8=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlTRt3OuAQ=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
Expand Down
83 changes: 12 additions & 71 deletions x/tally/types/filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ package types
import (
"bytes"
"encoding/binary"
"encoding/json"
"slices"
"strconv"

"golang.org/x/exp/constraints"

"github.com/josharian/atox"
)

var (
Expand Down Expand Up @@ -98,7 +98,7 @@ func (f FilterMode) ApplyFilter(reveals []RevealBody, errors []bool) ([]bool, bo
type FilterStdDev struct {
maxSigma Sigma
dataPath string // JSON path to reveal data
filterFunc func(dataList []any, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool)
filterFunc func(dataList []string, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool)
replicationFactor uint16
}

Expand Down Expand Up @@ -126,13 +126,13 @@ func NewFilterStdDev(input []byte, gasCostMultiplier uint64, replicationFactor u

switch input[9] {
case 0x00: // Int32
filter.filterFunc = detectOutliersSignedInteger[int32]
filter.filterFunc = detectOutliersInteger[int32]
case 0x01: // Int64
filter.filterFunc = detectOutliersSignedInteger[int64]
filter.filterFunc = detectOutliersInteger[int64]
case 0x02: // Uint32
filter.filterFunc = detectOutliersUnsignedInt[uint32]
filter.filterFunc = detectOutliersInteger[uint32]
case 0x03: // Uint64
filter.filterFunc = detectOutliersUnsignedInt[uint64]
filter.filterFunc = detectOutliersInteger[uint64]
default:
return filter, ErrInvalidNumberType
}
Expand Down Expand Up @@ -168,82 +168,22 @@ func (f FilterStdDev) ApplyFilter(reveals []RevealBody, errors []bool) ([]bool,
return f.filterFunc(dataList, f.maxSigma, errors, f.replicationFactor)
}

func detectOutliersSignedInteger[T constraints.Integer](dataList []any, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool) {
func detectOutliersInteger[T constraints.Integer](dataList []string, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool) {
nums := make([]T, 0, len(dataList))
corruptQueue := make([]int, 0, len(dataList)) // queue of corrupt indices in dataList
for i, data := range dataList {
if data == nil {
errors[i] = true
corruptQueue = append(corruptQueue, i)
continue
}
num, ok := data.(int64)
if !ok {
if data == "" {
errors[i] = true
corruptQueue = append(corruptQueue, i)
continue
}
nums = append(nums, T(num))
}

// Construct outliers list.
outliers := make([]bool, len(dataList))
if len(nums) == 0 {
return outliers, false
}
median := findMedian(nums)
var numsInd, nonOutlierCount int
for i := range outliers {
if len(corruptQueue) > 0 && i == corruptQueue[0] {
outliers[i] = true
corruptQueue = corruptQueue[1:]
} else {
if median.IsWithinSigma(nums[numsInd], maxSigma) {
nonOutlierCount++
} else {
outliers[i] = true
}
numsInd++
}
}

// If fewer than 2/3 of the replication factor's worth of reveals fall
// within max sigma range from the median, there is no consensus in reveal data.
if nonOutlierCount*3 < int(replicationFactor)*2 {
return outliers, false
}
return outliers, true
}

func detectOutliersUnsignedInt[T constraints.Unsigned](dataList []any, maxSigma Sigma, errors []bool, replicationFactor uint16) ([]bool, bool) {
nums := make([]T, 0, len(dataList))
corruptQueue := make([]int, 0, len(dataList)) // queue of corrupt indices in dataList
for i, data := range dataList {
if data == nil {
num, err := atox.N[T](data)
if err != nil {
errors[i] = true
corruptQueue = append(corruptQueue, i)
continue
}
var num T
num1, ok := data.(int64)
if !ok {
jsonNum, ok := data.(json.Number)
if !ok {
errors[i] = true
corruptQueue = append(corruptQueue, i)
continue
}

num2, err := strconv.ParseUint(jsonNum.String(), 10, 64)
if err != nil {
errors[i] = true
corruptQueue = append(corruptQueue, i)
continue
}
num = T(num2)
} else {
num = T(num1)
}
nums = append(nums, num)
}

Expand All @@ -252,6 +192,7 @@ func detectOutliersUnsignedInt[T constraints.Unsigned](dataList []any, maxSigma
if len(nums) == 0 {
return outliers, false
}

median := findMedian(nums)
var numsInd, nonOutlierCount int
for i := range outliers {
Expand Down
19 changes: 10 additions & 9 deletions x/tally/types/filters_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,25 @@ package types
import (
"encoding/base64"

"github.com/ohler55/ojg/gen"
"github.com/ohler55/ojg/jp"
"github.com/ohler55/ojg/oj"
)

type dataAttributes struct {
freqMap map[any]int // frequency map from data to frequency
maxFreq int // frequency of most frequent data in data list
freqMap map[string]int // frequency map from data to frequency
maxFreq int // frequency of most frequent data in data list
}

// parseReveals parses a list of RevealBody objects using the given
// data path and returns a parsed data list along with its attributes.
// It also updates the given errors list to indicate true for the items
// that are corrupted. Note that when the i-th reveal is corrupted, the i-th
// item in the data list is left as an empty string.
func parseReveals(reveals []RevealBody, dataPath string, errors []bool) ([]any, dataAttributes) {
func parseReveals(reveals []RevealBody, dataPath string, errors []bool) ([]string, dataAttributes) {
var parser gen.Parser
var maxFreq int
freq := make(map[any]int, len(reveals))
dataList := make([]any, len(reveals))
freq := make(map[string]int, len(reveals))
dataList := make([]string, len(reveals))
for i, r := range reveals {
if r.ExitCode != 0 {
errors[i] = true
Expand All @@ -32,7 +33,7 @@ func parseReveals(reveals []RevealBody, dataPath string, errors []bool) ([]any,
errors[i] = true
continue
}
obj, err := oj.Parse(revealBytes)
obj, err := parser.Parse(revealBytes)
if err != nil {
errors[i] = true
continue
Expand All @@ -42,12 +43,12 @@ func parseReveals(reveals []RevealBody, dataPath string, errors []bool) ([]any,
errors[i] = true
continue
}
elems := expr.Get(obj)
elems := expr.GetNodes(obj)
if len(elems) < 1 {
errors[i] = true
continue
}
data := elems[0]
data := elems[0].String()

freq[data]++
maxFreq = max(freq[data], maxFreq)
Expand Down

0 comments on commit 266c505

Please sign in to comment.