Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: parser to parse test files and load testcases #69

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ go 1.22.0
toolchain go1.22.3

require (
github.com/alecthomas/participle/v2 v2.0.0
github.com/antlr4-go/antlr/v4 v4.13.1
github.com/cockroachdb/apd/v3 v3.2.1
github.com/creasty/defaults v1.8.0
Expand Down
8 changes: 0 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
github.com/alecthomas/assert/v2 v2.2.2 h1:Z/iVC0xZfWTaFNE6bA3z07T86hd45Xe2eLt6WVy2bbk=
github.com/alecthomas/assert/v2 v2.2.2/go.mod h1:pXcQ2Asjp247dahGEmsZ6ru0UVwnkhktn7S0bBDLxvQ=
github.com/alecthomas/participle/v2 v2.0.0 h1:Fgrq+MbuSsJwIkw3fEj9h75vDP0Er5JzepJ0/HNHv0g=
github.com/alecthomas/participle/v2 v2.0.0/go.mod h1:rAKZdJldHu8084ojcWevWAL8KmEU+AT+Olodb+WoN2Y=
github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk=
github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
github.com/cockroachdb/apd/v3 v3.2.1 h1:U+8j7t0axsIgvQUqthuNm82HIrYXodOV2iWLWtEaIwg=
Expand Down Expand Up @@ -33,8 +27,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
Expand Down
7 changes: 5 additions & 2 deletions grammar/generate.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package grammar

//go:generate wget -nc https://www.antlr.org/download/antlr-4.13.2-complete.jar
//go:generate wget https://raw.githubusercontent.com/substrait-io/substrait/820085fc913692147d0c8fdfcbf289fb8b348835/grammar/SubstraitLexer.g4
//go:generate wget https://raw.githubusercontent.com/substrait-io/substrait/820085fc913692147d0c8fdfcbf289fb8b348835/grammar/SubstraitType.g4
//go:generate wget https://raw.githubusercontent.com/substrait-io/substrait/c18c0c1347376efa4dfab503bb4db9f820df3cf3/grammar/SubstraitLexer.g4
//go:generate wget https://raw.githubusercontent.com/substrait-io/substrait/c18c0c1347376efa4dfab503bb4db9f820df3cf3/grammar/SubstraitType.g4
//go:generate wget https://raw.githubusercontent.com/substrait-io/substrait/c18c0c1347376efa4dfab503bb4db9f820df3cf3/grammar/FuncTestCaseLexer.g4
//go:generate wget https://raw.githubusercontent.com/substrait-io/substrait/c18c0c1347376efa4dfab503bb4db9f820df3cf3/grammar/FuncTestCaseParser.g4
//go:generate -command antlr java -Xmx500M -cp "./antlr-4.13.2-complete.jar:$CLASSPATH" org.antlr.v4.Tool
//go:generate antlr -Dlanguage=Go -visitor -Dlanguage=Go -package baseparser -o "../types/parser/baseparser" SubstraitLexer.g4 SubstraitType.g4
//go:generate antlr -Dlanguage=Go -visitor -Dlanguage=Go -package baseparser -o "../testcases/parser/baseparser" FuncTestCaseLexer.g4 FuncTestCaseParser.g4
200 changes: 194 additions & 6 deletions literal/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import (
"fmt"
"reflect"
"regexp"
"strconv"
"time"

"github.com/google/uuid"
Expand All @@ -14,8 +17,8 @@
return expr.NewPrimitiveLiteral[bool](value, false), nil
}

func NewInt8(value int8) (expr.Literal, error) {
return expr.NewPrimitiveLiteral[int8](value, false), nil
func NewInt8(value int8) expr.Literal {
scgkiran marked this conversation as resolved.
Show resolved Hide resolved
return expr.NewPrimitiveLiteral[int8](value, false)
}

func NewInt16(value int16) (expr.Literal, error) {
scgkiran marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -30,12 +33,12 @@
return expr.NewPrimitiveLiteral[int64](value, false), nil
}

func NewFloat32(value float32) (expr.Literal, error) {
return expr.NewPrimitiveLiteral[float32](value, false), nil
func NewFloat32(value float32) expr.Literal {
return expr.NewPrimitiveLiteral[float32](value, false)
}

func NewFloat64(value float64) (expr.Literal, error) {
return expr.NewPrimitiveLiteral[float64](value, false), nil
func NewFloat64(value float64) expr.Literal {
return expr.NewPrimitiveLiteral[float64](value, false)
}

func NewString(value string) (expr.Literal, error) {
Expand All @@ -46,6 +49,14 @@
return expr.NewLiteral[types.Date](types.Date(days), false)
}

// NewDateFromString parses value using the "2006-01-02" (YYYY-MM-DD) layout
// and builds a Date literal from the number of whole days between the Unix
// epoch and the parsed date. The parse error is returned unchanged when value
// does not match the layout.
func NewDateFromString(value string) (expr.Literal, error) {
	const secondsPerDay = 86400

	parsed, parseErr := time.Parse("2006-01-02", value)
	if parseErr != nil {
		return nil, parseErr
	}
	daysSinceEpoch := parsed.Unix() / secondsPerDay
	return NewDate(int(daysSinceEpoch))
}

// NewTime creates a new Time literal from the given hours, minutes, seconds and microseconds.
// The total microseconds should be in the range [0, 86400_000_000) to represent a valid time within a day.
func NewTime(hours, minutes, seconds, microseconds int32) (expr.Literal, error) {
Expand All @@ -65,12 +76,47 @@
return expr.NewLiteral[types.Time](types.Time(micros), false)
}

// NewTimeFromString parses value with parseTimeFromString and converts the
// time-of-day portion (hour, minute, second and sub-second part) into a count
// of microseconds, which is then handed to NewTimeFromMicros. Any date part of
// the parsed value is ignored.
func NewTimeFromString(value string) (expr.Literal, error) {
	parsed, err := parseTimeFromString(value)
	if err != nil {
		return nil, err
	}
	hour, minute, second := parsed.Clock()
	secondsOfDay := hour*3600 + minute*60 + second
	// Nanoseconds are truncated (not rounded) down to whole microseconds.
	subSecondMicros := int64(parsed.Nanosecond()) / 1e3
	return NewTimeFromMicros(int64(secondsOfDay)*int64(1e6) + subSecondMicros)
}

// parseTimeFromString tries a fixed sequence of layouts against value and
// returns the result of the first one that parses: RFC 3339, a date-time
// without zone offset, and two time-of-day layouts with optional fractional
// seconds. When every layout fails, the error produced by the last layout is
// returned together with the zero time.Time.
func parseTimeFromString(value string) (time.Time, error) {
	layouts := [...]string{
		time.RFC3339,
		"2006-01-02T15:04:05",
		"15:04:05.9999999",
		"15:04:05.999",
	}

	var (
		parsed time.Time
		err    error
	)
	for _, layout := range layouts {
		parsed, err = time.Parse(layout, value)
		if err == nil {
			return parsed, nil
		}
	}
	return parsed, err
}

// NewTimestamp builds a Timestamp literal from a time.Time value.
// The stored value is timestamp.UnixMicro(), i.e. the number of microseconds
// elapsed since January 1, 1970 00:00:00 UTC.
func NewTimestamp(timestamp time.Time) (expr.Literal, error) {
	micros := types.Timestamp(timestamp.UnixMicro())
	return expr.NewLiteral[types.Timestamp](micros, false)
}

// NewTimestampFromString parses value with parseTimeFromString and wraps the
// resulting instant in a Timestamp literal via NewTimestamp. A parse failure
// is returned unchanged.
func NewTimestampFromString(value string) (expr.Literal, error) {
	parsed, parseErr := parseTimeFromString(value)
	if parseErr == nil {
		return NewTimestamp(parsed)
	}
	return nil, parseErr
}

// NewTimestampFromMicros builds a Timestamp literal directly from micros,
// which is converted to types.Timestamp without any further validation here.
func NewTimestampFromMicros(micros int64) (expr.Literal, error) {
	ts := types.Timestamp(micros)
	return expr.NewLiteral[types.Timestamp](ts, false)
}
Expand All @@ -81,14 +127,143 @@
return expr.NewLiteral[types.TimestampTz](types.TimestampTz(timestamp.UnixMicro()), false)
}

func NewTimestampTZFromString(value string) (expr.Literal, error) {
tm, err := parseTimeFromString(value)
if err != nil {
return nil, err
}

Check warning on line 134 in literal/utils.go

View check run for this annotation

Codecov / codecov/patch

literal/utils.go#L133-L134

Added lines #L133 - L134 were not covered by tests
return NewTimestampTZ(tm)
}

// NewTimestampTZFromMicros builds a TimestampTz literal directly from micros,
// which is converted to types.TimestampTz without any further validation here.
func NewTimestampTZFromMicros(micros int64) (expr.Literal, error) {
	ts := types.TimestampTz(micros)
	return expr.NewLiteral[types.TimestampTz](ts, false)
}

// NewIntervalYearsToMonthFromString parses yearsToMonth with
// parseIntervalYearsToMonth and builds the literal through
// NewIntervalYearsToMonth. A parse failure is returned unchanged.
func NewIntervalYearsToMonthFromString(yearsToMonth string) (expr.Literal, error) {
	y, m, parseErr := parseIntervalYearsToMonth(yearsToMonth)
	if parseErr == nil {
		return NewIntervalYearsToMonth(y, m)
	}
	return nil, parseErr
}

// yearMonthIntervalPattern matches a year/month duration of the form
// P[n]Y[n]M. Both components are optional in the pattern (the caller rejects
// the case where neither is present) and each number may carry a sign.
var yearMonthIntervalPattern = regexp.MustCompile(`^P(?:([+-]?\d+)Y)?(?:([+-]?\d+)M)?$`)

// parseIntervalYearsToMonth parses an interval string of the form P[n]Y[n]M
// (for example "P1Y2M", "P3Y" or "P5M") and returns the year and month counts.
//
// An error is returned when:
//   - the string is shorter than three characters or does not start with 'P',
//   - it contains neither a year nor a month component,
//   - it contains anything other than the year and month components, in that
//     order (trailing text, a unit without a number, months before years), or
//   - a component does not fit in an int32.
func parseIntervalYearsToMonth(interval string) (int32, int32, error) {
	if len(interval) < 3 || interval[0] != 'P' {
		return 0, 0, fmt.Errorf("invalid interval format: %s", interval)
	}
	// Anchored matching rejects trailing or out-of-order text instead of
	// silently ignoring it.
	matches := yearMonthIntervalPattern.FindStringSubmatch(interval)
	if matches == nil || (matches[1] == "" && matches[2] == "") {
		return 0, 0, fmt.Errorf("invalid interval format: %s", interval)
	}

	var years, months int64
	var err error
	if matches[1] != "" {
		// bitSize 32 makes out-of-range values an error rather than a
		// silent truncation when converting to int32 below.
		years, err = strconv.ParseInt(matches[1], 10, 32)
		if err != nil {
			return 0, 0, fmt.Errorf("invalid year value in interval %s: %w", interval, err)
		}
	}
	if matches[2] != "" {
		months, err = strconv.ParseInt(matches[2], 10, 32)
		if err != nil {
			return 0, 0, fmt.Errorf("invalid month value in interval %s: %w", interval, err)
		}
	}
	return int32(years), int32(months), nil
}

// NewIntervalYearsToMonth builds an IntervalYearToMonth literal holding the
// given years and months. The values are stored as passed in.
func NewIntervalYearsToMonth(years, months int32) (expr.Literal, error) {
	interval := &types.IntervalYearToMonth{
		Years:  years,
		Months: months,
	}
	return expr.NewLiteral[*types.IntervalYearToMonth](interval, false)
}

// NewIntervalDaysToSecondFromString parses daysToSecond with
// parseIntervalDaysToSecond and builds an IntervalDayToSecond literal from the
// resulting days, seconds, subseconds and precision. A parse failure is
// returned unchanged.
func NewIntervalDaysToSecondFromString(daysToSecond string) (expr.Literal, error) {
	days, seconds, subSeconds, precision, parseErr := parseIntervalDaysToSecond(daysToSecond)
	if parseErr != nil {
		return nil, parseErr
	}

	precisionMode := &proto.Expression_Literal_IntervalDayToSecond_Precision{
		Precision: precision,
	}
	interval := &types.IntervalDayToSecond{
		Days:          days,
		Seconds:       seconds,
		PrecisionMode: precisionMode,
		Subseconds:    subSeconds,
	}
	return expr.NewLiteral[*types.IntervalDayToSecond](interval, false)
}

func parseIntervalDaysToSecond(interval string) (int32, int32, int64, int32, error) {
if len(interval) < 3 || interval[0] != 'P' {
return 0, 0, 0, 0, fmt.Errorf("invalid interval format: %s", interval)
}

// Parse interval of format P[n]DT[n]H[n]M[n]S[n]F
regex := `^P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?(?:(\d*\.\d+)F)?)?$`
r := regexp.MustCompile(regex)

// Find matches
matches := r.FindStringSubmatch(interval)
if matches == nil {
return 0, 0, 0, 0, fmt.Errorf("invalid interval format: %s", interval)
}

// Parse each component
var err error
var days, hours, minutes, seconds int
var fraction float64
var subSeconds int64
if matches[1] != "" {
days, err = strconv.Atoi(matches[1])
if err != nil {
return 0, 0, 0, 0, fmt.Errorf("invalid day value: %v", err)
}

Check warning on line 229 in literal/utils.go

View check run for this annotation

Codecov / codecov/patch

literal/utils.go#L228-L229

Added lines #L228 - L229 were not covered by tests
}
if matches[2] != "" {
hours, err = strconv.Atoi(matches[2])
if err != nil {
return 0, 0, 0, 0, fmt.Errorf("invalid hour value: %v", err)
}

Check warning on line 235 in literal/utils.go

View check run for this annotation

Codecov / codecov/patch

literal/utils.go#L234-L235

Added lines #L234 - L235 were not covered by tests
}
if matches[3] != "" {
minutes, err = strconv.Atoi(matches[3])
if err != nil {
return 0, 0, 0, 0, fmt.Errorf("invalid minute value: %v", err)
}

Check warning on line 241 in literal/utils.go

View check run for this annotation

Codecov / codecov/patch

literal/utils.go#L240-L241

Added lines #L240 - L241 were not covered by tests
}
if matches[4] != "" {
seconds, err = strconv.Atoi(matches[4])
if err != nil {
return 0, 0, 0, 0, fmt.Errorf("invalid second value: %v", err)
}

Check warning on line 247 in literal/utils.go

View check run for this annotation

Codecov / codecov/patch

literal/utils.go#L246-L247

Added lines #L246 - L247 were not covered by tests
}
if matches[5] != "" {
fraction, err = strconv.ParseFloat(matches[5], 64)
if err != nil {
return 0, 0, 0, 0, fmt.Errorf("invalid fractional second value: %v", err)
}

Check warning on line 253 in literal/utils.go

View check run for this annotation

Codecov / codecov/patch

literal/utils.go#L252-L253

Added lines #L252 - L253 were not covered by tests
}

seconds += hours*3600 + minutes*60
nanoSeconds := int64(fraction * 1e9)
subSeconds = int64(fraction * 1e6)
precision := int32(types.PrecisionMicroSeconds)
if nanoSeconds > subSeconds*1e3 {
subSeconds = nanoSeconds
precision = int32(types.PrecisionNanoSeconds)
}
return int32(days), int32(seconds), subSeconds, precision, nil
}

func NewIntervalDaysToSecond(days, seconds int32, micros int64) (expr.Literal, error) {
return expr.NewLiteral[*types.IntervalDayToSecond](&types.IntervalDayToSecond{
Days: days,
Expand Down Expand Up @@ -205,3 +380,16 @@
panic(fmt.Sprintf("unknown TimePrecision %v", precision))
}
}

// NewList builds a list literal from elements. It returns an error when
// elements is empty, or when any element's dynamic Go type (as reported by
// reflect.TypeOf) differs from that of the first element; the error names the
// index of the first offending element.
func NewList(elements []expr.Literal) (expr.Literal, error) {
	if len(elements) == 0 {
		return nil, fmt.Errorf("empty list literal")
	}

	want := reflect.TypeOf(elements[0])
	// Element 0 trivially matches itself, so comparison starts at index 1.
	for idx := 1; idx < len(elements); idx++ {
		if got := reflect.TypeOf(elements[idx]); got != want {
			return nil, fmt.Errorf("element %d of list literal has different type", idx)
		}
	}
	return expr.NewLiteral[expr.ListLiteralValue](elements, false)
}
Loading
Loading