Skip to content

Commit

Permalink
Statistics for multiple branches (#7558)
Browse files Browse the repository at this point in the history
* starter

* prog

* flush out statsdb interface fork, and noms implementation

* more progress on tests

* StatsIO tests passing

* pretty close, thread canceller still broken

* check context dropped inside refresh loop

* tidy

* nil panic

* dolt harness setup imports setup statistics

* lot of test fixes

* bump

* fmt

* fix providers race

* don't load in create database hook

* bump

* bump

* fix integration tests

* fix more bugs

* fix drop

* more test fixes

* cleanup

* missing updateActive

* simplify bucket merging

* nick comments

* bump

* fix bad merge

* tidy and edits

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* bump test fixes

* bump

* fix bats

* bump

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* redo create db to account for variety of edge cases

* try path.join

---------

Co-authored-by: max-hoffman <[email protected]>
  • Loading branch information
max-hoffman and max-hoffman authored Mar 27, 2024
1 parent 52dc3dc commit 801a82a
Show file tree
Hide file tree
Showing 40 changed files with 2,322 additions and 1,498 deletions.
33 changes: 33 additions & 0 deletions go/Godeps/LICENSES

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions go/cmd/dolt/commands/engine/sqlengine.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/cluster"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/mysql_file_handler"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/stats"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/utils/config"
"github.com/dolthub/dolt/go/store/types"
)
Expand Down Expand Up @@ -180,7 +181,7 @@ func NewSqlEngine(
"authentication_dolt_jwt": NewAuthenticateDoltJWTPlugin(config.JwksConfig),
})

statsPro := stats.NewProvider()
statsPro := statspro.NewProvider(pro, statsnoms.NewNomsStatsFactory(mrEnv.RemoteDialProvider()))
engine.Analyzer.Catalog.StatsProvider = statsPro

engine.Analyzer.ExecBuilder = rowexec.DefaultBuilder
Expand All @@ -192,7 +193,7 @@ func NewSqlEngine(

// configuring stats depends on sessionBuilder
// sessionBuilder needs ref to statsProv
if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, pro, dbs); err != nil {
if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, dbs); err != nil {
fmt.Fprintln(cli.CliErr, err)
}

Expand Down
3 changes: 2 additions & 1 deletion go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ require (
github.com/cespare/xxhash v1.1.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
github.com/dolthub/go-mysql-server v0.18.1-0.20240326173717-f57423646998
github.com/dolthub/go-mysql-server v0.18.1-0.20240326223629-0f6489fffde3
github.com/dolthub/swiss v0.1.0
github.com/goccy/go-json v0.10.2
github.com/google/go-github/v57 v57.0.0
Expand Down Expand Up @@ -87,6 +87,7 @@ require (
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
golang.org/x/text v0.14.0
gonum.org/v1/plot v0.11.0
gopkg.in/errgo.v2 v2.1.0
gopkg.in/yaml.v3 v3.0.1
)

Expand Down
5 changes: 3 additions & 2 deletions go/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168=
github.com/dolthub/go-mysql-server v0.18.1-0.20240326173717-f57423646998 h1:mW5rmY0D0DSek6/UW0uu3B1h84Fttk+Y9Dee1abLAM8=
github.com/dolthub/go-mysql-server v0.18.1-0.20240326173717-f57423646998/go.mod h1:99WjL4v4Ozkona7YowHl20+1B5DPhDbVZnf7WP+TX1U=
github.com/dolthub/go-mysql-server v0.18.1-0.20240326223629-0f6489fffde3 h1:TePc9iACeSLxXkXuf/kYusW6I3SeSq49ebKPccJVNKg=
github.com/dolthub/go-mysql-server v0.18.1-0.20240326223629-0f6489fffde3/go.mod h1:99WjL4v4Ozkona7YowHl20+1B5DPhDbVZnf7WP+TX1U=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488/go.mod h1:ehexgi1mPxRTk0Mok/pADALuHbvATulTh6gzr7NzZto=
github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71 h1:bMGS25NWAGTEtT5tOBsCuCrlYnLRKpbJVJkDbrTRhwQ=
Expand Down Expand Up @@ -1155,6 +1155,7 @@ gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o=
Expand Down
4 changes: 4 additions & 0 deletions go/libraries/doltcore/dbfactory/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,15 @@ const (
// DataDir is the directory internal to the DoltDir which holds the noms files.
DataDir = "noms"

// StatsDir is the directory in DoltDir that holds the database statistics
StatsDir = "stats"

ChunkJournalParam = "journal"
)

// DoltDataDir is the directory where noms files will be stored
var DoltDataDir = filepath.Join(DoltDir, DataDir)
var DoltStatsDir = filepath.Join(DoltDir, StatsDir)

// FileFactory is a DBFactory implementation for creating local filesys backed databases
type FileFactory struct {
Expand Down
22 changes: 7 additions & 15 deletions go/libraries/doltcore/doltdb/doltdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const (

// LocalDirDoltDB stores the db in the current directory
var LocalDirDoltDB = "file://./" + dbfactory.DoltDataDir
var LocalDirStatsDB = "file://./" + dbfactory.DoltStatsDir

// InMemDoltDB stores the DoltDB db in memory and is primarily used for testing
var InMemDoltDB = "mem://"
Expand Down Expand Up @@ -484,15 +485,6 @@ func (ddb *DoltDB) ResolveCommitRef(ctx context.Context, ref ref.DoltRef) (*Comm
return NewCommit(ctx, ddb.vrw, ddb.ns, commitVal)
}

// ResolveStatsRef looks up the dataset named ref.StatsRefName and returns the
// result of its MaybeHeadAddr call. If the dataset lookup fails, the error is
// discarded and the zero hash and false are returned instead.
func (ddb *DoltDB) ResolveStatsRef(ctx context.Context) (hash.Hash, bool) {
ds, err := ddb.db.GetDataset(ctx, ref.StatsRefName)
if err != nil {
// The lookup error is not surfaced; callers only see ok == false.
return hash.Hash{}, false
}
return ds.MaybeHeadAddr()
}

// ResolveCommitRefAtRoot takes a DoltRef and returns a Commit, or an error if the commit cannot be found. The ref given must
// point to a Commit.
func (ddb *DoltDB) ResolveCommitRefAtRoot(ctx context.Context, ref ref.DoltRef, nomsRoot hash.Hash) (*Commit, error) {
Expand Down Expand Up @@ -1804,17 +1796,17 @@ func (ddb *DoltDB) AddStash(ctx context.Context, head *Commit, stash *RootValue,
return err
}

func (ddb *DoltDB) SetStatisics(ctx context.Context, addr hash.Hash) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Hash) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
if err != nil {
return err
}
_, err = ddb.db.SetStatsRef(ctx, statsDs, addr)
return err
}

func (ddb *DoltDB) DropStatisics(ctx context.Context) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
func (ddb *DoltDB) DropStatisics(ctx context.Context, branch string) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())

_, err = ddb.db.Delete(ctx, statsDs, "")
if err != nil {
Expand All @@ -1826,8 +1818,8 @@ func (ddb *DoltDB) DropStatisics(ctx context.Context) error {
var ErrNoStatistics = errors.New("no statistics found")

// GetStatistics returns the value of the ref.StatsRef for the given branch of this database
func (ddb *DoltDB) GetStatistics(ctx context.Context) (prolly.Map, error) {
ds, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
func (ddb *DoltDB) GetStatistics(ctx context.Context, branch string) (prolly.Map, error) {
ds, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
if err != nil {
return prolly.Map{}, err
}
Expand Down
12 changes: 6 additions & 6 deletions go/libraries/doltcore/migrate/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ func initMigrationDB(ctx context.Context, existing *env.DoltEnv, src, dest files
}

ierr := src.Iter(doltDir, true, func(path string, size int64, isDir bool) (stop bool) {
path, err = filepath.Rel(base, path)
if err != nil {
stop = true
return
}

if isDir {
err = dest.MkDirs(path)
stop = err != nil
Expand All @@ -105,12 +111,6 @@ func initMigrationDB(ctx context.Context, existing *env.DoltEnv, src, dest files
return
}

path, err = filepath.Rel(base, path)
if err != nil {
stop = true
return
}

if err = filesys.CopyFile(path, path, src, dest); err != nil {
stop = true
return
Expand Down
2 changes: 1 addition & 1 deletion go/libraries/doltcore/ref/ref.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ func Parse(str string) (DoltRef, error) {
}

if prefix := PrefixForType(StatsRefType); strings.HasPrefix(str, prefix) {
return NewStashRef(), nil
return NewStatsRef(str[len(prefix):]), nil
}

return nil, ErrUnknownRefType
Expand Down
9 changes: 3 additions & 6 deletions go/libraries/doltcore/ref/stats_ref.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,15 @@

package ref

// StatsRefName is a dummy name, and there cannot be more than one stats ref.
const StatsRefName = "stats"

type StatsRef struct {
stats string
}

var _ DoltRef = StatsRef{}

// NewStatsRef creates a reference to a statses list. There cannot be more than one statsRef.
func NewStatsRef() StatsRef {
return StatsRef{StatsRefName}
// NewStatsRef creates a reference to a statistic dataset head.
func NewStatsRef(branch string) StatsRef {
return StatsRef{branch}
}

// GetType will return StatsRefType
Expand Down
3 changes: 0 additions & 3 deletions go/libraries/doltcore/schema/statistic.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
&sql.Column{Name: StatsDbColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsTableColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsIndexColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsPositionColName, Type: types.Int64, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsVersionColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsCommitHashColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsRowCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsDistinctCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsNullCountColName, Type: types.Int64, DatabaseSource: dbName},
Expand Down
4 changes: 2 additions & 2 deletions go/libraries/doltcore/sqle/cluster/initdbhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ func NewInitDatabaseHook(controller *Controller, bt *sql.BackgroundThreads, orig
if controller == nil {
return orig
}
return func(ctx *sql.Context, pro *sqle.DoltDatabaseProvider, name string, denv *env.DoltEnv) error {
return func(ctx *sql.Context, pro *sqle.DoltDatabaseProvider, name string, denv *env.DoltEnv, db dsess.SqlDatabase) error {
var err error
err = orig(ctx, pro, name, denv)
err = orig(ctx, pro, name, denv, db)
if err != nil {
return err
}
Expand Down
10 changes: 5 additions & 5 deletions go/libraries/doltcore/sqle/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ func (db Database) GetTableInsensitive(ctx *sql.Context, tblName string) (sql.Ta
return nil, false, err
}

return db.getTableInsensitive(ctx, nil, ds, root, tblName)
return db.getTableInsensitive(ctx, nil, ds, root, tblName, "")
}

// GetTableInsensitiveAsOf implements sql.VersionedDatabase
Expand All @@ -271,7 +271,7 @@ func (db Database) GetTableInsensitiveAsOf(ctx *sql.Context, tableName string, a

sess := dsess.DSessFromSess(ctx.Session)

table, ok, err := db.getTableInsensitive(ctx, head, sess, root, tableName)
table, ok, err := db.getTableInsensitive(ctx, head, sess, root, tableName, asOf)
if err != nil {
return nil, false, err
}
Expand Down Expand Up @@ -305,7 +305,7 @@ func (db Database) GetTableInsensitiveAsOf(ctx *sql.Context, tableName string, a

}

func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds *dsess.DoltSession, root *doltdb.RootValue, tblName string) (sql.Table, bool, error) {
func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds *dsess.DoltSession, root *doltdb.RootValue, tblName string, asOf interface{}) (sql.Table, bool, error) {
lwrName := strings.ToLower(tblName)

// TODO: these tables that cache a root value at construction time should not, they need to get it from the session
Expand Down Expand Up @@ -365,7 +365,7 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds

case strings.HasPrefix(lwrName, doltdb.DoltConfTablePrefix):
suffix := tblName[len(doltdb.DoltConfTablePrefix):]
srcTable, ok, err := db.getTableInsensitive(ctx, head, ds, root, suffix)
srcTable, ok, err := db.getTableInsensitive(ctx, head, ds, root, suffix, asOf)
if err != nil {
return nil, false, err
} else if !ok {
Expand Down Expand Up @@ -488,7 +488,7 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds
dt, found = dtables.NewDocsTable(ctx, versionableTable), true
}
case doltdb.StatisticsTableName:
dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.ddb), true
dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.ddb, asOf), true
}

if found {
Expand Down
14 changes: 11 additions & 3 deletions go/libraries/doltcore/sqle/database_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ type DoltDatabaseProvider struct {
isStandby *bool
}

// DefaultBranch returns the provider's defaultBranch field.
func (p *DoltDatabaseProvider) DefaultBranch() string {
return p.defaultBranch
}

func (p *DoltDatabaseProvider) WithTableFunctions(fns ...sql.TableFunction) (sql.TableFunctionProvider, error) {
funcs := make(map[string]sql.TableFunction)
for _, fn := range fns {
Expand Down Expand Up @@ -454,12 +458,12 @@ func (p *DoltDatabaseProvider) CreateCollatedDatabase(ctx *sql.Context, name str
return p.registerNewDatabase(ctx, name, newEnv)
}

type InitDatabaseHook func(ctx *sql.Context, pro *DoltDatabaseProvider, name string, env *env.DoltEnv) error
type InitDatabaseHook func(ctx *sql.Context, pro *DoltDatabaseProvider, name string, env *env.DoltEnv, db dsess.SqlDatabase) error
type DropDatabaseHook func(name string)

// ConfigureReplicationDatabaseHook sets up replication for a newly created database as necessary
// TODO: consider the replication heads / all heads setting
func ConfigureReplicationDatabaseHook(ctx *sql.Context, p *DoltDatabaseProvider, name string, newEnv *env.DoltEnv) error {
func ConfigureReplicationDatabaseHook(ctx *sql.Context, p *DoltDatabaseProvider, name string, newEnv *env.DoltEnv, _ dsess.SqlDatabase) error {
_, replicationRemoteName, _ := sql.SystemVariables.GetGlobal(dsess.ReplicateToRemote)
if replicationRemoteName == "" {
return nil
Expand Down Expand Up @@ -616,6 +620,10 @@ func (p *DoltDatabaseProvider) DropDatabase(ctx *sql.Context, name string) error
if err != nil {
return err
}
err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/stats/.dolt/noms"))
if err != nil {
return err
}

err = p.droppedDatabaseManager.DropDatabase(ctx, name, dropDbLoc)
if err != nil {
Expand Down Expand Up @@ -702,7 +710,7 @@ func (p *DoltDatabaseProvider) registerNewDatabase(ctx *sql.Context, name string
// If we have an initialization hook, invoke it. By default, this will
// be ConfigureReplicationDatabaseHook, which will set up replication
// for the new database if a remote url template is set.
err = p.InitDatabaseHook(ctx, p, name, newEnv)
err = p.InitDatabaseHook(ctx, p, name, newEnv, db)
if err != nil {
return err
}
Expand Down
9 changes: 7 additions & 2 deletions go/libraries/doltcore/sqle/dprocedures/stats_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Con
type AutoRefreshStatsProvider interface {
sql.StatsProvider
CancelRefreshThread(string)
StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv) error
StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error
ThreadStatus(string) string
}

Expand All @@ -68,9 +68,14 @@ func statsRestart(ctx *sql.Context) (interface{}, error) {

dEnv := env.Load(ctx, env.GetCurrentUserHomeDir, newFs, pro.DbFactoryUrl(), "TODO")

sqlDb, ok := pro.BaseDatabase(ctx, dbName)
if !ok {
return nil, fmt.Errorf("failed to restart stats collection: database not found: %s", dbName)
}

afp.CancelRefreshThread(dbName)

err = afp.StartRefreshThread(ctx, pro, dbName, dEnv)
err = afp.StartRefreshThread(ctx, pro, dbName, dEnv, sqlDb)
if err != nil {
return nil, fmt.Errorf("failed to restart collection: %w", err)
}
Expand Down
1 change: 1 addition & 0 deletions go/libraries/doltcore/sqle/dsess/variables.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ const (
DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold"
DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval"
DoltStatsMemoryOnly = "dolt_stats_memory_only"
DoltStatsBranches = "dolt_stats_branches"
)

const URLTemplateDatabasePlaceholder = "{database}"
Expand Down
Loading

0 comments on commit 801a82a

Please sign in to comment.