Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new --unused and --unchanged options #6

Merged
merged 2 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 87 additions & 18 deletions cmd/where.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"github.com/spf13/cobra"
gas "github.com/wtsi-hgi/go-authserver"
"github.com/wtsi-hgi/wrstat-ui/server"
"github.com/wtsi-ssg/wrstat/v5/summary"
)

type Error string
Expand All @@ -64,13 +65,15 @@ var (
whereUsers string
whereTypes string
whereSize string
whereAge int
whereAccess int
whereShowSupergroups bool
whereSupergroup string
whereCert string
whereJSON bool
whereOrder string
whereShowUG bool
whereUnused string
whereUnchanged string
)

// whereCmd represents the where command.
Expand Down Expand Up @@ -100,24 +103,34 @@ The default of 2 should give you useful results.

You can filter what files should be considered and reported on:

--groups: only consider files that have group ownership of one of these
comma-separated groups.
--users: only consider files that have user ownership of one of these
comma-separated users.
--types: only consider files that are one of these comma-separated file types,
from this set of allowed values: vcf,vcf.gz,bcf,sam,bam,cram,fasta,
fastq,fastq.gz,ped/bed,compressed,text,log,temp,other
--groups: only consider files that have group ownership of one of these
comma-separated groups.
--users: only consider files that have user ownership of one of these
comma-separated users.
--types: only consider files that are one of these comma-separated file
types, from this set of allowed values:
vcf,vcf.gz,bcf,sam,bam,cram,fasta,fastq,fastq.gz,ped/bed,
compressed,text,log,temp,other
--unused: only consider files that have been unused (based on access time)
for certain amounts of time, from this set of allowed values
(where M is months and Y is years):
1M,2M,6M,1Y,2Y,3Y,5Y,7Y
--unchanged: only consider files that have been unchanged (based on modify time)
for certain amounts of time, using the same values as unused.

If --unused and --unchanged are not specified then files of any age are
considered. These options are mutually exclusive.

To avoid producing too much output, the --size option (specify your own units,
eg. 50M for 50 megabytes) can be used to not display directories that have less
than that size of data nested inside. Defaults to 50M. Likewise, you can use
--age (in days) to only show directories where a file nested inside hasn't been
accessed for at least that long.
--access (number of days) to only show directories where a file nested inside
hasn't been accessed for at least that long.

You can change the sort --order from the default of by 'size', to by 'count',
'age' or 'dir'.

--size, --age and --sort are ignored, however, if you choose --json output,
--size, --access and --sort are ignored, however, if you choose --json output,
which will just give you all the filtered results. In the JSON output, the Size
is in bytes and instead of "age" you get "Atime".

Expand Down Expand Up @@ -178,9 +191,20 @@ with refreshes possible up to 5 days after expiry.
die("bad --size: %s", err)
}

minAtime := time.Now().Add(-(time.Duration(whereAge*hoursPerDay) * time.Hour))
if whereUnused != "" && whereUnchanged != "" {
die("--unused and --unchanged are mutually exclusive")
}

age := summary.DGUTAgeAll
if whereUnused != "" {
age = stringToAge("A" + whereUnused)
} else if whereUnchanged != "" {
age = stringToAge("M" + whereUnchanged)
}

err = where(c, whereQueryDir, whereGroups, whereSupergroup, whereUsers, whereTypes,
minAtime := time.Now().Add(-(time.Duration(whereAccess*hoursPerDay) * time.Hour))

err = where(c, whereQueryDir, whereGroups, whereSupergroup, whereUsers, whereTypes, age,
fmt.Sprintf("%d", whereSplits), whereOrder, minSizeBytes, minAtime, whereJSON)
if err != nil {
die(err.Error())
Expand Down Expand Up @@ -209,7 +233,7 @@ func init() { //nolint:funlen
"ped/bed,compressed,text,log,temp,other) to filter on")
whereCmd.Flags().StringVar(&whereSize, "size", defaultSize,
"minimum size (specify the unit) of files nested under a directory for it to be reported on")
whereCmd.Flags().IntVar(&whereAge, "age", 0,
whereCmd.Flags().IntVar(&whereAccess, "access", 0,
"do not report on directories that contain a file whose access time falls within the last x days")
whereCmd.Flags().StringVarP(&whereCert, "cert", "c", "",
"path to the server's certificate to force trust in it")
Expand All @@ -219,6 +243,10 @@ func init() { //nolint:funlen
"output USERS and GROUPS columns")
whereCmd.Flags().BoolVarP(&whereJSON, "json", "j", false,
"output JSON (ignores --minimum and --order)")
whereCmd.Flags().StringVar(&whereUnused, "unused", "",
"unused age value to filter on (amongst 1M,2M,6M,1Y,2Y,3Y,5Y,7Y)")
whereCmd.Flags().StringVar(&whereUnchanged, "unchanged", "",
"unchanged age value to filter on (amongst 1M,2M,6M,1Y,2Y,3Y,5Y,7Y)")
}

// getServerURL gets the wrstat server URL from the commandline arg or
Expand Down Expand Up @@ -266,18 +294,59 @@ func getSupergroups(c *gas.ClientCLI) (map[string][]string, error) {
return areas, nil
}

// stringToAge converts an age string into the matching summary.DirGUTAge
// constant.
//
// ageStr is expected to be an "A" (access time, used for --unused) or "M"
// (modify time, used for --unchanged) prefix followed by one of the supported
// age values: 1M, 2M, 6M, 1Y, 2Y, 3Y, 5Y or 7Y.
//
// Any other string is reported via die(), naming the rejected value and the
// allowed ones. The final return only exists to satisfy the compiler
// (presumably die() does not return; confirm against its definition).
func stringToAge(ageStr string) summary.DirGUTAge { //nolint:funlen,gocyclo,cyclop
	switch ageStr {
	case "A1M":
		return summary.DGUTAgeA1M
	case "A2M":
		return summary.DGUTAgeA2M
	case "A6M":
		return summary.DGUTAgeA6M
	case "A1Y":
		return summary.DGUTAgeA1Y
	case "A2Y":
		return summary.DGUTAgeA2Y
	case "A3Y":
		return summary.DGUTAgeA3Y
	case "A5Y":
		return summary.DGUTAgeA5Y
	case "A7Y":
		return summary.DGUTAgeA7Y
	case "M1M":
		return summary.DGUTAgeM1M
	case "M2M":
		return summary.DGUTAgeM2M
	case "M6M":
		return summary.DGUTAgeM6M
	case "M1Y":
		return summary.DGUTAgeM1Y
	case "M2Y":
		return summary.DGUTAgeM2Y
	case "M3Y":
		return summary.DGUTAgeM3Y
	case "M5Y":
		return summary.DGUTAgeM5Y
	case "M7Y":
		return summary.DGUTAgeM7Y
	}

	// Strip the internal A/M prefix so the message shows what the user
	// actually typed on the command line.
	value := ageStr
	if len(value) > 0 && (value[0] == 'A' || value[0] == 'M') {
		value = value[1:]
	}

	die("invalid age %q; allowed values are 1M,2M,6M,1Y,2Y,3Y,5Y,7Y", value)

	return summary.DGUTAgeAll
}

// where does the main job of querying the server to answer where the data is on
// disk.
func where(c *gas.ClientCLI, dir, groups, supergroup, users, types, splits, order string,
minSizeBytes uint64, minAtime time.Time, json bool,
func where(c *gas.ClientCLI, dir, groups, supergroup, users, types string, age summary.DirGUTAge,
splits, order string, minSizeBytes uint64, minAtime time.Time, json bool,
) error {
var err error

if groups, err = mergeGroupsWithAreaGroups(c, groups, supergroup); err != nil {
return err
}

body, dss, err := server.GetWhereDataIs(c, dir, groups, users, types, splits)
body, dss, err := server.GetWhereDataIs(c, dir, groups, users, types, age, splits)
if err != nil {
return err
}
Expand Down Expand Up @@ -428,5 +497,5 @@ func printSkipped(n int) {
return
}

warn(fmt.Sprintf("(%d results not displayed as smaller than --size or younger than --age)", n))
warn("(%d results not displayed as smaller than --size or younger than --access)", n)
}
6 changes: 5 additions & 1 deletion server/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ package server

import (
"net/http"
"strconv"

gas "github.com/wtsi-hgi/go-authserver"
"github.com/wtsi-ssg/wrstat/v5/summary"
)

const ErrBadQuery = gas.Error("bad query; check dir, group, user and type")
Expand Down Expand Up @@ -68,7 +70,8 @@ func GetGroupAreas(c *gas.ClientCLI) (map[string][]string, error) {
// You must first Login() to get a JWT that you must supply here.
//
// The other parameters correspond to arguments that dguta.Tree.Where() takes.
func GetWhereDataIs(c *gas.ClientCLI, dir, groups, users, types, splits string) ([]byte, []*DirSummary, error) {
func GetWhereDataIs(c *gas.ClientCLI, dir, groups, users, types string, age summary.DirGUTAge,
splits string) ([]byte, []*DirSummary, error) {
r, err := c.AuthenticatedRequest()
if err != nil {
return nil, nil, err
Expand All @@ -81,6 +84,7 @@ func GetWhereDataIs(c *gas.ClientCLI, dir, groups, users, types, splits string)
"groups": groups,
"users": users,
"types": types,
"age": strconv.Itoa(int(age)),
"splits": splits,
}).
Get(EndPointAuthWhere)
Expand Down
24 changes: 15 additions & 9 deletions server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,7 @@ func testClientsOnRealServer(t *testing.T, username, uid string, gids []string,
c, err := gas.NewClientCLI(jwtBasename, serverTokenBasename, "localhost:1", cert, true)
So(err, ShouldBeNil)

_, _, err = GetWhereDataIs(c, "", "", "", "", "")
_, _, err = GetWhereDataIs(c, "", "", "", "", summary.DGUTAgeAll, "")
So(err, ShouldNotBeNil)

path, err := internaldb.CreateExampleDGUTADBCustomIDs(t, uid, gids[0], gids[1], int(refTime))
Expand All @@ -818,7 +818,7 @@ func testClientsOnRealServer(t *testing.T, username, uid string, gids []string,
err = s.LoadDGUTADBs(path)
So(err, ShouldBeNil)

_, _, err = GetWhereDataIs(c, "/", "", "", "", "")
_, _, err = GetWhereDataIs(c, "/", "", "", "", summary.DGUTAgeAll, "")
So(err, ShouldNotBeNil)
So(err, ShouldEqual, gas.ErrNoAuth)

Expand All @@ -838,27 +838,33 @@ func testClientsOnRealServer(t *testing.T, username, uid string, gids []string,
err = c.Login("user", "pass")
So(err, ShouldBeNil)

_, _, err = GetWhereDataIs(c, "", "", "", "", "")
_, _, err = GetWhereDataIs(c, "", "", "", "", summary.DGUTAgeAll, "")
So(err, ShouldNotBeNil)
So(err, ShouldEqual, ErrBadQuery)

json, dcss, errg := GetWhereDataIs(c, "/", "", "", "", "0")
json, dcss, errg := GetWhereDataIs(c, "/", "", "", "", summary.DGUTAgeAll, "0")
So(errg, ShouldBeNil)
So(string(json), ShouldNotBeBlank)
So(len(dcss), ShouldEqual, 1)
So(dcss[0].Count, ShouldEqual, 24)

json, dcss, errg = GetWhereDataIs(c, "/", g.Name, "", "", "0")
json, dcss, errg = GetWhereDataIs(c, "/", g.Name, "", "", summary.DGUTAgeAll, "0")
So(errg, ShouldBeNil)
So(string(json), ShouldNotBeBlank)
So(len(dcss), ShouldEqual, 1)
So(dcss[0].Count, ShouldEqual, 13)

json, dcss, errg = GetWhereDataIs(c, "/", "", "root", "", "0")
json, dcss, errg = GetWhereDataIs(c, "/", "", "root", "", summary.DGUTAgeAll, "0")
So(errg, ShouldBeNil)
So(string(json), ShouldNotBeBlank)
So(len(dcss), ShouldEqual, 1)
So(dcss[0].Count, ShouldEqual, 14)

json, dcss, errg = GetWhereDataIs(c, "/", "", "", "", summary.DGUTAgeA7Y, "0")
So(errg, ShouldBeNil)
So(string(json), ShouldNotBeBlank)
So(len(dcss), ShouldEqual, 1)
So(dcss[0].Count, ShouldEqual, 19)
})

Convey("Normal users have access restricted only by group", func() {
Expand All @@ -873,19 +879,19 @@ func testClientsOnRealServer(t *testing.T, username, uid string, gids []string,
err = c.Login("user", "pass")
So(err, ShouldBeNil)

json, dcss, errg := GetWhereDataIs(c, "/", "", "", "", "0")
json, dcss, errg := GetWhereDataIs(c, "/", "", "", "", summary.DGUTAgeAll, "0")
So(errg, ShouldBeNil)
So(string(json), ShouldNotBeBlank)
So(len(dcss), ShouldEqual, 1)
So(dcss[0].Count, ShouldEqual, 23)

json, dcss, errg = GetWhereDataIs(c, "/", g.Name, "", "", "0")
json, dcss, errg = GetWhereDataIs(c, "/", g.Name, "", "", summary.DGUTAgeAll, "0")
So(errg, ShouldBeNil)
So(string(json), ShouldNotBeBlank)
So(len(dcss), ShouldEqual, 1)
So(dcss[0].Count, ShouldEqual, 13)

_, _, errg = GetWhereDataIs(c, "/", "", "root", "", "0")
_, _, errg = GetWhereDataIs(c, "/", "", "root", "", summary.DGUTAgeAll, "0")
So(errg, ShouldBeNil)
So(string(json), ShouldNotBeBlank)
So(len(dcss), ShouldEqual, 1)
Expand Down
Loading