diff --git a/unikmer/cmd/concat.go b/unikmer/cmd/concat.go index a1e4226..32dbbaf 100644 --- a/unikmer/cmd/concat.go +++ b/unikmer/cmd/concat.go @@ -44,14 +44,22 @@ Attentions: Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } checkFiles(files) outFile := getFlagString(cmd, "out-prefix") - var err error - if !isStdout(outFile) { outFile += extDataFile } diff --git a/unikmer/cmd/count.go b/unikmer/cmd/count.go index a4bed08..0987986 100644 --- a/unikmer/cmd/count.go +++ b/unikmer/cmd/count.go @@ -42,7 +42,17 @@ var countCmd = &cobra.Command{ opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) seq.ValidateSeq = false - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } outFile := getFlagString(cmd, "out-prefix") circular := getFlagBool(cmd, "circular") diff --git a/unikmer/cmd/diff.go b/unikmer/cmd/diff.go index ade0df3..811375b 100644 --- a/unikmer/cmd/diff.go +++ b/unikmer/cmd/diff.go @@ -48,7 +48,17 @@ Tips: `, Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } checkFiles(files) @@ -57,8 +67,6 @@ Tips: runtime.GOMAXPROCS(threads) - var err error - m := make(map[uint64]bool, mapInitSize) var infh *bufio.Reader @@ -166,7 +174,7 @@ Tips: done := make(chan int) - toStop := make(chan int, 1) + toStop := make(chan int, 
threads+2) doneDone := make(chan int) go func() { <-toStop @@ -236,18 +244,18 @@ Tips: var ok, mark bool m1 := maps[i] for { - select { - case <-done: - return - default: - } - ifile, ok = <-chFile if !ok { return } file = ifile.file + select { + case <-done: + return + default: + } + if opt.Verbose { log.Infof("(worker %d) process file (%d/%d): %s", i, ifile.i+1, nfiles, file) } @@ -304,14 +312,21 @@ Tips: // send file go func() { + SENDFILE: for i, file := range files[1:] { if file == files[0] { continue } + select { + case <-done: + break SENDFILE + default: + } chFile <- iFile{i + 1, file} } close(chFile) + doneSendFile <- 1 }() @@ -351,7 +366,7 @@ Tips: if len(m0) == 0 { if opt.Verbose { - log.Infof("no set difference found") + log.Warningf("no set difference found") } return } diff --git a/unikmer/cmd/dump.go b/unikmer/cmd/dump.go index c7707d6..42acb66 100644 --- a/unikmer/cmd/dump.go +++ b/unikmer/cmd/dump.go @@ -39,7 +39,17 @@ var dumpCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } outFile := getFlagString(cmd, "out-prefix") noDedup := getFlagBool(cmd, "no-dedup") diff --git a/unikmer/cmd/grep.go b/unikmer/cmd/grep.go index c981921..599e07a 100644 --- a/unikmer/cmd/grep.go +++ b/unikmer/cmd/grep.go @@ -44,7 +44,17 @@ var grepCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } if len(files) > 1 { checkError(fmt.Errorf("no 
more than one file should be given")) @@ -63,8 +73,6 @@ var grepCmd = &cobra.Command{ checkError(fmt.Errorf("one of flags -q (--query) and -f (--query-file) needed")) } - var err error - if patternFile != "" { var ok bool ok, err = pathutil.Exists(patternFile) diff --git a/unikmer/cmd/inter.go b/unikmer/cmd/inter.go index c3f2ae1..ff633e2 100644 --- a/unikmer/cmd/inter.go +++ b/unikmer/cmd/inter.go @@ -41,14 +41,22 @@ var interCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } checkFiles(files) outFile := getFlagString(cmd, "out-prefix") - var err error - m := make(map[uint64]bool, mapInitSize) var infh *bufio.Reader diff --git a/unikmer/cmd/root.go b/unikmer/cmd/root.go index e8ab63a..c29fa89 100644 --- a/unikmer/cmd/root.go +++ b/unikmer/cmd/root.go @@ -71,4 +71,5 @@ func init() { RootCmd.PersistentFlags().BoolP("verbose", "", false, "print verbose information") RootCmd.PersistentFlags().BoolP("no-compress", "C", false, "do not compress binary file (not recommended)") RootCmd.PersistentFlags().BoolP("compact", "c", false, "write more compact binary file with little loss of speed") + RootCmd.PersistentFlags().StringP("infile-list", "i", "", "file of input files list (one file per line), if given, files from cli arguments are ignored") } diff --git a/unikmer/cmd/sample.go b/unikmer/cmd/sample.go index a08e323..1bf1110 100644 --- a/unikmer/cmd/sample.go +++ b/unikmer/cmd/sample.go @@ -46,7 +46,17 @@ Attentions: Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" 
{ + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } checkFiles(files) @@ -54,8 +64,6 @@ Attentions: window := getFlagPositiveInt(cmd, "window") outFile := getFlagString(cmd, "out-prefix") - var err error - if !isStdout(outFile) { outFile += extDataFile } diff --git a/unikmer/cmd/stats.go b/unikmer/cmd/stats.go index 92d6565..ed41836 100644 --- a/unikmer/cmd/stats.go +++ b/unikmer/cmd/stats.go @@ -51,7 +51,17 @@ Tips: Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } checkFiles(files) diff --git a/unikmer/cmd/subset.go b/unikmer/cmd/subset.go index 44b2e97..24775a3 100644 --- a/unikmer/cmd/subset.go +++ b/unikmer/cmd/subset.go @@ -46,7 +46,17 @@ Attention: Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } if len(files) > 1 { checkError(fmt.Errorf("no more than one file should be given")) @@ -62,7 +72,6 @@ Attention: file := files[0] - var err error var infh *bufio.Reader var r *os.File diff --git a/unikmer/cmd/union.go b/unikmer/cmd/union.go index 901c764..37ddbe6 100644 --- a/unikmer/cmd/union.go +++ b/unikmer/cmd/union.go @@ -45,14 +45,22 @@ Attentions: Run: func(cmd *cobra.Command, args []string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = 
// getListFromFile reads a list of input file paths from file, one path per
// line, and returns them in the order in which they appear.
//
// Empty lines are skipped so that stray blank lines in the list do not turn
// into empty file names. Line terminators ("\n" or "\r\n") are removed by the
// scanner; any other whitespace in a line is kept as part of the path.
//
// An error is returned when the list file cannot be opened or read.
func getListFromFile(file string) ([]string, error) {
	fh, err := os.Open(file)
	if err != nil {
		return nil, fmt.Errorf("fail to read %s: %s", file, err)
	}
	// The handle is only read from, so a failure to close it carries no
	// information worth reporting; closing it prevents a descriptor leak.
	defer fh.Close()

	lists := make([]string, 0, 1000)
	scanner := bufio.NewScanner(fh)
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}
		lists = append(lists, line)
	}
	if err = scanner.Err(); err != nil {
		return nil, fmt.Errorf("fail to read %s: %s", file, err)
	}

	return lists, nil
}
[]string) { opt := getOptions(cmd) runtime.GOMAXPROCS(opt.NumCPUs) - files := getFileList(args) + + var err error + + var files []string + infileList := getFlagString(cmd, "infile-list") + if infileList != "" { + files, err = getListFromFile(infileList) + checkError(err) + } else { + files = getFileList(args) + } checkFiles(files)