Skip to content

Commit

Permalink
cli: add global option -i for reading files listed in file. unikmer d…
Browse files Browse the repository at this point in the history
…iff: fix concurrency bug when no diff found
  • Loading branch information
shenwei356 committed Sep 24, 2018
1 parent ed584f4 commit 975c405
Show file tree
Hide file tree
Showing 14 changed files with 166 additions and 33 deletions.
14 changes: 11 additions & 3 deletions unikmer/cmd/concat.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,22 @@ Attentions:
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

checkFiles(files)

outFile := getFlagString(cmd, "out-prefix")

var err error

if !isStdout(outFile) {
outFile += extDataFile
}
Expand Down
12 changes: 11 additions & 1 deletion unikmer/cmd/count.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,17 @@ var countCmd = &cobra.Command{
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
seq.ValidateSeq = false
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

outFile := getFlagString(cmd, "out-prefix")
circular := getFlagBool(cmd, "circular")
Expand Down
37 changes: 26 additions & 11 deletions unikmer/cmd/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,17 @@ Tips:
`,
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

checkFiles(files)

Expand All @@ -57,8 +67,6 @@ Tips:

runtime.GOMAXPROCS(threads)

var err error

m := make(map[uint64]bool, mapInitSize)

var infh *bufio.Reader
Expand Down Expand Up @@ -166,7 +174,7 @@ Tips:

done := make(chan int)

toStop := make(chan int, 1)
toStop := make(chan int, threads+2)
doneDone := make(chan int)
go func() {
<-toStop
Expand Down Expand Up @@ -236,18 +244,18 @@ Tips:
var ok, mark bool
m1 := maps[i]
for {
select {
case <-done:
return
default:
}

ifile, ok = <-chFile
if !ok {
return
}
file = ifile.file

select {
case <-done:
return
default:
}

if opt.Verbose {
log.Infof("(worker %d) process file (%d/%d): %s", i, ifile.i+1, nfiles, file)
}
Expand Down Expand Up @@ -304,14 +312,21 @@ Tips:

// send file
go func() {
SENDFILE:
for i, file := range files[1:] {
if file == files[0] {
continue
}
select {
case <-done:
break SENDFILE
default:
}

chFile <- iFile{i + 1, file}
}
close(chFile)

doneSendFile <- 1
}()

Expand Down Expand Up @@ -351,7 +366,7 @@ Tips:

if len(m0) == 0 {
if opt.Verbose {
log.Infof("no set difference found")
log.Warningf("no set difference found")
}
return
}
Expand Down
12 changes: 11 additions & 1 deletion unikmer/cmd/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,17 @@ var dumpCmd = &cobra.Command{
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

outFile := getFlagString(cmd, "out-prefix")
noDedup := getFlagBool(cmd, "no-dedup")
Expand Down
14 changes: 11 additions & 3 deletions unikmer/cmd/grep.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,17 @@ var grepCmd = &cobra.Command{
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

if len(files) > 1 {
checkError(fmt.Errorf("no more than one file should be given"))
Expand All @@ -63,8 +73,6 @@ var grepCmd = &cobra.Command{
checkError(fmt.Errorf("one of flags -q (--query) and -f (--query-file) needed"))
}

var err error

if patternFile != "" {
var ok bool
ok, err = pathutil.Exists(patternFile)
Expand Down
14 changes: 11 additions & 3 deletions unikmer/cmd/inter.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,22 @@ var interCmd = &cobra.Command{
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

checkFiles(files)

outFile := getFlagString(cmd, "out-prefix")

var err error

m := make(map[uint64]bool, mapInitSize)

var infh *bufio.Reader
Expand Down
1 change: 1 addition & 0 deletions unikmer/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,5 @@ func init() {
RootCmd.PersistentFlags().BoolP("verbose", "", false, "print verbose information")
RootCmd.PersistentFlags().BoolP("no-compress", "C", false, "do not compress binary file (not recommended)")
RootCmd.PersistentFlags().BoolP("compact", "c", false, "write more compact binary file with little loss of speed")
RootCmd.PersistentFlags().StringP("infile-list", "i", "", "file of input files list (one file per line), if given, files from cli arguments are ignored")
}
14 changes: 11 additions & 3 deletions unikmer/cmd/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,24 @@ Attentions:
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

checkFiles(files)

start := getFlagPositiveInt(cmd, "start")
window := getFlagPositiveInt(cmd, "window")
outFile := getFlagString(cmd, "out-prefix")

var err error

if !isStdout(outFile) {
outFile += extDataFile
}
Expand Down
12 changes: 11 additions & 1 deletion unikmer/cmd/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,17 @@ Tips:
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

checkFiles(files)

Expand Down
13 changes: 11 additions & 2 deletions unikmer/cmd/subset.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,17 @@ Attention:
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

if len(files) > 1 {
checkError(fmt.Errorf("no more than one file should be given"))
Expand All @@ -62,7 +72,6 @@ Attention:

file := files[0]

var err error
var infh *bufio.Reader
var r *os.File

Expand Down
14 changes: 11 additions & 3 deletions unikmer/cmd/union.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,22 @@ Attentions:
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

checkFiles(files)

outFile := getFlagString(cmd, "out-prefix")

var err error

m := make(map[uint64]struct{}, mapInitSize)

if !isStdout(outFile) {
Expand Down
19 changes: 19 additions & 0 deletions unikmer/cmd/util-cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
package cmd

import (
"bufio"
"fmt"
"os"
"strconv"
Expand Down Expand Up @@ -175,3 +176,21 @@ func getFlagStringSlice(cmd *cobra.Command, flag string) []string {
checkError(err)
return value
}

// getListFromFile reads a list of entries from file, one entry per line,
// and returns them in the order they appear.
//
// Empty lines are skipped, so blank separators or a trailing blank line in
// the list file do not produce empty entries. Line terminators are removed
// by the scanner.
//
// It returns a nil slice and an error when the file cannot be opened or
// when scanning fails.
func getListFromFile(file string) ([]string, error) {
	fh, err := os.Open(file)
	if err != nil {
		return nil, fmt.Errorf("fail to read %s: %s", file, err)
	}
	// Release the descriptor on every return path; the handle is only
	// read from, so the Close error is not actionable here.
	defer fh.Close()

	lists := make([]string, 0, 1000)
	scanner := bufio.NewScanner(fh)
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			// An empty entry can never name a file; ignore it.
			continue
		}
		lists = append(lists, line)
	}
	if err = scanner.Err(); err != nil {
		return nil, fmt.Errorf("fail to read %s: %s", file, err)
	}

	return lists, nil
}
11 changes: 10 additions & 1 deletion unikmer/cmd/util-io.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,25 @@ import (
"fmt"
"io"
"os"
"path/filepath"

gzip "github.com/klauspost/pgzip"
)

func outStream(file string, gzipped bool) (*bufio.Writer, io.WriteCloser, *os.File, error) {
var err error
var w *os.File
if file == "-" {
w = os.Stdout
} else {
dir := filepath.Dir(file)
fi, err := os.Stat(dir)
if err == nil && !fi.IsDir() {
return nil, nil, nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
}
if os.IsNotExist(err) {
os.MkdirAll(dir, 0755)
}

w, err = os.Create(file)
if err != nil {
return nil, nil, nil, fmt.Errorf("fail to write %s: %s", file, err)
Expand Down
12 changes: 11 additions & 1 deletion unikmer/cmd/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,17 @@ var viewCmd = &cobra.Command{
Run: func(cmd *cobra.Command, args []string) {
opt := getOptions(cmd)
runtime.GOMAXPROCS(opt.NumCPUs)
files := getFileList(args)

var err error

var files []string
infileList := getFlagString(cmd, "infile-list")
if infileList != "" {
files, err = getListFromFile(infileList)
checkError(err)
} else {
files = getFileList(args)
}

checkFiles(files)

Expand Down

0 comments on commit 975c405

Please sign in to comment.