From 66c53e3d6bd9bb21b885b4fc09eee14e919643bb Mon Sep 17 00:00:00 2001
From: Frank Austin Nothaft
Date: Fri, 15 Dec 2017 20:35:29 -0800
Subject: [PATCH] [ADAM-1834] Add proper extensions for SAM/BAM/CRAM output
 formats.

Resolves #1834.
---
 .../org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala  | 4 ++--
 .../org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala | 4 ++--
 .../org/bdgenomics/adam/rdd/read/ADAMSAMOutputFormat.scala  | 4 ++--
 .../bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala  | 4 ++--
 .../org/bdgenomics/adam/util/ParallelFileMergerSuite.scala  | 6 +++---
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala
index b0380b24b3..953f506bd8 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala
@@ -47,7 +47,7 @@ class ADAMBAMOutputFormat[K]
     readSAMHeaderFrom(path, conf)
 
     // now that we have the header set, we need to make a record reader
-    return new KeyIgnoringBAMRecordWriter[K](getDefaultWorkFile(context, ""),
+    return new KeyIgnoringBAMRecordWriter[K](getDefaultWorkFile(context, ".bam"),
       header,
       true,
       context)
@@ -84,7 +84,7 @@ class ADAMBAMOutputFormatHeaderLess[K]
     readSAMHeaderFrom(path, conf)
 
     // now that we have the header set, we need to make a record reader
-    return new KeyIgnoringBAMRecordWriter[K](getDefaultWorkFile(context, ""),
+    return new KeyIgnoringBAMRecordWriter[K](getDefaultWorkFile(context, ".bam"),
       header,
       false,
       context)
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala
index 7ddda38588..0fe6514478 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala
@@ -47,7 +47,7 @@ class ADAMCRAMOutputFormat[K]
     readSAMHeaderFrom(path, conf)
 
     // now that we have the header set, we need to make a record reader
-    return new KeyIgnoringCRAMRecordWriter[K](getDefaultWorkFile(context, ""),
+    return new KeyIgnoringCRAMRecordWriter[K](getDefaultWorkFile(context, ".cram"),
       header,
       true,
       context)
@@ -84,7 +84,7 @@ class ADAMCRAMOutputFormatHeaderLess[K]
     readSAMHeaderFrom(path, conf)
 
     // now that we have the header set, we need to make a record reader
-    return new KeyIgnoringCRAMRecordWriter[K](getDefaultWorkFile(context, ""),
+    return new KeyIgnoringCRAMRecordWriter[K](getDefaultWorkFile(context, ".cram"),
       header,
       false,
       context)
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMSAMOutputFormat.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMSAMOutputFormat.scala
index b035f739f2..d0b3013e49 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMSAMOutputFormat.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMSAMOutputFormat.scala
@@ -44,7 +44,7 @@ class ADAMSAMOutputFormat[K]
     readSAMHeaderFrom(path, conf)
 
     // now that we have the header set, we need to make a record reader
-    return new KeyIgnoringSAMRecordWriter(getDefaultWorkFile(context, ""),
+    return new KeyIgnoringSAMRecordWriter(getDefaultWorkFile(context, ".sam"),
       header,
       true,
       context)
@@ -71,7 +71,7 @@ class ADAMSAMOutputFormatHeaderLess[K]
     readSAMHeaderFrom(path, conf)
 
     // now that we have the header set, we need to make a record reader
-    return new KeyIgnoringSAMRecordWriter(getDefaultWorkFile(context, ""),
+    return new KeyIgnoringSAMRecordWriter(getDefaultWorkFile(context, ".sam"),
       header,
       false,
       context)
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala
index e647d98982..99ef9cf6df 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala
@@ -296,7 +296,7 @@ class AlignmentRecordRDDSuite extends ADAMFunSuite {
     ardd.saveAsSam(tempFile.toAbsolutePath.toString + "/reads12.sam",
       asType = Some(SAMFormat.SAM))
 
-    val rdd12B = sc.loadBam(tempFile.toAbsolutePath.toString + "/reads12.sam/part-r-00000")
+    val rdd12B = sc.loadBam(tempFile.toAbsolutePath.toString + "/reads12.sam/part-r-00000.sam")
 
     assert(rdd12B.rdd.count() === rdd12A.rdd.count())
 
@@ -358,7 +358,7 @@ class AlignmentRecordRDDSuite extends ADAMFunSuite {
       asSingleFile = false,
       isSorted = true)
 
-    val rddB = sc.loadBam(tempFile + "/part-r-00000")
+    val rddB = sc.loadBam(tempFile + "/part-r-00000.cram")
 
     assert(rddB.rdd.count() === rddA.rdd.count())
 
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/util/ParallelFileMergerSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/util/ParallelFileMergerSuite.scala
index ec60c051fd..592baddfb0 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/util/ParallelFileMergerSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/util/ParallelFileMergerSuite.scala
@@ -155,7 +155,7 @@ class ParallelFileMergerSuite extends ADAMFunSuite {
 
     val fs = FileSystem.get(sc.hadoopConfiguration)
     val filesToMerge = (Seq(outPath + "_head") ++ (0 until 4).map(i => {
-      outPath + "_tail/part-r-0000%d".format(i)
+      outPath + "_tail/part-r-0000%d.sam".format(i)
     })).map(new Path(_))
       .map(p => (p.toString, 0L, fs.getFileStatus(p).getLen().toLong - 1L))
 
@@ -179,7 +179,7 @@ class ParallelFileMergerSuite extends ADAMFunSuite {
 
     val fs = FileSystem.get(sc.hadoopConfiguration)
     val filesToMerge = (Seq(outPath + "_head") ++ (0 until 4).map(i => {
-      outPath + "_tail/part-r-0000%d".format(i)
+      outPath + "_tail/part-r-0000%d.bam".format(i)
     })).map(new Path(_))
       .map(p => (p.toString, 0L, fs.getFileStatus(p).getLen().toLong - 1L))
 
@@ -206,7 +206,7 @@ class ParallelFileMergerSuite extends ADAMFunSuite {
 
     val fs = FileSystem.get(sc.hadoopConfiguration)
    val filesToMerge = (Seq(outPath + "_head") ++ (0 until 4).map(i => {
-      outPath + "_tail/part-r-0000%d".format(i)
+      outPath + "_tail/part-r-0000%d.cram".format(i)
     })).map(new Path(_))
       .map(p => (p.toString, 0L, fs.getFileStatus(p).getLen().toLong - 1L))
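
Not part of the patch: a minimal usage sketch of the behavior this change enables, in the spirit of the tests above. It assumes an existing SparkContext `sc` with ADAM's implicits in scope; `loadAlignments` is assumed from ADAM's public API, and the input path and output directory are hypothetical.

// Usage sketch (assumes a live SparkContext `sc`; paths are hypothetical).
import org.bdgenomics.adam.rdd.ADAMContext._
import org.seqdoop.hadoop_bam.SAMFormat

// Load reads and write them back as sharded SAM (one part file per partition).
val reads = sc.loadAlignments("sample.bam")
reads.saveAsSam("out.sam",
  asType = Some(SAMFormat.SAM),
  asSingleFile = false)

// With this patch, each shard carries the format's extension, so it can be
// read back directly; previously the shard was named out.sam/part-r-00000.
val reloaded = sc.loadBam("out.sam/part-r-00000.sam")
assert(reloaded.rdd.count() === reads.rdd.count())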