diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java index e1f8735260..24fb8c4230 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalList.java +++ b/src/main/java/htsjdk/samtools/util/IntervalList.java @@ -864,6 +864,7 @@ public static class IntervalMergerIterator implements Iterator { MutableFeature current = null; boolean currentStrandNegative = false; + String currentFirstName = null; public IntervalMergerIterator(Iterator intervals, final boolean combineAbuttingIntervals, final boolean enforceSameStrand, final boolean concatenateNames) { this.inputIntervals = intervals; @@ -891,27 +892,37 @@ private Interval getNext() { while (inputIntervals.hasNext()) { next = inputIntervals.next(); if (current == null) { - toBeMerged.add(next); + if (concatenateNames) { + toBeMerged.add(next); + } current = new MutableFeature(next); currentStrandNegative = next.isNegativeStrand(); + currentFirstName = next.getName(); } else if (current.overlaps(next) || (combineAbuttingIntervals && current.withinDistanceOf(next,1))) { if (enforceSameStrands && currentStrandNegative != next.isNegativeStrand()) { throw new SAMException("Strands were not equal for: " + current.toString() + " and " + next.toString()); } - toBeMerged.add(next); + if (concatenateNames) { + toBeMerged.add(next); + } current.end = Math.max(current.getEnd(), next.getEnd()); } else { // Emit merged/unique interval - final Interval retVal = merge(toBeMerged, concatenateNames); + final Interval retVal = concatenateNames ? merge(toBeMerged, concatenateNames) : + new Interval(current.getContig(), current.getStart(), current.getEnd(), currentStrandNegative, currentFirstName); toBeMerged.clear(); current.setAll(next); currentStrandNegative = next.isNegativeStrand(); - toBeMerged.add(next); + currentFirstName = next.getName(); + if (concatenateNames) { + toBeMerged.add(next); + } return retVal; } } // Emit merged/unique interval - final Interval retVal = merge(toBeMerged, concatenateNames); + final Interval retVal = concatenateNames ? merge(toBeMerged, concatenateNames) : + new Interval(current.getContig(), current.getStart(), current.getEnd(), currentStrandNegative, currentFirstName); toBeMerged.clear(); current = null; return retVal; diff --git a/src/test/java/htsjdk/samtools/util/IntervalListTest.java b/src/test/java/htsjdk/samtools/util/IntervalListTest.java index 7010773599..924c6c6feb 100644 --- a/src/test/java/htsjdk/samtools/util/IntervalListTest.java +++ b/src/test/java/htsjdk/samtools/util/IntervalListTest.java @@ -702,4 +702,65 @@ public static Object[][] brokenFiles() { public void testBreaks(final Path brokenIntervalFile){ IntervalList.fromPath(brokenIntervalFile); } + + @Test + public void testLargeIteratorMerge() { + final IntervalList intervals = new IntervalList(this.fileHeader); + intervals.add(new Interval("1", 1, 2, false, "foo")); + for (int i = 2; i < 100000; i++) { + intervals.add(new Interval("1", i, i + 1, false, "bar")); + } + final Interval merged = new IntervalList.IntervalMergerIterator(intervals.iterator(), true, false, false).next(); + Assert.assertEquals(merged, new Interval("1", 1, 100000)); + Assert.assertEquals(merged.getName(), "foo"); + } + + @DataProvider + public static Object[][] lessMemForMergeWithNoNames() { + String contig = "1"; + Interval interval1 = new Interval(contig, 1, 100, false, "foo"); + Interval interval2 = new Interval(contig, 101, 200, false, "bar"); + Interval interval3 = new Interval(contig, 301, 400, false, "baz"); + Interval overlapInterval = new Interval(contig, 350, 450, false, "overlap"); + Interval interval4 = new Interval(contig, 401, 500, false, "qux"); + Interval combined1NoConcat = new Interval(contig, 1, 200, false, "foo"); + Interval combined2NoConcat = new Interval(contig, 301, 500, false, "baz"); + Interval combined1WithConcat = new Interval(contig, 1, 200, false, "foo|bar"); + Interval combined2WithConcat = new Interval(contig, 301, 500, false, "baz|qux"); + Interval combined2WithConcatAndOverlap = new Interval(contig, 301, 500, false, "baz|overlap|qux"); + return new Object[][]{ + {Collections.emptyList(), Collections.emptyList(), Collections.emptyList()}, + {Arrays.asList(interval1), Arrays.asList(interval1), Arrays.asList(interval1)}, + {Arrays.asList(interval1, interval2), Arrays.asList(combined1NoConcat), Arrays.asList(combined1WithConcat)}, + {Arrays.asList(interval1, interval2, interval3), Arrays.asList(combined1NoConcat, interval3), Arrays.asList(combined1WithConcat, interval3)}, + {Arrays.asList(interval1, interval2, interval3, interval4), Arrays.asList(combined1NoConcat, combined2NoConcat), Arrays.asList(combined1WithConcat, combined2WithConcat)}, + {Arrays.asList(interval1, interval2, interval3, overlapInterval, interval4), Arrays.asList(combined1NoConcat, combined2NoConcat), Arrays.asList(combined1WithConcat, combined2WithConcatAndOverlap)} + }; + } + + @Test(dataProvider = "lessMemForMergeWithNoNames") + public void testLessMemForMergeWithNoNames(final List intervals, final List expectedNoConcat, final List expectedWithConcat) { + final IntervalList intervalList = new IntervalList(this.fileHeader); + intervalList.addall(intervals); + + final IntervalList.IntervalMergerIterator firstNameMergerIterator = new IntervalList.IntervalMergerIterator(intervals.iterator(), true, false, false); + Collection firstNameMerged = CollectionUtil.makeCollection(firstNameMergerIterator); + Assert.assertEquals(firstNameMerged, expectedNoConcat); + List firstNameMergedList = new ArrayList<>(firstNameMerged); + for(int i=0; i concatNameMerged = CollectionUtil.makeCollection(concatNameMergerIterator); + Assert.assertEquals(concatNameMerged, expectedWithConcat); + List allNamesMergedList = new ArrayList<>(concatNameMerged); + for(int i=0; i