diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java index 7148323be4..5ddd95acd1 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java @@ -23,12 +23,17 @@ */ package htsjdk.samtools.seekablestream; +import htsjdk.io.HtsPath; +import htsjdk.io.IOPath; import htsjdk.samtools.util.IOUtil; +import htsjdk.tribble.TribbleException; + import java.io.File; import java.io.IOException; import java.net.URI; import java.net.URL; import java.nio.channels.SeekableByteChannel; +import java.nio.file.Path; import java.util.function.Function; /** @@ -61,9 +66,28 @@ public static ISeekableStreamFactory getInstance(){ * Does this path point to a regular file on disk and not something like a URL? * @param path the path to test * @return true if the path is to a file on disk + * @deprecated this method is simplistic and no longer particularly useful since IOPath allows similar access to + * various non-file data sources, internal use has been replaced with {@link #isSpecialCase(String)} */ + @Deprecated public static boolean isFilePath(final String path) { - return ! ( path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:") ); + return !isSpecialCaseUrlType(path); + } + + /** + * is this path being handled by one of the legacy SeekableStream types (http(s) / ftp) + * + * @param path a path to check + * @return if the path is not being handled by a FileSystemProvider and it can be read by legacy streams + */ + public static boolean isSpecialCase(final String path){ + return !new HtsPath(path).isPath() //if we have a provider for it that's what we'll use + && isSpecialCaseUrlType(path); // otherwise we fall back to the special handlers + } + + //is this onee of the url types that has legacy htsjdk support built in? + private static boolean isSpecialCaseUrlType(final String path) { + return path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:"); } private static class DefaultSeekableStreamFactory implements ISeekableStreamFactory { @@ -79,7 +103,7 @@ public SeekableStream getStreamFor(final String path) throws IOException { } /** - * The wrapper will only be applied to the stream if the stream is treated as a {@link java.nio.file.Path} + * The wrapper will only be applied to the stream if the stream is treated as a {@link Path} * * This currently means any uri with a scheme that is not http, https, ftp, or file will have the wrapper applied to it * @@ -89,26 +113,30 @@ public SeekableStream getStreamFor(final String path) throws IOException { @Override public SeekableStream getStreamFor(final String path, Function wrapper) throws IOException { - // todo -- add support for SeekableBlockInputStream - - if (path.startsWith("http:") || path.startsWith("https:")) { - final URL url = new URL(path); - return new SeekableHTTPStream(url); - } else if (path.startsWith("ftp:")) { - return new SeekableFTPStream(new URL(path)); - } else if (path.startsWith("file:")) { - try { - // convert to URI in order to obtain a decoded version of the path string suitable - // for use with the File constructor - final String decodedPath = new URI(path).getPath(); - return new SeekableFileStream(new File(decodedPath)); - } catch (java.net.URISyntaxException e) { - throw new IllegalArgumentException(String.format("The input string %s contains a URI scheme but is not a valid URI", path), e); - } - } else if (IOUtil.hasScheme(path)) { - return new SeekablePathStream(IOUtil.getPath(path), wrapper); + return getStreamFor(new HtsPath(path), wrapper); + } + + + /** + * The wrapper will only be applied to the stream if the stream is treated as a non file:// {@link Path} + * + * This has a fall back to htsjdk's built in http and ftp providers if no FileSystemProvder is available for them + * + * @param path an IOPath to be opened + * @param wrapper a wrapper to apply to the stream allowing direct transformations on the byte stream to be applied + * @throws IOException + */ + public static SeekableStream getStreamFor(final IOPath path, Function wrapper) throws IOException { + if(path.isPath()) { + return path.getScheme().equals("file") + ? new SeekablePathStream(path.toPath()) //don't apply the wrapper to local files + : new SeekablePathStream(path.toPath(), wrapper); } else { - return new SeekableFileStream(new File(path)); + return switch(path.getScheme()){ + case "http", "https" -> new SeekableHTTPStream(new URL(path.getURIString())); + case "ftp" -> new SeekableFTPStream((new URL(path.getURIString()))); + default -> throw new TribbleException("Unknown path type. No FileSystemProvider available for " + path.getRawInputString()); + }; } } diff --git a/src/main/java/htsjdk/tribble/FeatureCodec.java b/src/main/java/htsjdk/tribble/FeatureCodec.java index fc3e8f6181..68199b282f 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodec.java +++ b/src/main/java/htsjdk/tribble/FeatureCodec.java @@ -18,6 +18,7 @@ package htsjdk.tribble; +import htsjdk.io.IOPath; import htsjdk.samtools.util.LocationAware; import htsjdk.tribble.index.tabix.TabixFormat; diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index 768c797ac0..fac9c0f109 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -23,6 +23,7 @@ */ package htsjdk.tribble; +import htsjdk.io.HtsPath; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.seekablestream.SeekableStreamFactory; import htsjdk.samtools.util.IOUtil; @@ -40,6 +41,7 @@ import java.net.URI; import java.net.URLEncoder; import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -60,9 +62,9 @@ public class TribbleIndexedFeatureReader extends Abst private Index index; /** - * is the path pointing to our source data a regular file? + * is the path backed by old style built in http(s) / ftp support instead of a FileSystemProvider */ - private final boolean pathIsRegularFile; + private final boolean pathIsOldStyleHttpOrFtp; /** * a potentially reusable seekable stream for queries over regular files @@ -97,8 +99,7 @@ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec< } } - // does path point to a regular file? - this.pathIsRegularFile = SeekableStreamFactory.isFilePath(path); + this.pathIsOldStyleHttpOrFtp = SeekableStreamFactory.isSpecialCase(path); readHeader(); } @@ -203,7 +204,7 @@ private SeekableStream getSeekableStream() throws IOException { * @return true if */ private boolean reuseStreamInQuery() { - return pathIsRegularFile; + return !pathIsOldStyleHttpOrFtp; } @Override @@ -252,7 +253,7 @@ private void readHeader() throws IOException { PositionalBufferedStream pbs = null; try { is = ParsingUtils.openInputStream(path, wrapper); - if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) { + if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, StandardCharsets.UTF_8)))) { // TODO: TEST/FIX THIS! https://github.com/samtools/htsjdk/issues/944 // TODO -- warning I don't think this can work, the buffered input stream screws up position is = new GZIPInputStream(new BufferedInputStream(is)); diff --git a/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java b/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java index c5e6126ee9..6ba47b024d 100644 --- a/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java +++ b/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java @@ -17,13 +17,28 @@ public class SeekableStreamFactoryTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); - @Test - public void testIsFilePath() { - Assert.assertEquals(SeekableStreamFactory.isFilePath("x"), true); - Assert.assertEquals(SeekableStreamFactory.isFilePath(""), true); - Assert.assertEquals(SeekableStreamFactory.isFilePath("http://broadinstitute.org"), false); - Assert.assertEquals(SeekableStreamFactory.isFilePath("https://broadinstitute.org"), false); - Assert.assertEquals(SeekableStreamFactory.isFilePath("ftp://broadinstitute.org"), false); + @DataProvider + public Object[][] getSpecialCasePaths(){ + return new Object[][]{ + {"x", true}, + {"", true}, + {"http://broadinstitute.org", false}, + {"https://broadinstitute.org", false}, + {"ftp://broadinstitute.org", false} + }; + } + + @Test(dataProvider = "getSpecialCasePaths") + public void testIsFilePath(String path, boolean expected) { + Assert.assertEquals(SeekableStreamFactory.isFilePath(path), expected); + } + + + // this test isn't particularly useful since we're not testing the meaninful case of having the http-nio provider + // installed + @Test(dataProvider = "getSpecialCasePaths") + public void testIsSpecialCase(String path, boolean expected) { + Assert.assertEquals(SeekableStreamFactory.isSpecialCase(path), ! expected); } @DataProvider(name="getStreamForData")