Skip to content

Commit

Permalink
Re-Ordering priority in SeekableStreamFactory
Browse files Browse the repository at this point in the history
* Now when constructing streams it will prefer NIO plugins.
* If no http / ftp plugin exists it will fall back to the htsjdk bulit in
* No longer special casing local files, these will now be handled through NIO as well.
* Deprecating SeekableStreamFactory.isFilePath() since it is no longer used and interacts poorly
  with nio filesystem providers
  • Loading branch information
lbergelson committed Dec 11, 2023
1 parent 3964abe commit 6cdcbd4
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@
*/
package htsjdk.samtools.seekablestream;

import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.samtools.util.IOUtil;
import htsjdk.tribble.TribbleException;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
import java.util.function.Function;

/**
Expand Down Expand Up @@ -61,9 +66,28 @@ public static ISeekableStreamFactory getInstance(){
* Does this path point to a regular file on disk and not something like a URL?
* @param path the path to test
* @return true if the path is to a file on disk
* @deprecated this method is simplistic and no longer particularly useful since IOPath allows similar access to
* various non-file data sources, internal use has been replaced with {@link #isSpecialCase(String)}
*/
@Deprecated
public static boolean isFilePath(final String path) {
return ! ( path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:") );
return !isSpecialCaseUrlType(path);
}

/**
* is this path being handled by one of the legacy SeekableStream types (http(s) / ftp)
*
* @param path a path to check
* @return if the path is not being handled by a FileSystemProvider and it can be read by legacy streams
*/
public static boolean isSpecialCase(final String path){
return !new HtsPath(path).isPath() //if we have a provider for it that's what we'll use
&& isSpecialCaseUrlType(path); // otherwise we fall back to the special handlers
}

//is this onee of the url types that has legacy htsjdk support built in?
private static boolean isSpecialCaseUrlType(final String path) {
return path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:");
}

private static class DefaultSeekableStreamFactory implements ISeekableStreamFactory {
Expand All @@ -79,7 +103,7 @@ public SeekableStream getStreamFor(final String path) throws IOException {
}

/**
* The wrapper will only be applied to the stream if the stream is treated as a {@link java.nio.file.Path}
* The wrapper will only be applied to the stream if the stream is treated as a {@link Path}
*
* This currently means any uri with a scheme that is not http, https, ftp, or file will have the wrapper applied to it
*
Expand All @@ -89,26 +113,30 @@ public SeekableStream getStreamFor(final String path) throws IOException {
@Override
public SeekableStream getStreamFor(final String path,
Function<SeekableByteChannel, SeekableByteChannel> wrapper) throws IOException {
// todo -- add support for SeekableBlockInputStream

if (path.startsWith("http:") || path.startsWith("https:")) {
final URL url = new URL(path);
return new SeekableHTTPStream(url);
} else if (path.startsWith("ftp:")) {
return new SeekableFTPStream(new URL(path));
} else if (path.startsWith("file:")) {
try {
// convert to URI in order to obtain a decoded version of the path string suitable
// for use with the File constructor
final String decodedPath = new URI(path).getPath();
return new SeekableFileStream(new File(decodedPath));
} catch (java.net.URISyntaxException e) {
throw new IllegalArgumentException(String.format("The input string %s contains a URI scheme but is not a valid URI", path), e);
}
} else if (IOUtil.hasScheme(path)) {
return new SeekablePathStream(IOUtil.getPath(path), wrapper);
return getStreamFor(new HtsPath(path), wrapper);
}


/**
* The wrapper will only be applied to the stream if the stream is treated as a non file:// {@link Path}
*
* This has a fall back to htsjdk's built in http and ftp providers if no FileSystemProvder is available for them
*
* @param path an IOPath to be opened
* @param wrapper a wrapper to apply to the stream allowing direct transformations on the byte stream to be applied
* @throws IOException
*/
public static SeekableStream getStreamFor(final IOPath path, Function<SeekableByteChannel, SeekableByteChannel> wrapper) throws IOException {
if(path.isPath()) {
return path.getScheme().equals("file")
? new SeekablePathStream(path.toPath()) //don't apply the wrapper to local files
: new SeekablePathStream(path.toPath(), wrapper);
} else {
return new SeekableFileStream(new File(path));
return switch(path.getScheme()){
case "http", "https" -> new SeekableHTTPStream(new URL(path.getURIString()));
case "ftp" -> new SeekableFTPStream((new URL(path.getURIString())));
default -> throw new TribbleException("Unknown path type. No FileSystemProvider available for " + path.getRawInputString());
};
}
}

Expand Down
1 change: 1 addition & 0 deletions src/main/java/htsjdk/tribble/FeatureCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package htsjdk.tribble;

import htsjdk.io.IOPath;
import htsjdk.samtools.util.LocationAware;
import htsjdk.tribble.index.tabix.TabixFormat;

Expand Down
13 changes: 7 additions & 6 deletions src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package htsjdk.tribble;

import htsjdk.io.HtsPath;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.IOUtil;
Expand All @@ -40,6 +41,7 @@
import java.net.URI;
import java.net.URLEncoder;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
Expand All @@ -60,9 +62,9 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
private Index index;

/**
* is the path pointing to our source data a regular file?
* is the path backed by old style built in http(s) / ftp support instead of a FileSystemProvider
*/
private final boolean pathIsRegularFile;
private final boolean pathIsOldStyleHttpOrFtp;

/**
* a potentially reusable seekable stream for queries over regular files
Expand Down Expand Up @@ -97,8 +99,7 @@ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec<
}
}

// does path point to a regular file?
this.pathIsRegularFile = SeekableStreamFactory.isFilePath(path);
this.pathIsOldStyleHttpOrFtp = SeekableStreamFactory.isSpecialCase(path);

readHeader();
}
Expand Down Expand Up @@ -203,7 +204,7 @@ private SeekableStream getSeekableStream() throws IOException {
* @return true if
*/
private boolean reuseStreamInQuery() {
return pathIsRegularFile;
return !pathIsOldStyleHttpOrFtp;
}

@Override
Expand Down Expand Up @@ -252,7 +253,7 @@ private void readHeader() throws IOException {
PositionalBufferedStream pbs = null;
try {
is = ParsingUtils.openInputStream(path, wrapper);
if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) {
if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, StandardCharsets.UTF_8)))) {
// TODO: TEST/FIX THIS! https://github.com/samtools/htsjdk/issues/944
// TODO -- warning I don't think this can work, the buffered input stream screws up position
is = new GZIPInputStream(new BufferedInputStream(is));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,28 @@
public class SeekableStreamFactoryTest extends HtsjdkTest {
private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools");

@Test
public void testIsFilePath() {
Assert.assertEquals(SeekableStreamFactory.isFilePath("x"), true);
Assert.assertEquals(SeekableStreamFactory.isFilePath(""), true);
Assert.assertEquals(SeekableStreamFactory.isFilePath("http://broadinstitute.org"), false);
Assert.assertEquals(SeekableStreamFactory.isFilePath("https://broadinstitute.org"), false);
Assert.assertEquals(SeekableStreamFactory.isFilePath("ftp://broadinstitute.org"), false);
@DataProvider
public Object[][] getSpecialCasePaths(){
return new Object[][]{
{"x", true},
{"", true},
{"http://broadinstitute.org", false},
{"https://broadinstitute.org", false},
{"ftp://broadinstitute.org", false}
};
}

@Test(dataProvider = "getSpecialCasePaths")
public void testIsFilePath(String path, boolean expected) {
Assert.assertEquals(SeekableStreamFactory.isFilePath(path), expected);
}


// this test isn't particularly useful since we're not testing the meaninful case of having the http-nio provider
// installed
@Test(dataProvider = "getSpecialCasePaths")
public void testIsSpecialCase(String path, boolean expected) {
Assert.assertEquals(SeekableStreamFactory.isSpecialCase(path), ! expected);
}

@DataProvider(name="getStreamForData")
Expand Down

0 comments on commit 6cdcbd4

Please sign in to comment.