Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-Ordering priority in SeekableStreamFactory #1693

Merged
merged 5 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@
*/
package htsjdk.samtools.seekablestream;

import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.samtools.util.IOUtil;
import htsjdk.tribble.TribbleException;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
import java.util.function.Function;

/**
Expand Down Expand Up @@ -61,9 +66,28 @@ public static ISeekableStreamFactory getInstance(){
* Does this path point to a regular file on disk and not something like a URL?
* @param path the path to test
* @return true if the path is to a file on disk
* @deprecated this method is simplistic and no longer particularly useful since IOPath allows similar access to
* various non-file data sources, internal use has been replaced with {@link #isSpecialCase(String)}
*/
@Deprecated
public static boolean isFilePath(final String path) {
return ! ( path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:") );
return !isSpecialCaseUrlType(path);
}

/**
* is this path being handled by one of the legacy SeekableStream types (http(s) / ftp)
*
* @param path a path to check
* @return if the path is not being handled by a FileSystemProvider and it can be read by legacy streams
*/
public static boolean isSpecialCase(final String path){
return !new HtsPath(path).isPath() //if we have a provider for it that's what we'll use
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe break out out a hasNIOPlugin() (or similar) method for the new HtsPath(path).isPath() everywhere, for clarity?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or should you be using hasFileSystemProvider() instead?

&& isSpecialCaseUrlType(path); // otherwise we fall back to the special handlers
}

//is this onee of the url types that has legacy htsjdk support built in?
private static boolean isSpecialCaseUrlType(final String path) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isSupportedByLegacyCloudReaders()?

return path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:");
}

private static class DefaultSeekableStreamFactory implements ISeekableStreamFactory {
Expand All @@ -79,7 +103,7 @@ public SeekableStream getStreamFor(final String path) throws IOException {
}

/**
* The wrapper will only be applied to the stream if the stream is treated as a {@link java.nio.file.Path}
* The wrapper will only be applied to the stream if the stream is treated as a {@link Path}
*
* This currently means any uri with a scheme that is not http, https, ftp, or file will have the wrapper applied to it
*
Expand All @@ -89,26 +113,30 @@ public SeekableStream getStreamFor(final String path) throws IOException {
@Override
public SeekableStream getStreamFor(final String path,
Function<SeekableByteChannel, SeekableByteChannel> wrapper) throws IOException {
// todo -- add support for SeekableBlockInputStream

if (path.startsWith("http:") || path.startsWith("https:")) {
final URL url = new URL(path);
return new SeekableHTTPStream(url);
} else if (path.startsWith("ftp:")) {
return new SeekableFTPStream(new URL(path));
} else if (path.startsWith("file:")) {
try {
// convert to URI in order to obtain a decoded version of the path string suitable
// for use with the File constructor
final String decodedPath = new URI(path).getPath();
return new SeekableFileStream(new File(decodedPath));
} catch (java.net.URISyntaxException e) {
throw new IllegalArgumentException(String.format("The input string %s contains a URI scheme but is not a valid URI", path), e);
}
} else if (IOUtil.hasScheme(path)) {
return new SeekablePathStream(IOUtil.getPath(path), wrapper);
return getStreamFor(new HtsPath(path), wrapper);
}


/**
* The wrapper will only be applied to the stream if the stream is treated as a non file:// {@link Path}
*
* This has a fall back to htsjdk's built in http and ftp providers if no FileSystemProvder is available for them
*
* @param path an IOPath to be opened
* @param wrapper a wrapper to apply to the stream allowing direct transformations on the byte stream to be applied
* @throws IOException
*/
public static SeekableStream getStreamFor(final IOPath path, Function<SeekableByteChannel, SeekableByteChannel> wrapper) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there aren't already tests covering file:// vs. local file paths (no protocol), we might want one

if(path.isPath()) {
return path.getScheme().equals("file")
? new SeekablePathStream(path.toPath()) //don't apply the wrapper to local files
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check usages of SeekablePathStream.getSource() to make sure the switch from File -> Path for local files is safe (concerned about file:// appearing unexpectedly downstream). Or to be super-safe, could stick with SeekableFileStream for now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I switched back to SeekableFileStream although it makes me sad.

: new SeekablePathStream(path.toPath(), wrapper);
} else {
return new SeekableFileStream(new File(path));
return switch(path.getScheme()){
case "http", "https" -> new SeekableHTTPStream(new URL(path.getURIString()));
case "ftp" -> new SeekableFTPStream((new URL(path.getURIString())));
default -> throw new TribbleException("Unknown path type. No FileSystemProvider available for " + path.getRawInputString());
};
}
}

Expand Down
1 change: 1 addition & 0 deletions src/main/java/htsjdk/tribble/FeatureCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package htsjdk.tribble;

import htsjdk.io.IOPath;
import htsjdk.samtools.util.LocationAware;
import htsjdk.tribble.index.tabix.TabixFormat;

Expand Down
13 changes: 7 additions & 6 deletions src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package htsjdk.tribble;

import htsjdk.io.HtsPath;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.IOUtil;
Expand All @@ -40,6 +41,7 @@
import java.net.URI;
import java.net.URLEncoder;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
Expand All @@ -60,9 +62,9 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
private Index index;

/**
* is the path pointing to our source data a regular file?
* is the path backed by old style built in http(s) / ftp support instead of a FileSystemProvider
*/
private final boolean pathIsRegularFile;
private final boolean pathIsOldStyleHttpOrFtp;

/**
* a potentially reusable seekable stream for queries over regular files
Expand Down Expand Up @@ -97,8 +99,7 @@ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec<
}
}

// does path point to a regular file?
this.pathIsRegularFile = SeekableStreamFactory.isFilePath(path);
this.pathIsOldStyleHttpOrFtp = SeekableStreamFactory.isSpecialCase(path);

readHeader();
}
Expand Down Expand Up @@ -203,7 +204,7 @@ private SeekableStream getSeekableStream() throws IOException {
* @return true if
*/
private boolean reuseStreamInQuery() {
return pathIsRegularFile;
return !pathIsOldStyleHttpOrFtp;
}

@Override
Expand Down Expand Up @@ -252,7 +253,7 @@ private void readHeader() throws IOException {
PositionalBufferedStream pbs = null;
try {
is = ParsingUtils.openInputStream(path, wrapper);
if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) {
if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, StandardCharsets.UTF_8)))) {
// TODO: TEST/FIX THIS! https://github.com/samtools/htsjdk/issues/944
// TODO -- warning I don't think this can work, the buffered input stream screws up position
is = new GZIPInputStream(new BufferedInputStream(is));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,28 @@
public class SeekableStreamFactoryTest extends HtsjdkTest {
private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools");

@Test
public void testIsFilePath() {
Assert.assertEquals(SeekableStreamFactory.isFilePath("x"), true);
Assert.assertEquals(SeekableStreamFactory.isFilePath(""), true);
Assert.assertEquals(SeekableStreamFactory.isFilePath("http://broadinstitute.org"), false);
Assert.assertEquals(SeekableStreamFactory.isFilePath("https://broadinstitute.org"), false);
Assert.assertEquals(SeekableStreamFactory.isFilePath("ftp://broadinstitute.org"), false);
@DataProvider
public Object[][] getSpecialCasePaths(){
return new Object[][]{
{"x", true},
{"", true},
{"http://broadinstitute.org", false},
{"https://broadinstitute.org", false},
{"ftp://broadinstitute.org", false}
};
}

@Test(dataProvider = "getSpecialCasePaths")
public void testIsFilePath(String path, boolean expected) {
Assert.assertEquals(SeekableStreamFactory.isFilePath(path), expected);
}


// this test isn't particularly useful since we're not testing the meaninful case of having the http-nio provider
// installed
@Test(dataProvider = "getSpecialCasePaths")
public void testIsSpecialCase(String path, boolean expected) {
Assert.assertEquals(SeekableStreamFactory.isSpecialCase(path), ! expected);
}

@DataProvider(name="getStreamForData")
Expand Down