Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-Ordering priority in SeekableStreamFactory #1693

Merged
merged 5 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/main/java/htsjdk/io/HtsPath.java
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ private URI getURIForString(final String pathString) {
tempURI = getCachedPath().toUri();
}
} catch (URISyntaxException uriException) {
//check that the uri wasn't a badly encoded absolute uri of some sort
//if you don't do this it will be treated as a badly formed file:/ url
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

file://

assertNoNonFileScheme(pathString, uriException);

// the input string isn't a valid URI; assume its a local (non-URI) file reference, and
// use the URI resulting from the corresponding Path
try {
Expand All @@ -276,5 +280,25 @@ private URI getURIForString(final String pathString) {

return tempURI;
}

/**
* check that there isn't a non file scheme at the start of the path
* @param pathString
* @param cause
*/
private static void assertNoNonFileScheme(String pathString, URISyntaxException cause){
final String[] split = pathString.split(":");
if(split.length > 1){
if(split[0] == null || split[0].isEmpty()){
throw new IllegalArgumentException("Malformed url " + pathString + " includes an empty scheme." +
"\nCheck that it is fully encoded.", cause);
}
if(!split[0].equals("file")){
throw new IllegalArgumentException("Malformed url " + pathString + " includes a scheme: " + split[0] + ":/ but was an invalid URI." +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:/ -> ://

"\nCheck that it is fully encoded.", cause);
}
}

}

}
1 change: 1 addition & 0 deletions src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
"chr21", "chr22", "chrX", "chrY", "chrM"
};


private static final String READ_GROUP_ID = "1";
private static final String SAMPLE = "FREE_SAMPLE";
private final Random random = new Random(TestUtil.RANDOM_SEED);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@
*/
package htsjdk.samtools.seekablestream;

import htsjdk.samtools.util.IOUtil;
import java.io.File;
import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.tribble.TribbleException;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
import java.util.Set;
import java.util.function.Function;

/**
Expand All @@ -40,6 +43,14 @@
public class SeekableStreamFactory{

private static final ISeekableStreamFactory DEFAULT_FACTORY;
private static final String HTTP = "http";
private static final String HTTPS = "https";
private static final String FTP = "ftp";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See below -- recommend including "://" in these constants to guard against files named after a protocol

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of adding them to the constants which are also used directly compared against getScheme the isHandledBy method now appends '://' to them.

/**
* the set of url schemes that have special support in htsjdk that isn't through a FileSystemProvider
*/
private static final Set<String> URL_SCHEMES_WITH_LEGACY_SUPPORT = Set.of(HTTP, FTP, HTTPS);
public static final String FILE_SCHEME = "file";
private static ISeekableStreamFactory currentFactory;

static{
Expand All @@ -61,9 +72,28 @@ public static ISeekableStreamFactory getInstance(){
* Does this path point to a regular file on disk and not something like a URL?
* @param path the path to test
* @return true if the path is to a file on disk
* @deprecated this method is simplistic and no longer particularly useful since IOPath allows similar access to
* various non-file data sources, internal use has been replaced with {@link #isBeingHandledByLegacyUrlSupport(String)}
*/
@Deprecated
public static boolean isFilePath(final String path) {
return ! ( path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:") );
return !canBeHandledByLegacyUrlSupport(path);
}

/**
* is this path being handled by one of the legacy SeekableStream types (http(s) / ftp)
*
* @param path a path to check
* @return if the path is not being handled by a FileSystemProvider and it can be read by legacy streams
*/
public static boolean isBeingHandledByLegacyUrlSupport(final String path){
return !new HtsPath(path).hasFileSystemProvider() //if we have a provider for it that's what we'll use
&& canBeHandledByLegacyUrlSupport(path); // otherwise we fall back to the special handlers
}

//is this onee of the url types that has legacy htsjdk support built in?
public static boolean canBeHandledByLegacyUrlSupport(final String path) {
return URL_SCHEMES_WITH_LEGACY_SUPPORT.stream().anyMatch(path::startsWith);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't you be checking that the path starts with "http://" (for example) instead of "http"? What if there's a file named "http"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

}

private static class DefaultSeekableStreamFactory implements ISeekableStreamFactory {
Expand All @@ -79,7 +109,7 @@ public SeekableStream getStreamFor(final String path) throws IOException {
}

/**
* The wrapper will only be applied to the stream if the stream is treated as a {@link java.nio.file.Path}
* The wrapper will only be applied to the stream if the stream is treated as a {@link Path}
*
* This currently means any uri with a scheme that is not http, https, ftp, or file will have the wrapper applied to it
*
Expand All @@ -89,26 +119,30 @@ public SeekableStream getStreamFor(final String path) throws IOException {
@Override
public SeekableStream getStreamFor(final String path,
Function<SeekableByteChannel, SeekableByteChannel> wrapper) throws IOException {
// todo -- add support for SeekableBlockInputStream

if (path.startsWith("http:") || path.startsWith("https:")) {
final URL url = new URL(path);
return new SeekableHTTPStream(url);
} else if (path.startsWith("ftp:")) {
return new SeekableFTPStream(new URL(path));
} else if (path.startsWith("file:")) {
try {
// convert to URI in order to obtain a decoded version of the path string suitable
// for use with the File constructor
final String decodedPath = new URI(path).getPath();
return new SeekableFileStream(new File(decodedPath));
} catch (java.net.URISyntaxException e) {
throw new IllegalArgumentException(String.format("The input string %s contains a URI scheme but is not a valid URI", path), e);
}
} else if (IOUtil.hasScheme(path)) {
return new SeekablePathStream(IOUtil.getPath(path), wrapper);
return getStreamFor(new HtsPath(path), wrapper);
}


/**
* The wrapper will only be applied to the stream if the stream is treated as a non file:// {@link Path}
*
* This has a fall back to htsjdk's built in http and ftp providers if no FileSystemProvder is available for them
*
* @param path an IOPath to be opened
* @param wrapper a wrapper to apply to the stream allowing direct transformations on the byte stream to be applied
* @throws IOException
*/
public static SeekableStream getStreamFor(final IOPath path, Function<SeekableByteChannel, SeekableByteChannel> wrapper) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there aren't already tests covering file:// vs. local file paths (no protocol), we might want one

if(path.hasFileSystemProvider()) {
return path.getScheme().equals(FILE_SCHEME)
? new SeekableFileStream(path.toPath().toFile()) //don't apply the wrapper to local files
: new SeekablePathStream(path.toPath(), wrapper);
} else {
return new SeekableFileStream(new File(path));
return switch(path.getScheme()){
case HTTP, HTTPS -> new SeekableHTTPStream(new URL(path.getRawInputString()));
case FTP -> new SeekableFTPStream((new URL(path.getRawInputString())));
default -> throw new TribbleException("Unknown path type. No FileSystemProvider available for " + path.getRawInputString());
};
}
}

Expand Down
1 change: 1 addition & 0 deletions src/main/java/htsjdk/tribble/FeatureCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package htsjdk.tribble;

import htsjdk.io.IOPath;
import htsjdk.samtools.util.LocationAware;
import htsjdk.tribble.index.tabix.TabixFormat;

Expand Down
13 changes: 7 additions & 6 deletions src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package htsjdk.tribble;

import htsjdk.io.HtsPath;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.IOUtil;
Expand All @@ -40,6 +41,7 @@
import java.net.URI;
import java.net.URLEncoder;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
Expand All @@ -60,9 +62,9 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
private Index index;

/**
* is the path pointing to our source data a regular file?
* is the path backed by old style built in http(s) / ftp support instead of a FileSystemProvider
*/
private final boolean pathIsRegularFile;
private final boolean pathIsOldStyleHttpOrFtp;

/**
* a potentially reusable seekable stream for queries over regular files
Expand Down Expand Up @@ -97,8 +99,7 @@ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec<
}
}

// does path point to a regular file?
this.pathIsRegularFile = SeekableStreamFactory.isFilePath(path);
this.pathIsOldStyleHttpOrFtp = SeekableStreamFactory.isBeingHandledByLegacyUrlSupport(path);

readHeader();
}
Expand Down Expand Up @@ -203,7 +204,7 @@ private SeekableStream getSeekableStream() throws IOException {
* @return true if
*/
private boolean reuseStreamInQuery() {
return pathIsRegularFile;
return !pathIsOldStyleHttpOrFtp;
}

@Override
Expand Down Expand Up @@ -252,7 +253,7 @@ private void readHeader() throws IOException {
PositionalBufferedStream pbs = null;
try {
is = ParsingUtils.openInputStream(path, wrapper);
if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) {
if (IOUtil.hasBlockCompressedExtension(new HtsPath(path).getURI())) {
// TODO: TEST/FIX THIS! https://github.com/samtools/htsjdk/issues/944
// TODO -- warning I don't think this can work, the buffered input stream screws up position
is = new GZIPInputStream(new BufferedInputStream(is));
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/htsjdk/tribble/util/FTPHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.net.URL;

/**
Expand Down Expand Up @@ -35,7 +36,12 @@ public long getContentLength() throws IOException {

@Override
public InputStream openInputStream() throws IOException {
String file = url.getPath();
String file = null;
try {
file = url.toURI().getPath();
} catch (URISyntaxException e) {
throw new IOException(e);
}
FTPClient ftp = FTPUtils.connect(url.getHost(), url.getUserInfo(), null);
ftp.pasv();
ftp.retr(file);
Expand Down
45 changes: 25 additions & 20 deletions src/main/java/htsjdk/tribble/util/ParsingUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,20 @@
*/
package htsjdk.tribble.util;

import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.samtools.seekablestream.SeekablePathStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.IOUtil;

import java.awt.Color;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.util.*;
Expand All @@ -49,7 +52,7 @@ public class ParsingUtils {
private static URLHelperFactory urlHelperFactory = RemoteURLHelper::new;

// HTML 4.1 color table, + orange and magenta
private static Map<String, String> colorSymbols = new HashMap();
private static final Map<String, String> colorSymbols = new HashMap<>();

static {
colorSymbols.put("white", "FFFFFF");
Expand Down Expand Up @@ -81,32 +84,35 @@ public static InputStream openInputStream(String path)
return openInputStream(path, null);
}

static private final Set<String> URL_SCHEMES = new HashSet<>(Arrays.asList("http", "ftp", "https"));

/**
* open an input stream from the given path and wrap the raw byte stream with a wrapper if given
*
* the wrapper will only be applied to paths that are not http, https, ftp, or file, i.e. any {@link java.nio.file.Path}
* using a custom filesystem plugin
* the wrapper will only be applied to paths that are
* 1. not local files
* 2. not being handled by the legacy http(s)/ftp providers
* i.e. any {@link java.nio.file.Path} using a custom FileSystem plugin
* @param uri a uri like string
* @param wrapper to wrap the input stream in, may be used to implement caching or prefetching, etc
* @return An inputStream appropriately created from uri and conditionally wrapped with wrapper (only in certain cases)
* @throws IOException when stream cannot be opened against uri
*/
public static InputStream openInputStream(final String uri, final Function<SeekableByteChannel, SeekableByteChannel> wrapper)
throws IOException {

final InputStream inputStream;

if (URL_SCHEMES.stream().anyMatch(uri::startsWith)) {
inputStream = getURLHelper(new URL(uri)).openInputStream();
} else if (!IOUtil.hasScheme(uri)) {
File file = new File(uri);
inputStream = Files.newInputStream(file.toPath());
final IOPath path = new HtsPath(uri);
if(path.hasFileSystemProvider()){
if(path.isPath()) {
return path.getScheme().equals("file")
? Files.newInputStream(path.toPath())
: new SeekablePathStream(path.toPath(), wrapper);
} else {
throw new IOException("FileSystemProvider for path " + path.getRawInputString() + " exits but failed to " +
" create path. \n" + path.getToPathFailureReason());
}
} else if( SeekableStreamFactory.canBeHandledByLegacyUrlSupport(uri)){
return getURLHelper(new URL(uri)).openInputStream();
} else {
inputStream = new SeekablePathStream(IOUtil.getPath(uri), wrapper);
throw new IOException("No FileSystemProvider available to handle path: " + path.getRawInputString());
}
return inputStream;
}

public static <T> String join(String separator, Collection<T> objects) {
Expand Down Expand Up @@ -402,10 +408,9 @@ private static Color hexToColor(String string) {
}

public static boolean resourceExists(String resource) throws IOException{

boolean remoteFile = resource.startsWith("http://") || resource.startsWith("https://") || resource.startsWith("ftp://");
boolean remoteFile = SeekableStreamFactory.isBeingHandledByLegacyUrlSupport(resource);
if (remoteFile) {
URL url = null;
URL url;
try {
url = new URL(resource);
} catch (MalformedURLException e) {
Expand Down
3 changes: 3 additions & 0 deletions src/test/java/htsjdk/io/HtsPathUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ public Object[][] validHtsPath() {
{"gcs://abucket/bucket", "gcs://abucket/bucket", false, false},
{"gendb://somegdb", "gendb://somegdb", false, false},
{"chr1:1-100", "chr1:1-100", false, false},
{"ftp://broad.org/file", "ftp://broad.org/file", false, false},
{"ftp://broad.org/with%20space", "ftp://broad.org/with%20space", false, false},

//**********************************************************************************************
// Valid URIs which ARE valid NIO URIs (there *IS* an installed file system provider), but are
Expand Down Expand Up @@ -167,6 +169,7 @@ public Object[][] invalidHtsPath() {
// the nul character is rejected on all of the supported platforms in both local
// filenames and URIs, so use it to test HtsPath constructor failure on all platforms
{"\0"},
{"ftp://broad.org/file with space"} // this has a non-file scheme but isn't encoded properly
};
}

Expand Down
Loading